From b8176ffbba9b6c0849e3da075f0d4a3cfca6e092 Mon Sep 17 00:00:00 2001
From: Simon Rettberg
Date: Tue, 3 Nov 2020 13:49:36 +0100
Subject: [RecompressArchive] Properly implement duplicate filename detection

Implements #3670
---
 .../java/org/openslx/satserver/util/Archive.java  |  25 ++++-
 .../taskmanager/tasks/RecompressArchive.java      | 104 ++++++++++++++++-----
 2 files changed, 99 insertions(+), 30 deletions(-)

diff --git a/src/main/java/org/openslx/satserver/util/Archive.java b/src/main/java/org/openslx/satserver/util/Archive.java
index 8ad2123..25285e7 100644
--- a/src/main/java/org/openslx/satserver/util/Archive.java
+++ b/src/main/java/org/openslx/satserver/util/Archive.java
@@ -28,6 +28,7 @@ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream
 import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
 import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
 
 public class Archive
 {
@@ -102,13 +103,27 @@ public class Archive
 	public static TarArchiveEntry createTarArchiveEntry( ArchiveEntry inEntry, int defaultUser, int defaultGroup,
 			int defaultDirMode, int defaultFileMode )
 	{
-		if ( inEntry instanceof TarArchiveEntry ) {
-			// Source is tar - easy
-			return (TarArchiveEntry)inEntry;
+		String name = inEntry.getName();
+		if ( Util.isEmpty( name ) )
+			return null;
+		if ( !name.startsWith( "/" ) ) {
+			name = "/" + name;
 		}
-		final TarArchiveEntry outEntry = new TarArchiveEntry( inEntry.getName() );
+		name = FilenameUtils.normalize( name );
+		if ( name == null )
+			return null;
+		if ( Util.isEmpty( name ) )
+			return null;
+		final TarArchiveEntry outEntry = new TarArchiveEntry( name );
 		outEntry.setSize( inEntry.getSize() );
-		if ( inEntry instanceof ArArchiveEntry ) {
+		if ( inEntry instanceof TarArchiveEntry ) {
+			// Source is tar - easy
+			outEntry.setUserName( ( (TarArchiveEntry)inEntry ).getUserName() );
+			outEntry.setGroupName( ( (TarArchiveEntry)inEntry ).getGroupName() );
+			outEntry.setIds( ( (TarArchiveEntry)inEntry ).getUserId(), ( (TarArchiveEntry)inEntry ).getGroupId() );
+			outEntry.setMode( ( (TarArchiveEntry)inEntry ).getMode() );
+			outEntry.setModTime( ( (TarArchiveEntry)inEntry ).getLastModifiedDate() );
+		} else if ( inEntry instanceof ArArchiveEntry ) {
 			// Source is ar - has most of the stuff tar supports; transform
 			outEntry.setIds( ( (ArArchiveEntry)inEntry ).getUserId(), ( (ArArchiveEntry)inEntry ).getGroupId() );
 			outEntry.setMode( ( (ArArchiveEntry)inEntry ).getMode() );
diff --git a/src/main/java/org/openslx/taskmanager/tasks/RecompressArchive.java b/src/main/java/org/openslx/taskmanager/tasks/RecompressArchive.java
index fc23842..1fc897d 100644
--- a/src/main/java/org/openslx/taskmanager/tasks/RecompressArchive.java
+++ b/src/main/java/org/openslx/taskmanager/tasks/RecompressArchive.java
@@ -5,10 +5,11 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
-import java.util.HashSet;
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
-import java.util.Set;
-import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.commons.compress.archivers.ArchiveEntry;
 import org.apache.commons.compress.archivers.ArchiveException;
@@ -27,8 +28,8 @@ public class RecompressArchive extends AbstractTask
 {
 
 	@Expose
-	private String[] inputFiles;
-
+	private Map<String, Boolean> inputFiles;
+
 	@Expose
 	private String outputFile;
 
@@ -78,57 +79,68 @@ public class RecompressArchive extends AbstractTask
 			return false;
 		}
 
+		// Remember all files added, so we can issue warnings for duplicate entries
+		Map<String, List<A>> entries = new HashMap<>();
 		// Open input file archives, one by one
-		for ( final String inputFile : this.inputFiles ) {
+		for ( final Entry<String, Boolean> it : this.inputFiles.entrySet() ) {
+			final A a = new A( it.getKey(), it.getValue() );
 			ArchiveInputStream inArchive = null;
 			try {
 				try {
-					inArchive = Archive.getArchiveInputStream( inputFile );
+					inArchive = Archive.getArchiveInputStream( a.inputFile );
 				} catch ( FileNotFoundException e1 ) {
-					status.error = "(" + inputFile + ") " + e1.getMessage();
+					status.error = "(" + a.inputFile + ") " + e1.getMessage();
 					status.errorCode = Output.ErrorCode.NOT_FOUND;
 					return false;
 				} catch ( IllegalArgumentException e1 ) {
-					status.error = "(" + inputFile + ") " + e1.getMessage();
+					status.error = "(" + a.inputFile + ") " + e1.getMessage();
 					status.errorCode = Output.ErrorCode.UNKNOWN_ERROR;
 					return false;
 				} catch ( ArchiveException e1 ) {
-					status.error = "(" + inputFile + ") " + e1.getMessage();
+					status.error = "(" + a.inputFile + ") " + e1.getMessage();
 					status.errorCode = Output.ErrorCode.UNKNOWN_FORMAT;
 					return false;
 				}
 				// It's open
 				ArchiveEntry inEntry;
-
-				// Remember all files added, so we can issue warnings for duplicate entries
-				Set<String> entries = new HashSet<>();
 				// Iterate over every entry
 				while ( ( inEntry = inArchive.getNextEntry() ) != null ) {
 					if ( !inArchive.canReadEntryData( inEntry ) )
 						continue; // Skip unreadable entries
+					if ( inEntry.getName().equals( "/" ) || inEntry.getName().equals( "./" ) )
+						continue;
 					// Construct TarArchiveEntry - we want unix stuff like uid/gid, links, file/dir mode, so try to get from source archive
 					TarArchiveEntry outEntry = Archive.createTarArchiveEntry( inEntry, 0, 0, 0755, 0644 );
+					if ( outEntry == null ) {
+						status.addWarning( "Ignoring invalid entry " + inEntry.getName() + " in " + a.inputFile );
+						continue;
+					}
 					// Ask lib if the entry can be written
 					if ( !outArchive.canWriteEntryData( outEntry ) ) {
-						status.warnings.add( "Commons-compress says entry '" + outEntry.getName() + "' cannot be written." );
+						status.addWarning( "Commons-compress says entry '" + outEntry.getName() + "' cannot be written." );
 						continue;
 					}
-					// Dupcheck
-					if ( entries.contains( outEntry.getName() ) ) {
-						status.warnings.add( "Duplicate entry: " + outEntry.getName() );
-					} else {
-						entries.add( outEntry.getName() );
+					if ( !outEntry.isDirectory() ) {
+						// Dupcheck
+						List<A> existing = entries.get( outEntry.getName() );
+						if ( existing == null ) {
+							entries.put( outEntry.getName(), existing = new ArrayList<A>() );
+							existing.add( a );
+						} else {
+							existing.add( a );
+							continue; // Just record, but don't bloat archive
+						}
 					}
 					// Actually add entry now
 					try {
 						outArchive.putArchiveEntry( outEntry );
 					} catch (Exception e) {
-						status.error = "(" + inputFile + ") Could not putArchiveEntry('" + inEntry.getName() + "'): " + e.getMessage();
+						status.error = "(" + a.inputFile + ") Could not putArchiveEntry('" + outEntry.getName() + "'): " + e.getMessage();
 						status.errorCode = Output.ErrorCode.TAR_ERROR;
 						return false;
 					}
 					if ( !Util.streamCopy( inArchive, outArchive, outEntry.getSize() ) ) {
-						status.error = "(" + inputFile + ") Could not copy entry '" + inEntry.getName() + "' to destination archive.";
+						status.error = "(" + a.inputFile + ") Could not copy entry '" + inEntry.getName() + "' to destination archive.";
 						status.errorCode = Output.ErrorCode.WRITE_FAILED;
 						return false;
 					}
@@ -140,6 +152,30 @@ public class RecompressArchive extends AbstractTask
 				Util.multiClose( inArchive );
 			}
 		} // End of loop over input archives
+
+		// Check for dups
+		boolean hasAnyDup = false;
+		for ( Entry<String, List<A>> fit : entries.entrySet() ) {
+			if ( fit.getValue().size() < 2 )
+				continue;
+			boolean hasDup = false;
+			for ( A lit : fit.getValue() ) {
+				if ( lit.dupCheck ) {
+					hasDup = true;
+					break;
+				}
+			}
+			if ( !hasDup ) // No file requested to be checked for duplicates
+				continue;
+			hasAnyDup = true;
+			status.addWarning( "Duplicate entry: " + fit.getKey() );
+			for ( A lit : fit.getValue() ) {
+				status.addWarning( " Source: " + lit.inputFile );
+			}
+		}
+		if ( hasAnyDup ) {
+			status.addWarning( "You have duplicate files in your source archives. It's undefined which one ends up in the final archive." );
+		}
 
 		return true;
 	} finally {
@@ -157,8 +193,7 @@
 			status.errorCode = Output.ErrorCode.UNKNOWN_ERROR;
 			return false;
 		}
-		for ( int i = 0; i < this.inputFiles.length; ++i ) {
-			String inputFile = this.inputFiles[i];
+		for ( String inputFile : this.inputFiles.keySet() ) {
 			if ( inputFile == null || inputFile.length() == 0 || inputFile.charAt( 0 ) != '/' ) {
 				status.error = "Input: Need absolute path (got: " + inputFile + ")";
 				status.errorCode = Output.ErrorCode.INVALID_DIRECTORY;
 				return false;
@@ -170,7 +205,6 @@
 				status.errorCode = Output.ErrorCode.INVALID_DIRECTORY;
 				return false;
 			}
-			this.inputFiles[i] = inputFile;
 		}
 		// Output file
 		if ( this.outputFile == null || this.outputFile.length() == 0 || this.outputFile.charAt( 0 ) != '/' ) {
@@ -198,7 +232,7 @@
 		};
 
 		protected ErrorCode errorCode = null;
-		protected List<String> warnings = new CopyOnWriteArrayList<>();
+		protected String warnings = null;
 
 		public static class Entry
 		{
@@ -206,6 +240,26 @@
 			protected boolean isdir = false;
 			protected long size = -1;
 		}
+
+		public void addWarning( String txt )
+		{
+			if (warnings == null) {
+				warnings = txt;
+			} else {
+				warnings += "\n" + txt;
+			}
+		}
+	}
+
+	private static class A
+	{
+		public final String inputFile;
+		public final Boolean dupCheck;
+		public A( String file, Boolean dupCheck )
+		{
+			this.inputFile = file;
+			this.dupCheck = dupCheck;
+		}
 	}
 }

-- 
cgit v1.2.3-55-g7522
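
Note (not part of the patch): a minimal sketch of the duplicate-reporting rule the change introduces. The task input moves from a plain list of archive paths to a map of archive path -> per-archive dupCheck flag, and a file name that occurs in more than one archive is only warned about if at least one of the contributing archives was submitted with dupCheck = true. The class name and file paths below are made up for illustration.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class DupWarnSketch
{
	public static void main( String[] args )
	{
		// New-style task input: archive path -> dupCheck flag (paths are hypothetical)
		Map<String, Boolean> inputFiles = new LinkedHashMap<>();
		inputFiles.put( "/tmp/base.tar.gz", Boolean.FALSE );
		inputFiles.put( "/tmp/addons.tar.gz", Boolean.TRUE );

		// Entry name -> archives that contained it, as collected while recompressing;
		// here we pretend both archives contain /etc/motd
		Map<String, List<String>> entries = new HashMap<>();
		for ( String archive : inputFiles.keySet() ) {
			entries.computeIfAbsent( "/etc/motd", k -> new ArrayList<>() ).add( archive );
		}

		// Warn only if any of the contributing archives asked for duplicate checking
		for ( Map.Entry<String, List<String>> e : entries.entrySet() ) {
			if ( e.getValue().size() < 2 )
				continue;
			boolean check = false;
			for ( String archive : e.getValue() ) {
				if ( Boolean.TRUE.equals( inputFiles.get( archive ) ) )
					check = true;
			}
			if ( check )
				System.out.println( "Duplicate entry: " + e.getKey() + " from " + e.getValue() );
		}
	}
}

In the actual task only the first occurrence of a name is copied into the output archive; later occurrences are merely recorded for the warning ("Just record, but don't bloat archive"). Separately, createTarArchiveEntry() now rejects entries whose names normalize to nothing or try to escape the root (FilenameUtils.normalize() returns null for paths like "/../x"), which is what the new "Ignoring invalid entry" warning reports.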