From 0ec20db824044c36412508310705f32e3503aa4c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 7 Jan 2005 05:58:05 +0000 Subject: auto-import hardlink-1.0-1.1 on branch devel from hardlink-1.0-1.1.src.rpm --- misc-utils/hardlink.c | 344 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 misc-utils/hardlink.c (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c new file mode 100644 index 000000000..ad2c2ffcf --- /dev/null +++ b/misc-utils/hardlink.c @@ -0,0 +1,344 @@ +/* Copyright (C) 2001 Red Hat, Inc. + + Written by Jakub Jelinek . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this program; see the file COPYING. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Changes by Rémy Card to use constants and add option -n. */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NHASH 131072 /* Must be a power of 2! */ +#define NAMELEN 4096 +#define NBUF 64 + +struct _f; +typedef struct _h { + struct _h *next; + struct _f *chain; + off_t size; + time_t mtime; +} h; + +typedef struct _d { + struct _d *next; + char name[0]; +} d; + +d *dirs; + +h *hps[NHASH]; + +int no_link = 0; +int verbose = 0; +int content_only = 0; + +typedef struct _f { + struct _f *next; + ino_t ino; + dev_t dev; + unsigned int cksum; + char name[0]; +} f; + +inline unsigned int hash(off_t size, time_t mtime) +{ + return (size ^ mtime) & (NHASH - 1); +} + +inline int stcmp(struct stat *st1, struct stat *st2, int content_only) +{ + if (content_only) + return st1->st_size != st2->st_size; + return st1->st_mode != st2->st_mode || st1->st_uid != st2->st_uid || + st1->st_gid != st2->st_gid || st1->st_size != st2->st_size || + st1->st_mtime != st2->st_mtime; +} + +long long ndirs, nobjects, nregfiles, nmmap, ncomp, nlinks, nsaved; + +void doexit(int i) +{ + if (verbose) { + fprintf(stderr, "\n\n"); + fprintf(stderr, "Directories %lld\n", ndirs); + fprintf(stderr, "Objects %lld\n", nobjects); + fprintf(stderr, "IFREG %lld\n", nregfiles); + fprintf(stderr, "Mmaps %lld\n", nmmap); + fprintf(stderr, "Comparisons %lld\n", ncomp); + fprintf(stderr, "%s %lld\n", (no_link ? "Would link" : "Linked"), nlinks); + fprintf(stderr, "%s %lld\n", (no_link ? "Would save" : "saved"), nsaved); + } + exit(i); +} + +void usage(prog) +{ + fprintf (stderr, "Usage: %s [-cnv] directories...\n", prog); + exit(255); +} + +unsigned int buf[NBUF]; +char nambuf1[NAMELEN], nambuf2[NAMELEN]; + +void rf (char *name) +{ + struct stat st, st2, st3; + nobjects++; + if (lstat (name, &st)) + return; + if (S_ISDIR (st.st_mode)) { + d * dp = malloc(sizeof(d) + 1 + strlen (name)); + if (!dp) { + fprintf(stderr, "\nOut of memory 3\n"); + doexit(3); + } + strcpy (dp->name, name); + dp->next = dirs; + dirs = dp; + } else if (S_ISREG (st.st_mode)) { + int fd, i; + f * fp, * fp2; + h * hp; + char *p, *q; + char *n1, *n2; + int cksumsize = sizeof(buf); + unsigned int cksum; + time_t mtime = content_only ? 0 : st.st_mtime; + unsigned int hsh = hash (st.st_size, mtime); + nregfiles++; + if (verbose > 1) + fprintf(stderr, " %s", name); + fd = open (name, O_RDONLY); + if (fd < 0) return; + if (st.st_size < sizeof(buf)) { + cksumsize = st.st_size; + memset (((char *)buf) + cksumsize, 0, (sizeof(buf) - cksumsize) % sizeof(buf[0])); + } + if (read (fd, buf, cksumsize) != cksumsize) { + close(fd); + if (verbose > 1) + fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + return; + } + cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]); + for (i = 0, cksum = 0; i < cksumsize; i++) { + if (cksum + buf[i] < cksum) + cksum += buf[i] + 1; + else + cksum += buf[i]; + } + for (hp = hps[hsh]; hp; hp = hp->next) + if (hp->size == st.st_size && hp->mtime == mtime) + break; + if (!hp) { + hp = malloc(sizeof(h)); + if (!hp) { + fprintf(stderr, "\nOut of memory 1\n"); + doexit(1); + } + hp->size = st.st_size; + hp->mtime = mtime; + hp->chain = NULL; + hp->next = hps[hsh]; + hps[hsh] = hp; + } + for (fp = hp->chain; fp; fp = fp->next) + if (fp->cksum == cksum) + break; + for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) + if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) { + close(fd); + if (verbose > 1) + fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + return; + } + if (fp) { + p = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + nmmap++; + if (p == (void *)-1) { + close(fd); + fprintf(stderr, "\nFailed to mmap %s\n", name); + return; + } + } + for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) + if (!lstat (fp2->name, &st2) && S_ISREG (st2.st_mode) && + !stcmp (&st, &st2, content_only) && + st2.st_ino != st.st_ino && + st2.st_dev == st.st_dev) { + int fd2 = open (fp2->name, O_RDONLY); + if (fd2 < 0) continue; + if (fstat (fd2, &st2) || !S_ISREG (st2.st_mode)) { + close (fd2); + continue; + } + ncomp++; + q = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd2, 0); + if (q == (void *)-1) { + close(fd2); + fprintf(stderr, "\nFailed to mmap %s\n", fp2->name); + continue; + } + if (memcmp (p, q, st.st_size)) { + munmap (q, st.st_size); + close(fd2); + continue; + } + munmap (q, st.st_size); + close(fd2); + if (lstat (name, &st3)) { + fprintf(stderr, "\nCould not stat %s again\n", name); + munmap (p, st.st_size); + close(fd); + return; + } + st3.st_atime = st.st_atime; + if (stcmp (&st, &st3, 0)) { + fprintf(stderr, "\nFile %s changed underneath us\n", name); + munmap (p, st.st_size); + close(fd); + return; + } + n1 = fp2->name; + n2 = name; + if (!no_link) { + strcpy (stpcpy (nambuf2, n2), ".$$$___cleanit___$$$"); + if (rename (n2, nambuf2)) { + fprintf(stderr, "\nFailed to rename %s to %s\n", n2, nambuf2); + continue; + } + if (link (n1, n2)) { + fprintf(stderr, "\nFailed to hardlink %s to %s\n", n1, n2); + if (rename (nambuf2, n2)) { + fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nambuf2, n2); + } + munmap (p, st.st_size); + close(fd); + return; + } + unlink (nambuf2); + } + nlinks++; + if (st3.st_nlink > 1) { + /* We actually did not save anything this time, since the link second argument + had some other links as well. */ + if (verbose > 1) + fprintf(stderr, "\r%*s\r%s %s to %s\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2); + } else { + nsaved+=((st.st_size+4095)/4096)*4096; + if (verbose > 1) + fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); + } + munmap (p, st.st_size); + close(fd); + return; + } + if (fp) + munmap (p, st.st_size); + fp2 = malloc(sizeof(f) + 1 + strlen (name)); + if (!fp2) { + fprintf(stderr, "\nOut of memory 2\n"); + doexit(2); + } + close(fd); + fp2->ino = st.st_ino; + fp2->dev = st.st_dev; + fp2->cksum = cksum; + strcpy(fp2->name, name); + if (fp) { + fp2->next = fp->next; + fp->next = fp2; + } else { + fp2->next = hp->chain; + hp->chain = fp2; + } + if (verbose > 1) + fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + return; + } +} + +int main(int argc, char **argv) +{ + int ch; + int i; + char *p; + d * dp; + DIR *dh; + struct dirent *di; + while ((ch = getopt (argc, argv, "cnv")) != -1) { + switch (ch) { + case 'n': + no_link++; + break; + case 'v': + verbose++; + break; + case 'c': + content_only++; + break; + default: + usage(argv[0]); + } + } + if (optind >= argc) + usage(argv[0]); + for (i = optind; i < argc; i++) + rf(argv[i]); + while (dirs) { + dp = dirs; + dirs = dp->next; + strcpy (nambuf1, dp->name); + free (dp); + strcat (nambuf1, "/"); + p = strchr (nambuf1, 0); + dh = opendir (nambuf1); + if (dh == NULL) + continue; + ndirs++; + while ((di = readdir (dh)) != NULL) { + if (!di->d_name[0]) + continue; + if (di->d_name[0] == '.') { + char *q; + if (!di->d_name[1] || !strcmp (di->d_name, "..") || !strncmp (di->d_name, ".in.", 4)) + continue; + q = strrchr (di->d_name, '.'); + if (q && strlen (q) == 7 && q != di->d_name) { + *p = 0; + if (verbose) + fprintf(stderr, "Skipping %s%s\n", nambuf1, di->d_name); + continue; + } + } + strcpy (p, di->d_name); + rf(nambuf1); + } + closedir(dh); + } + doexit(0); + return 0; +} -- cgit v1.2.3-55-g7522 From 5edefc924240843c842f77020da0b7c418c1a50a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 9 Feb 2005 23:12:47 +0000 Subject: kill warning --- misc-utils/hardlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index ad2c2ffcf..52cb41f42 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -94,7 +94,7 @@ void doexit(int i) exit(i); } -void usage(prog) +void usage(char *prog) { fprintf (stderr, "Usage: %s [-cnv] directories...\n", prog); exit(255); -- cgit v1.2.3-55-g7522 From a16d7294a868f4dacb1482d49820f0226e50e949 Mon Sep 17 00:00:00 2001 From: Jeremy Katz Date: Wed, 27 Apr 2005 22:05:13 +0000 Subject: - don't try to hardlink 0 byte files (#154404) --- misc-utils/hardlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 52cb41f42..7a6cf6f52 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -175,7 +175,7 @@ void rf (char *name) fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); return; } - if (fp) { + if (fp && st.st_size > 0) { p = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); nmmap++; if (p == (void *)-1) { @@ -191,7 +191,7 @@ void rf (char *name) st2.st_dev == st.st_dev) { int fd2 = open (fp2->name, O_RDONLY); if (fd2 < 0) continue; - if (fstat (fd2, &st2) || !S_ISREG (st2.st_mode)) { + if (fstat (fd2, &st2) || !S_ISREG (st2.st_mode) || st2.st_size == 0) { close (fd2); continue; } -- cgit v1.2.3-55-g7522 From a810992a7215994d020897215946fbddad3d8e68 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 26 Aug 2005 04:42:38 +0000 Subject: Document hardlink command line options. (Ville Skytta) (#161738) --- misc-utils/hardlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 7a6cf6f52..1e102101f 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -97,6 +97,9 @@ void doexit(int i) void usage(char *prog) { fprintf (stderr, "Usage: %s [-cnv] directories...\n", prog); + fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); + fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); + fprintf (stderr, " -v Operate in verbose mode.\n"); exit(255); } -- cgit v1.2.3-55-g7522 From cabf1c1cb7409db30917e1d7e8546ec850e43c1d Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Mon, 7 Nov 2005 14:07:33 +0000 Subject: add -h option --- misc-utils/hardlink.1 | 39 +++++++++++++++++++++++++++++++++++++++ misc-utils/hardlink.c | 8 +++++--- 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 misc-utils/hardlink.1 (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 new file mode 100644 index 000000000..7ffc2b460 --- /dev/null +++ b/misc-utils/hardlink.1 @@ -0,0 +1,39 @@ +.TH "hardlink" "1" +.SH "NAME" +hardlink \- Consolidate duplicate files via hardlinks +.SH "SYNOPSIS" +.PP +\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-h\fP] directory1 [ directory2 ... ] +.SH "DESCRIPTION" +.PP +This manual page documents \fBhardlink\fP, a +program which consolidates duplicate files in one or more directories +using hardlinks. +.PP +\fBhardlink\fP traverses one +or more directories searching for duplicate files. When it finds duplicate +files, it uses one of them as the master. It then removes all other +duplicates and places a hardlink for each one pointing to the master file. +This allows for conservation of disk space where multiple directories +on a single filesystem contain many duplicate files. +.PP +Since hard links can only span a single filesystem, \fBhardlink\fP +is only useful when all directories specified are on the same filesystem. +.SH "OPTIONS" +.PP +.IP "\fB-c\fP" 10 +Compare only the contents of the files being considered for consolidation. +Disregards permission, ownership and other differences. +.IP "\fB-n\fP" 10 +Do not perform the consolidation; only print what would be changed. +.IP "\fB-v\fP" 10 +Enable verbose logging. +.IP "\fB-h\fP" 10 +Show help. +.SH "AUTHOR" +.PP +\fBhardlink\fP was written by Jakub Jelinek . +.PP +Man page written by Brian Long. +.PP +Man page updated by Jindrich Novy diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 1e102101f..fd511c894 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -96,10 +96,11 @@ void doexit(int i) void usage(char *prog) { - fprintf (stderr, "Usage: %s [-cnv] directories...\n", prog); + fprintf (stderr, "Usage: %s [-cnvh] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); fprintf (stderr, " -v Operate in verbose mode.\n"); + fprintf (stderr, " -h Show help.\n"); exit(255); } @@ -125,7 +126,7 @@ void rf (char *name) int fd, i; f * fp, * fp2; h * hp; - char *p, *q; + char *p = NULL, *q; char *n1, *n2; int cksumsize = sizeof(buf); unsigned int cksum; @@ -292,7 +293,7 @@ int main(int argc, char **argv) d * dp; DIR *dh; struct dirent *di; - while ((ch = getopt (argc, argv, "cnv")) != -1) { + while ((ch = getopt (argc, argv, "cnvh")) != -1) { switch (ch) { case 'n': no_link++; @@ -303,6 +304,7 @@ int main(int argc, char **argv) case 'c': content_only++; break; + case 'h': default: usage(argv[0]); } -- cgit v1.2.3-55-g7522 From b4ece7768f292758e99de782ba248d3b0a9751d7 Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Mon, 14 Nov 2005 08:31:25 +0000 Subject: mostly spec cleanup --- misc-utils/hardlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index fd511c894..4625f1ea3 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -17,7 +17,8 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* Changes by Rémy Card to use constants and add option -n. */ +/* Changes by Rémy Card to use constants and add option -n. */ +/* Changes by Jindrich Novy to add option -h. */ #define _GNU_SOURCE #include -- cgit v1.2.3-55-g7522 From a272f143e7daca35c3617ba02a9371dbe4b5a0ac Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Sun, 29 Oct 2006 07:19:35 +0000 Subject: - update docs to describe highest verbosity -vv option (#210816) - use dist Resolves: 210816 --- misc-utils/hardlink.1 | 6 ++++-- misc-utils/hardlink.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 7ffc2b460..09fd7ac90 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -3,7 +3,7 @@ hardlink \- Consolidate duplicate files via hardlinks .SH "SYNOPSIS" .PP -\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-h\fP] directory1 [ directory2 ... ] +\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-h\fP] directory1 [ directory2 ... ] .SH "DESCRIPTION" .PP This manual page documents \fBhardlink\fP, a @@ -27,7 +27,9 @@ Disregards permission, ownership and other differences. .IP "\fB-n\fP" 10 Do not perform the consolidation; only print what would be changed. .IP "\fB-v\fP" 10 -Enable verbose logging. +Print summary after hardlinking. +.IP "\fB-vv\fP" 10 +Print every hardlinked file and bytes saved. Also print summary after hardlinking. .IP "\fB-h\fP" 10 Show help. .SH "AUTHOR" diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 4625f1ea3..a07d90c16 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -100,7 +100,8 @@ void usage(char *prog) fprintf (stderr, "Usage: %s [-cnvh] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); - fprintf (stderr, " -v Operate in verbose mode.\n"); + fprintf (stderr, " -v Print summary after hardlinking.\n"); + fprintf (stderr, " -vv Print every hardlinked file and bytes saved + summary.\n"); fprintf (stderr, " -h Show help.\n"); exit(255); } -- cgit v1.2.3-55-g7522 From 0cd6b1d3539a25a8bd5513e3c7d9b092a5ca47d3 Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Thu, 3 Mar 2011 12:15:33 +0100 Subject: fix URL and remove mmap() (#676962, #672917) --- misc-utils/hardlink.c | 52 +++++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index a07d90c16..225c71f45 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -18,7 +18,7 @@ Boston, MA 02111-1307, USA. */ /* Changes by Rémy Card to use constants and add option -n. */ -/* Changes by Jindrich Novy to add option -h. */ +/* Changes by Jindrich Novy to add option -h, replace mmap(2) */ #define _GNU_SOURCE #include @@ -31,7 +31,8 @@ #include #include -#define NHASH 131072 /* Must be a power of 2! */ +#define NHASH (1<<17) /* Must be a power of 2! */ +#define NIOBUF (1<<12) #define NAMELEN 4096 #define NBUF 64 @@ -78,7 +79,7 @@ inline int stcmp(struct stat *st1, struct stat *st2, int content_only) st1->st_mtime != st2->st_mtime; } -long long ndirs, nobjects, nregfiles, nmmap, ncomp, nlinks, nsaved; +long long ndirs, nobjects, nregfiles, ncomp, nlinks, nsaved; void doexit(int i) { @@ -87,7 +88,6 @@ void doexit(int i) fprintf(stderr, "Directories %lld\n", ndirs); fprintf(stderr, "Objects %lld\n", nobjects); fprintf(stderr, "IFREG %lld\n", nregfiles); - fprintf(stderr, "Mmaps %lld\n", nmmap); fprintf(stderr, "Comparisons %lld\n", ncomp); fprintf(stderr, "%s %lld\n", (no_link ? "Would link" : "Linked"), nlinks); fprintf(stderr, "%s %lld\n", (no_link ? "Would save" : "saved"), nsaved); @@ -107,6 +107,7 @@ void usage(char *prog) } unsigned int buf[NBUF]; +char iobuf1[NIOBUF], iobuf2[NIOBUF]; char nambuf1[NAMELEN], nambuf2[NAMELEN]; void rf (char *name) @@ -128,12 +129,12 @@ void rf (char *name) int fd, i; f * fp, * fp2; h * hp; - char *p = NULL, *q; char *n1, *n2; int cksumsize = sizeof(buf); unsigned int cksum; time_t mtime = content_only ? 0 : st.st_mtime; unsigned int hsh = hash (st.st_size, mtime); + off_t fsize; nregfiles++; if (verbose > 1) fprintf(stderr, " %s", name); @@ -181,15 +182,6 @@ void rf (char *name) fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); return; } - if (fp && st.st_size > 0) { - p = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); - nmmap++; - if (p == (void *)-1) { - close(fd); - fprintf(stderr, "\nFailed to mmap %s\n", name); - return; - } - } for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) if (!lstat (fp2->name, &st2) && S_ISREG (st2.st_mode) && !stcmp (&st, &st2, content_only) && @@ -202,29 +194,27 @@ void rf (char *name) continue; } ncomp++; - q = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd2, 0); - if (q == (void *)-1) { - close(fd2); - fprintf(stderr, "\nFailed to mmap %s\n", fp2->name); - continue; - } - if (memcmp (p, q, st.st_size)) { - munmap (q, st.st_size); - close(fd2); - continue; - } - munmap (q, st.st_size); - close(fd2); + lseek(fd, 0, SEEK_SET); + for (fsize = st.st_size; fsize > 0; fsize -= NIOBUF) { + off_t rsize = fsize >= NIOBUF ? NIOBUF : fsize; + if (read (fd, iobuf1, rsize) != rsize || read (fd2, iobuf2, rsize) != rsize) { + close(fd); + close(fd2); + fprintf(stderr, "\nReading error\n"); + return; + } + if (memcmp (iobuf1, iobuf2, rsize)) break; + } + close(fd2); + if (fsize > 0) continue; if (lstat (name, &st3)) { fprintf(stderr, "\nCould not stat %s again\n", name); - munmap (p, st.st_size); close(fd); return; } st3.st_atime = st.st_atime; if (stcmp (&st, &st3, 0)) { fprintf(stderr, "\nFile %s changed underneath us\n", name); - munmap (p, st.st_size); close(fd); return; } @@ -241,7 +231,6 @@ void rf (char *name) if (rename (nambuf2, n2)) { fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nambuf2, n2); } - munmap (p, st.st_size); close(fd); return; } @@ -258,12 +247,9 @@ void rf (char *name) if (verbose > 1) fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); } - munmap (p, st.st_size); close(fd); return; } - if (fp) - munmap (p, st.st_size); fp2 = malloc(sizeof(f) + 1 + strlen (name)); if (!fp2) { fprintf(stderr, "\nOut of memory 2\n"); -- cgit v1.2.3-55-g7522 From 94b040b06ba018af0799e4117fe5c8dc99f70ef4 Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Fri, 21 Oct 2011 02:49:08 +0200 Subject: fix possible buffer overflows, integer overflows, update man page --- misc-utils/hardlink.1 | 13 ++++-- misc-utils/hardlink.c | 125 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 95 insertions(+), 43 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 09fd7ac90..0590e8464 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -6,11 +6,11 @@ hardlink \- Consolidate duplicate files via hardlinks \fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-h\fP] directory1 [ directory2 ... ] .SH "DESCRIPTION" .PP -This manual page documents \fBhardlink\fP, a +This manual page documents \fBhardlink\fP, a program which consolidates duplicate files in one or more directories using hardlinks. .PP -\fBhardlink\fP traverses one +\fBhardlink\fP traverses one or more directories searching for duplicate files. When it finds duplicate files, it uses one of them as the master. It then removes all other duplicates and places a hardlink for each one pointing to the master file. @@ -34,8 +34,15 @@ Print every hardlinked file and bytes saved. Also print summary after hardlinkin Show help. .SH "AUTHOR" .PP -\fBhardlink\fP was written by Jakub Jelinek . +\fBhardlink\fP was written by Jakub Jelinek . .PP Man page written by Brian Long. .PP Man page updated by Jindrich Novy +.SH "BUGS" +.PP +\fBhardlink\fP assumes that its target directory trees do not change from under +it. If a directory tree does change, this may result in \fBhardlink\fP +accessing files and/or directories outside of the intended directory tree. +Thus, you must avoid running \fBhardlink\fP on potentially changing directory +trees, and especially on directory trees under control of another user. diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 225c71f45..51a71cf97 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -1,24 +1,24 @@ /* Copyright (C) 2001 Red Hat, Inc. Written by Jakub Jelinek . - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - + /* Changes by Rémy Card to use constants and add option -n. */ -/* Changes by Jindrich Novy to add option -h, replace mmap(2) */ +/* Changes by Jindrich Novy to add option -h, replace mmap(2), fix overflows */ #define _GNU_SOURCE #include @@ -83,7 +83,7 @@ long long ndirs, nobjects, nregfiles, ncomp, nlinks, nsaved; void doexit(int i) { - if (verbose) { + if (verbose) { fprintf(stderr, "\n\n"); fprintf(stderr, "Directories %lld\n", ndirs); fprintf(stderr, "Objects %lld\n", nobjects); @@ -108,28 +108,59 @@ void usage(char *prog) unsigned int buf[NBUF]; char iobuf1[NIOBUF], iobuf2[NIOBUF]; -char nambuf1[NAMELEN], nambuf2[NAMELEN]; -void rf (char *name) +inline size_t add2(size_t a, size_t b) +{ + size_t sum = a + b; + if (sum < a) { + fprintf(stderr, "\nInteger overflow\n"); + doexit(5); + } + return sum; +} + +inline size_t add3(size_t a, size_t b, size_t c) +{ + return add2(add2(a, b), c); +} + +typedef struct { + char *buf; + size_t alloc; +} dynstr; + +void growstr(dynstr *str, size_t newlen) +{ + if (newlen < str->alloc) + return; + str->buf = realloc(str->buf, str->alloc = add2(newlen, 1)); + if (!str->buf) { + fprintf(stderr, "\nOut of memory 4\n"); + doexit(4); + } +} + +void rf (const char *name) { struct stat st, st2, st3; + const size_t namelen = strlen(name); nobjects++; if (lstat (name, &st)) return; if (S_ISDIR (st.st_mode)) { - d * dp = malloc(sizeof(d) + 1 + strlen (name)); + d * dp = malloc(add3(sizeof(d), namelen, 1)); if (!dp) { fprintf(stderr, "\nOut of memory 3\n"); doexit(3); } - strcpy (dp->name, name); + memcpy(dp->name, name, namelen + 1); dp->next = dirs; dirs = dp; } else if (S_ISREG (st.st_mode)) { int fd, i; f * fp, * fp2; h * hp; - char *n1, *n2; + const char *n1, *n2; int cksumsize = sizeof(buf); unsigned int cksum; time_t mtime = content_only ? 0 : st.st_mtime; @@ -146,8 +177,8 @@ void rf (char *name) } if (read (fd, buf, cksumsize) != cksumsize) { close(fd); - if (verbose > 1) - fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); return; } cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]); @@ -178,8 +209,8 @@ void rf (char *name) for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) { close(fd); - if (verbose > 1) - fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); return; } for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) @@ -221,36 +252,45 @@ void rf (char *name) n1 = fp2->name; n2 = name; if (!no_link) { - strcpy (stpcpy (nambuf2, n2), ".$$$___cleanit___$$$"); - if (rename (n2, nambuf2)) { - fprintf(stderr, "\nFailed to rename %s to %s\n", n2, nambuf2); + const char *suffix = ".$$$___cleanit___$$$"; + const size_t suffixlen = strlen(suffix); + size_t n2len = strlen(n2); + dynstr nam2 = {NULL, 0}; + growstr(&nam2, add2(n2len, suffixlen)); + memcpy(nam2.buf, n2, n2len); + memcpy(&nam2.buf[n2len], suffix, suffixlen + 1); + if (rename (n2, nam2.buf)) { + fprintf(stderr, "\nFailed to rename %s to %s\n", n2, nam2.buf); + free(nam2.buf); continue; } if (link (n1, n2)) { fprintf(stderr, "\nFailed to hardlink %s to %s\n", n1, n2); - if (rename (nambuf2, n2)) { - fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nambuf2, n2); + if (rename (nam2.buf, n2)) { + fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nam2.buf, n2); } close(fd); + free(nam2.buf); return; } - unlink (nambuf2); + unlink (nam2.buf); + free(nam2.buf); } nlinks++; if (st3.st_nlink > 1) { /* We actually did not save anything this time, since the link second argument had some other links as well. */ if (verbose > 1) - fprintf(stderr, "\r%*s\r%s %s to %s\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2); + fprintf(stderr, "\r%*s\r%s %s to %s\n", (int)(((namelen > NAMELEN) ? 0 : namelen) + 2), "", (no_link ? "Would link" : "Linked"), n1, n2); } else { nsaved+=((st.st_size+4095)/4096)*4096; if (verbose > 1) - fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); + fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)(((namelen > NAMELEN) ? 0 : namelen) + 2), "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); } close(fd); return; } - fp2 = malloc(sizeof(f) + 1 + strlen (name)); + fp2 = malloc(add3(sizeof(f), namelen, 1)); if (!fp2) { fprintf(stderr, "\nOut of memory 2\n"); doexit(2); @@ -259,7 +299,7 @@ void rf (char *name) fp2->ino = st.st_ino; fp2->dev = st.st_dev; fp2->cksum = cksum; - strcpy(fp2->name, name); + memcpy(fp2->name, name, namelen + 1); if (fp) { fp2->next = fp->next; fp->next = fp2; @@ -267,8 +307,8 @@ void rf (char *name) fp2->next = hp->chain; hp->chain = fp2; } - if (verbose > 1) - fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); return; } } @@ -277,10 +317,7 @@ int main(int argc, char **argv) { int ch; int i; - char *p; - d * dp; - DIR *dh; - struct dirent *di; + dynstr nam1 = {NULL, 0}; while ((ch = getopt (argc, argv, "cnvh")) != -1) { switch (ch) { case 'n': @@ -302,13 +339,17 @@ int main(int argc, char **argv) for (i = optind; i < argc; i++) rf(argv[i]); while (dirs) { - dp = dirs; + DIR *dh; + struct dirent *di; + d * dp = dirs; + size_t nam1baselen = strlen(dp->name); dirs = dp->next; - strcpy (nambuf1, dp->name); + growstr(&nam1, add2(nam1baselen, 1)); + memcpy(nam1.buf, dp->name, nam1baselen); free (dp); - strcat (nambuf1, "/"); - p = strchr (nambuf1, 0); - dh = opendir (nambuf1); + nam1.buf[nam1baselen++] = '/'; + nam1.buf[nam1baselen] = 0; + dh = opendir (nam1.buf); if (dh == NULL) continue; ndirs++; @@ -321,14 +362,18 @@ int main(int argc, char **argv) continue; q = strrchr (di->d_name, '.'); if (q && strlen (q) == 7 && q != di->d_name) { - *p = 0; + nam1.buf[nam1baselen] = 0; if (verbose) - fprintf(stderr, "Skipping %s%s\n", nambuf1, di->d_name); + fprintf(stderr, "Skipping %s%s\n", nam1.buf, di->d_name); continue; } } - strcpy (p, di->d_name); - rf(nambuf1); + { + size_t subdirlen; + growstr(&nam1, add2(nam1baselen, subdirlen = strlen(di->d_name))); + memcpy(&nam1.buf[nam1baselen], di->d_name, add2(subdirlen, 1)); + } + rf(nam1.buf); } closedir(dh); } -- cgit v1.2.3-55-g7522 From c23b4a230531da663fc35a478754c641866cb6df Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Sun, 15 Apr 2012 11:22:10 +0200 Subject: do not allow to hardlink files across filesystems by default (#786719) (use -f option to override) --- misc-utils/hardlink.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 51a71cf97..a7c72492d 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -18,7 +18,7 @@ Boston, MA 02111-1307, USA. */ /* Changes by Rémy Card to use constants and add option -n. */ -/* Changes by Jindrich Novy to add option -h, replace mmap(2), fix overflows */ +/* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ #define _GNU_SOURCE #include @@ -56,6 +56,7 @@ h *hps[NHASH]; int no_link = 0; int verbose = 0; int content_only = 0; +int force = 0; typedef struct _f { struct _f *next; @@ -97,11 +98,12 @@ void doexit(int i) void usage(char *prog) { - fprintf (stderr, "Usage: %s [-cnvh] directories...\n", prog); + fprintf (stderr, "Usage: %s [-cnvhf] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); fprintf (stderr, " -v Print summary after hardlinking.\n"); fprintf (stderr, " -vv Print every hardlinked file and bytes saved + summary.\n"); + fprintf (stderr, " -f Force hardlinking across filesystems.\n"); fprintf (stderr, " -h Show help.\n"); exit(255); } @@ -139,7 +141,7 @@ void growstr(dynstr *str, size_t newlen) doexit(4); } } - +dev_t dev = 0; void rf (const char *name) { struct stat st, st2, st3; @@ -147,6 +149,13 @@ void rf (const char *name) nobjects++; if (lstat (name, &st)) return; + if (st.st_dev != dev && !force) { + if (dev) { + fprintf(stderr, "%s is on different filesystem than the rest.\nUse -f option to override.\n", name); + doexit(6); + } + dev = st.st_dev; + } if (S_ISDIR (st.st_mode)) { d * dp = malloc(add3(sizeof(d), namelen, 1)); if (!dp) { @@ -318,7 +327,7 @@ int main(int argc, char **argv) int ch; int i; dynstr nam1 = {NULL, 0}; - while ((ch = getopt (argc, argv, "cnvh")) != -1) { + while ((ch = getopt (argc, argv, "cnvhf")) != -1) { switch (ch) { case 'n': no_link++; @@ -329,6 +338,9 @@ int main(int argc, char **argv) case 'c': content_only++; break; + case 'f': + force=1; + break; case 'h': default: usage(argv[0]); -- cgit v1.2.3-55-g7522 From 348af99a6296d9d15de10700594e1403d23a9a37 Mon Sep 17 00:00:00 2001 From: Jan Zeleny Date: Wed, 10 Apr 2013 14:47:17 +0200 Subject: Mention -f option in the man page --- misc-utils/hardlink.1 | 2 ++ 1 file changed, 2 insertions(+) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 0590e8464..04228f4bb 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -24,6 +24,8 @@ is only useful when all directories specified are on the same filesystem. .IP "\fB-c\fP" 10 Compare only the contents of the files being considered for consolidation. Disregards permission, ownership and other differences. +.IP "\fB-f\fP" 10 +Force hardlinking across file systems. .IP "\fB-n\fP" 10 Do not perform the consolidation; only print what would be changed. .IP "\fB-v\fP" 10 -- cgit v1.2.3-55-g7522 From c11af66f0fce8ded25f92c8dde294afb2a64b735 Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Sun, 10 Jul 2016 03:57:43 +0200 Subject: spec file reflects the atomic hardlinking patch; removed cleaning buildroot (redundant); current FSF address at .c source file --- misc-utils/hardlink.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index a7c72492d..ef042deba 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -12,10 +12,9 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public - License along with this program; see the file COPYING. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* Changes by Rémy Card to use constants and add option -n. */ /* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ -- cgit v1.2.3-55-g7522 From cbb0524c7c8b4aa62d1dc2fd07c7392ede7cfc5f Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Sun, 10 Jul 2016 04:03:41 +0200 Subject: Revert "spec file reflects the atomic hardlinking patch; removed cleaning buildroot (redundant); current FSF address at .c source file" This reverts commit bb9e76ae339794c2243ae294207942b7ea278364. --- misc-utils/hardlink.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index ef042deba..3521cb3ea 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -12,9 +12,10 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ + You should have received a copy of the GNU General Public + License along with this program; see the file COPYING. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ /* Changes by Rémy Card to use constants and add option -n. */ /* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ @@ -29,6 +30,7 @@ #include #include #include +#include #define NHASH (1<<17) /* Must be a power of 2! */ #define NIOBUF (1<<12) @@ -267,21 +269,22 @@ void rf (const char *name) growstr(&nam2, add2(n2len, suffixlen)); memcpy(nam2.buf, n2, n2len); memcpy(&nam2.buf[n2len], suffix, suffixlen + 1); - if (rename (n2, nam2.buf)) { - fprintf(stderr, "\nFailed to rename %s to %s\n", n2, nam2.buf); + /* First create a temporary link to n1 under a new name */ + if (link(n1, nam2.buf)) { + fprintf(stderr, "\nFailed to hardlink %s to %s (create temporary link as %s failed - %s)\n", n1, n2, nam2.buf, strerror(errno)); free(nam2.buf); continue; } - if (link (n1, n2)) { - fprintf(stderr, "\nFailed to hardlink %s to %s\n", n1, n2); - if (rename (nam2.buf, n2)) { - fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nam2.buf, n2); + /* Then rename into place over the existing n2 */ + if (rename (nam2.buf, n2)) { + fprintf(stderr, "\nFailed to hardlink %s to %s (rename temporary link to %s failed - %s)\n", n1, n2, n2, strerror(errno)); + /* Something went wrong, try to remove the now redundant temporary link */ + if (unlink(nam2.buf)) { + fprintf(stderr, "\nFailed to remove temporary link %s - %s\n", nam2.buf, strerror(errno)); } - close(fd); free(nam2.buf); - return; + continue; } - unlink (nam2.buf); free(nam2.buf); } nlinks++; -- cgit v1.2.3-55-g7522 From 4d072ba7155358ac50531bda512442f514b37e37 Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Sun, 10 Jul 2016 04:12:06 +0200 Subject: spec file reflects the atomic hardlinking patch; removed cleaning buildroot (redundant); update FSF address at .c source file --- misc-utils/hardlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 3521cb3ea..e661e5f0c 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -12,13 +12,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public - License along with this program; see the file COPYING. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* Changes by Rémy Card to use constants and add option -n. */ /* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ +/* Changes by Travers Carter to make atomic hardlinking */ #define _GNU_SOURCE #include -- cgit v1.2.3-55-g7522 From 92c79dc79e1154f15d6049961287e367fc43aab2 Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Thu, 16 Feb 2017 20:08:40 +0100 Subject: Fixed 32 bit build with gcc7 (RH Bugzilla ID 1422989) --- misc-utils/hardlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index e661e5f0c..16d8163e2 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -67,12 +67,12 @@ typedef struct _f { char name[0]; } f; -inline unsigned int hash(off_t size, time_t mtime) +__attribute__((always_inline)) inline unsigned int hash(off_t size, time_t mtime) { return (size ^ mtime) & (NHASH - 1); } -inline int stcmp(struct stat *st1, struct stat *st2, int content_only) +__attribute__((always_inline)) inline int stcmp(struct stat *st1, struct stat *st2, int content_only) { if (content_only) return st1->st_size != st2->st_size; @@ -112,7 +112,7 @@ void usage(char *prog) unsigned int buf[NBUF]; char iobuf1[NIOBUF], iobuf2[NIOBUF]; -inline size_t add2(size_t a, size_t b) +__attribute__((always_inline)) inline size_t add2(size_t a, size_t b) { size_t sum = a + b; if (sum < a) { @@ -122,7 +122,7 @@ inline size_t add2(size_t a, size_t b) return sum; } -inline size_t add3(size_t a, size_t b, size_t c) +__attribute__((always_inline)) inline size_t add3(size_t a, size_t b, size_t c) { return add2(add2(a, b), c); } -- cgit v1.2.3-55-g7522 From 551e8963f4ca7850a2d91bcb5ffb6032a690e93f Mon Sep 17 00:00:00 2001 From: Todd Lewis Date: Tue, 28 Mar 2017 15:16:56 -0400 Subject: exclude files via pcre --- misc-utils/hardlink.1 | 14 +++++++++++++- misc-utils/hardlink.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 10 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 04228f4bb..b8bfe9d22 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -3,7 +3,7 @@ hardlink \- Consolidate duplicate files via hardlinks .SH "SYNOPSIS" .PP -\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-h\fP] directory1 [ directory2 ... ] +\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-x pattern\fP] [\fB-h\fP] directory1 [ directory2 ... ] .SH "DESCRIPTION" .PP This manual page documents \fBhardlink\fP, a @@ -32,8 +32,14 @@ Do not perform the consolidation; only print what would be changed. Print summary after hardlinking. .IP "\fB-vv\fP" 10 Print every hardlinked file and bytes saved. Also print summary after hardlinking. +.IP "\fB-x pattern\fP" 10 +Exclude files and directories matching pattern from hardlinking. .IP "\fB-h\fP" 10 Show help. +.PP +The optional pattern for excluding files and directories must be a PCRE2 +compatible regular expression. Only the basename of the file or directory +is checked, not its path. Excluded directories' contents will not be examined. .SH "AUTHOR" .PP \fBhardlink\fP was written by Jakub Jelinek . @@ -48,3 +54,9 @@ it. If a directory tree does change, this may result in \fBhardlink\fP accessing files and/or directories outside of the intended directory tree. Thus, you must avoid running \fBhardlink\fP on potentially changing directory trees, and especially on directory trees under control of another user. +.PP +Historically \fBhardlink\fP silently excluded any names beginning with +".in.", as well as any names beginning with "." followed by exactly 6 +other characters. That prior behavior can be achieved by specifying +.br +-x '^(\.in\.|\.[^.]{6}$)' diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 16d8163e2..69f6a464c 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -21,6 +21,7 @@ /* Changes by Travers Carter to make atomic hardlinking */ #define _GNU_SOURCE +#define PCRE2_CODE_UNIT_WIDTH 8 #include #include #include @@ -31,12 +32,17 @@ #include #include #include +#include #define NHASH (1<<17) /* Must be a power of 2! */ #define NIOBUF (1<<12) #define NAMELEN 4096 #define NBUF 64 +pcre2_code *re; +PCRE2_SPTR exclude_pattern; +pcre2_match_data *match_data; + struct _f; typedef struct _h { struct _h *next; @@ -99,12 +105,13 @@ void doexit(int i) void usage(char *prog) { - fprintf (stderr, "Usage: %s [-cnvhf] directories...\n", prog); + fprintf (stderr, "Usage: %s [-cnvhf] [-x pat] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); fprintf (stderr, " -v Print summary after hardlinking.\n"); fprintf (stderr, " -vv Print every hardlinked file and bytes saved + summary.\n"); fprintf (stderr, " -f Force hardlinking across filesystems.\n"); + fprintf (stderr, " -x pat Exclude files matching pattern.\n"); fprintf (stderr, " -h Show help.\n"); exit(255); } @@ -328,8 +335,10 @@ int main(int argc, char **argv) { int ch; int i; + int errornumber; + PCRE2_SIZE erroroffset; dynstr nam1 = {NULL, 0}; - while ((ch = getopt (argc, argv, "cnvhf")) != -1) { + while ((ch = getopt (argc, argv, "cnvhfx:")) != -1) { switch (ch) { case 'n': no_link++; @@ -343,6 +352,9 @@ int main(int argc, char **argv) case 'f': force=1; break; + case 'x': + exclude_pattern = (PCRE2_SPTR)optarg; + break; case 'h': default: usage(argv[0]); @@ -350,6 +362,22 @@ int main(int argc, char **argv) } if (optind >= argc) usage(argv[0]); + if (exclude_pattern) { + re = pcre2_compile( + exclude_pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminate */ + 0, /* default options */ + &errornumber, + &erroroffset, + NULL); /* use default compile context */ + if (!re) { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); + fprintf(stderr, "pattern error at offset %d: %s\n", (int)erroroffset, buffer); + usage(argv[0]); + } + match_data = pcre2_match_data_create_from_pattern(re, NULL); + } for (i = optind; i < argc; i++) rf(argv[i]); while (dirs) { @@ -371,16 +399,23 @@ int main(int argc, char **argv) if (!di->d_name[0]) continue; if (di->d_name[0] == '.') { - char *q; - if (!di->d_name[1] || !strcmp (di->d_name, "..") || !strncmp (di->d_name, ".in.", 4)) + if (!di->d_name[1] || !strcmp(di->d_name, "..")) continue; - q = strrchr (di->d_name, '.'); - if (q && strlen (q) == 7 && q != di->d_name) { + } + if (re && pcre2_match( + re, /* compiled regex */ + (PCRE2_SPTR)di->d_name, + strlen(di->d_name), + 0, /* start at offset 0 */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL) /* use default match context */ + >= 0) { + if (verbose) { nam1.buf[nam1baselen] = 0; - if (verbose) - fprintf(stderr, "Skipping %s%s\n", nam1.buf, di->d_name); - continue; + fprintf(stderr,"Skipping %s%s\n", nam1.buf, di->d_name); } + continue; } { size_t subdirlen; -- cgit v1.2.3-55-g7522 From c64d7e60b19fefbbffbab72ecb89b7b4c652d04c Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Mon, 24 Apr 2017 00:06:03 +0200 Subject: Fixed version number, added changelog about Todd Lewis' patch --- misc-utils/hardlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 69f6a464c..8e74ca021 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -19,6 +19,7 @@ /* Changes by Rémy Card to use constants and add option -n. */ /* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ /* Changes by Travers Carter to make atomic hardlinking */ +/* Changes by Todd Lewis that adds option -x to exclude files with pcre lib */ #define _GNU_SOURCE #define PCRE2_CODE_UNIT_WIDTH 8 -- cgit v1.2.3-55-g7522 From 88824694f68b3da23954bf3bae5db8e915164111 Mon Sep 17 00:00:00 2001 From: utoddl Date: Mon, 14 Aug 2017 14:46:25 +0000 Subject: Update hardlink.1 The example regex in the BUGS section needs its backslashes backslashed.--- misc-utils/hardlink.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index b8bfe9d22..5aa022a1f 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -59,4 +59,4 @@ Historically \fBhardlink\fP silently excluded any names beginning with ".in.", as well as any names beginning with "." followed by exactly 6 other characters. That prior behavior can be achieved by specifying .br --x '^(\.in\.|\.[^.]{6}$)' +-x '^(\\.in\\.|\\.[^.]{6}$)' -- cgit v1.2.3-55-g7522 From 3b9498938911cabd10130adbcda797d3fe8a2763 Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Tue, 17 Oct 2017 23:21:36 +0200 Subject: temporal fix before re-patch (updates from Fedora repo) --- misc-utils/hardlink.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 5aa022a1f..b8bfe9d22 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -59,4 +59,4 @@ Historically \fBhardlink\fP silently excluded any names beginning with ".in.", as well as any names beginning with "." followed by exactly 6 other characters. That prior behavior can be achieved by specifying .br --x '^(\\.in\\.|\\.[^.]{6}$)' +-x '^(\.in\.|\.[^.]{6}$)' -- cgit v1.2.3-55-g7522 From 5aad3c12f30819ceed1025e9d20e14bd47d5932b Mon Sep 17 00:00:00 2001 From: Kevin Fenzi Date: Sun, 17 Sep 2017 14:07:42 -0700 Subject: fixes for the fixes Signed-off-by: Francisco Javier Tsao Santín --- misc-utils/hardlink.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index b8bfe9d22..5aa022a1f 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -59,4 +59,4 @@ Historically \fBhardlink\fP silently excluded any names beginning with ".in.", as well as any names beginning with "." followed by exactly 6 other characters. That prior behavior can be achieved by specifying .br --x '^(\.in\.|\.[^.]{6}$)' +-x '^(\\.in\\.|\\.[^.]{6}$)' -- cgit v1.2.3-55-g7522