From 0ec20db824044c36412508310705f32e3503aa4c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 7 Jan 2005 05:58:05 +0000 Subject: auto-import hardlink-1.0-1.1 on branch devel from hardlink-1.0-1.1.src.rpm --- misc-utils/hardlink.c | 344 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 misc-utils/hardlink.c (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c new file mode 100644 index 000000000..ad2c2ffcf --- /dev/null +++ b/misc-utils/hardlink.c @@ -0,0 +1,344 @@ +/* Copyright (C) 2001 Red Hat, Inc. + + Written by Jakub Jelinek . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this program; see the file COPYING. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Changes by Rémy Card to use constants and add option -n. */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NHASH 131072 /* Must be a power of 2! */ +#define NAMELEN 4096 +#define NBUF 64 + +struct _f; +typedef struct _h { + struct _h *next; + struct _f *chain; + off_t size; + time_t mtime; +} h; + +typedef struct _d { + struct _d *next; + char name[0]; +} d; + +d *dirs; + +h *hps[NHASH]; + +int no_link = 0; +int verbose = 0; +int content_only = 0; + +typedef struct _f { + struct _f *next; + ino_t ino; + dev_t dev; + unsigned int cksum; + char name[0]; +} f; + +inline unsigned int hash(off_t size, time_t mtime) +{ + return (size ^ mtime) & (NHASH - 1); +} + +inline int stcmp(struct stat *st1, struct stat *st2, int content_only) +{ + if (content_only) + return st1->st_size != st2->st_size; + return st1->st_mode != st2->st_mode || st1->st_uid != st2->st_uid || + st1->st_gid != st2->st_gid || st1->st_size != st2->st_size || + st1->st_mtime != st2->st_mtime; +} + +long long ndirs, nobjects, nregfiles, nmmap, ncomp, nlinks, nsaved; + +void doexit(int i) +{ + if (verbose) { + fprintf(stderr, "\n\n"); + fprintf(stderr, "Directories %lld\n", ndirs); + fprintf(stderr, "Objects %lld\n", nobjects); + fprintf(stderr, "IFREG %lld\n", nregfiles); + fprintf(stderr, "Mmaps %lld\n", nmmap); + fprintf(stderr, "Comparisons %lld\n", ncomp); + fprintf(stderr, "%s %lld\n", (no_link ? "Would link" : "Linked"), nlinks); + fprintf(stderr, "%s %lld\n", (no_link ? "Would save" : "saved"), nsaved); + } + exit(i); +} + +void usage(prog) +{ + fprintf (stderr, "Usage: %s [-cnv] directories...\n", prog); + exit(255); +} + +unsigned int buf[NBUF]; +char nambuf1[NAMELEN], nambuf2[NAMELEN]; + +void rf (char *name) +{ + struct stat st, st2, st3; + nobjects++; + if (lstat (name, &st)) + return; + if (S_ISDIR (st.st_mode)) { + d * dp = malloc(sizeof(d) + 1 + strlen (name)); + if (!dp) { + fprintf(stderr, "\nOut of memory 3\n"); + doexit(3); + } + strcpy (dp->name, name); + dp->next = dirs; + dirs = dp; + } else if (S_ISREG (st.st_mode)) { + int fd, i; + f * fp, * fp2; + h * hp; + char *p, *q; + char *n1, *n2; + int cksumsize = sizeof(buf); + unsigned int cksum; + time_t mtime = content_only ? 0 : st.st_mtime; + unsigned int hsh = hash (st.st_size, mtime); + nregfiles++; + if (verbose > 1) + fprintf(stderr, " %s", name); + fd = open (name, O_RDONLY); + if (fd < 0) return; + if (st.st_size < sizeof(buf)) { + cksumsize = st.st_size; + memset (((char *)buf) + cksumsize, 0, (sizeof(buf) - cksumsize) % sizeof(buf[0])); + } + if (read (fd, buf, cksumsize) != cksumsize) { + close(fd); + if (verbose > 1) + fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + return; + } + cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]); + for (i = 0, cksum = 0; i < cksumsize; i++) { + if (cksum + buf[i] < cksum) + cksum += buf[i] + 1; + else + cksum += buf[i]; + } + for (hp = hps[hsh]; hp; hp = hp->next) + if (hp->size == st.st_size && hp->mtime == mtime) + break; + if (!hp) { + hp = malloc(sizeof(h)); + if (!hp) { + fprintf(stderr, "\nOut of memory 1\n"); + doexit(1); + } + hp->size = st.st_size; + hp->mtime = mtime; + hp->chain = NULL; + hp->next = hps[hsh]; + hps[hsh] = hp; + } + for (fp = hp->chain; fp; fp = fp->next) + if (fp->cksum == cksum) + break; + for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) + if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) { + close(fd); + if (verbose > 1) + fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + return; + } + if (fp) { + p = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + nmmap++; + if (p == (void *)-1) { + close(fd); + fprintf(stderr, "\nFailed to mmap %s\n", name); + return; + } + } + for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) + if (!lstat (fp2->name, &st2) && S_ISREG (st2.st_mode) && + !stcmp (&st, &st2, content_only) && + st2.st_ino != st.st_ino && + st2.st_dev == st.st_dev) { + int fd2 = open (fp2->name, O_RDONLY); + if (fd2 < 0) continue; + if (fstat (fd2, &st2) || !S_ISREG (st2.st_mode)) { + close (fd2); + continue; + } + ncomp++; + q = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd2, 0); + if (q == (void *)-1) { + close(fd2); + fprintf(stderr, "\nFailed to mmap %s\n", fp2->name); + continue; + } + if (memcmp (p, q, st.st_size)) { + munmap (q, st.st_size); + close(fd2); + continue; + } + munmap (q, st.st_size); + close(fd2); + if (lstat (name, &st3)) { + fprintf(stderr, "\nCould not stat %s again\n", name); + munmap (p, st.st_size); + close(fd); + return; + } + st3.st_atime = st.st_atime; + if (stcmp (&st, &st3, 0)) { + fprintf(stderr, "\nFile %s changed underneath us\n", name); + munmap (p, st.st_size); + close(fd); + return; + } + n1 = fp2->name; + n2 = name; + if (!no_link) { + strcpy (stpcpy (nambuf2, n2), ".$$$___cleanit___$$$"); + if (rename (n2, nambuf2)) { + fprintf(stderr, "\nFailed to rename %s to %s\n", n2, nambuf2); + continue; + } + if (link (n1, n2)) { + fprintf(stderr, "\nFailed to hardlink %s to %s\n", n1, n2); + if (rename (nambuf2, n2)) { + fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nambuf2, n2); + } + munmap (p, st.st_size); + close(fd); + return; + } + unlink (nambuf2); + } + nlinks++; + if (st3.st_nlink > 1) { + /* We actually did not save anything this time, since the link second argument + had some other links as well. */ + if (verbose > 1) + fprintf(stderr, "\r%*s\r%s %s to %s\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2); + } else { + nsaved+=((st.st_size+4095)/4096)*4096; + if (verbose > 1) + fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); + } + munmap (p, st.st_size); + close(fd); + return; + } + if (fp) + munmap (p, st.st_size); + fp2 = malloc(sizeof(f) + 1 + strlen (name)); + if (!fp2) { + fprintf(stderr, "\nOut of memory 2\n"); + doexit(2); + } + close(fd); + fp2->ino = st.st_ino; + fp2->dev = st.st_dev; + fp2->cksum = cksum; + strcpy(fp2->name, name); + if (fp) { + fp2->next = fp->next; + fp->next = fp2; + } else { + fp2->next = hp->chain; + hp->chain = fp2; + } + if (verbose > 1) + fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + return; + } +} + +int main(int argc, char **argv) +{ + int ch; + int i; + char *p; + d * dp; + DIR *dh; + struct dirent *di; + while ((ch = getopt (argc, argv, "cnv")) != -1) { + switch (ch) { + case 'n': + no_link++; + break; + case 'v': + verbose++; + break; + case 'c': + content_only++; + break; + default: + usage(argv[0]); + } + } + if (optind >= argc) + usage(argv[0]); + for (i = optind; i < argc; i++) + rf(argv[i]); + while (dirs) { + dp = dirs; + dirs = dp->next; + strcpy (nambuf1, dp->name); + free (dp); + strcat (nambuf1, "/"); + p = strchr (nambuf1, 0); + dh = opendir (nambuf1); + if (dh == NULL) + continue; + ndirs++; + while ((di = readdir (dh)) != NULL) { + if (!di->d_name[0]) + continue; + if (di->d_name[0] == '.') { + char *q; + if (!di->d_name[1] || !strcmp (di->d_name, "..") || !strncmp (di->d_name, ".in.", 4)) + continue; + q = strrchr (di->d_name, '.'); + if (q && strlen (q) == 7 && q != di->d_name) { + *p = 0; + if (verbose) + fprintf(stderr, "Skipping %s%s\n", nambuf1, di->d_name); + continue; + } + } + strcpy (p, di->d_name); + rf(nambuf1); + } + closedir(dh); + } + doexit(0); + return 0; +} -- cgit v1.2.3-55-g7522 From 5edefc924240843c842f77020da0b7c418c1a50a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 9 Feb 2005 23:12:47 +0000 Subject: kill warning --- misc-utils/hardlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index ad2c2ffcf..52cb41f42 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -94,7 +94,7 @@ void doexit(int i) exit(i); } -void usage(prog) +void usage(char *prog) { fprintf (stderr, "Usage: %s [-cnv] directories...\n", prog); exit(255); -- cgit v1.2.3-55-g7522 From a16d7294a868f4dacb1482d49820f0226e50e949 Mon Sep 17 00:00:00 2001 From: Jeremy Katz Date: Wed, 27 Apr 2005 22:05:13 +0000 Subject: - don't try to hardlink 0 byte files (#154404) --- misc-utils/hardlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 52cb41f42..7a6cf6f52 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -175,7 +175,7 @@ void rf (char *name) fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); return; } - if (fp) { + if (fp && st.st_size > 0) { p = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); nmmap++; if (p == (void *)-1) { @@ -191,7 +191,7 @@ void rf (char *name) st2.st_dev == st.st_dev) { int fd2 = open (fp2->name, O_RDONLY); if (fd2 < 0) continue; - if (fstat (fd2, &st2) || !S_ISREG (st2.st_mode)) { + if (fstat (fd2, &st2) || !S_ISREG (st2.st_mode) || st2.st_size == 0) { close (fd2); continue; } -- cgit v1.2.3-55-g7522 From a810992a7215994d020897215946fbddad3d8e68 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 26 Aug 2005 04:42:38 +0000 Subject: Document hardlink command line options. (Ville Skytta) (#161738) --- misc-utils/hardlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 7a6cf6f52..1e102101f 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -97,6 +97,9 @@ void doexit(int i) void usage(char *prog) { fprintf (stderr, "Usage: %s [-cnv] directories...\n", prog); + fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); + fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); + fprintf (stderr, " -v Operate in verbose mode.\n"); exit(255); } -- cgit v1.2.3-55-g7522 From cabf1c1cb7409db30917e1d7e8546ec850e43c1d Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Mon, 7 Nov 2005 14:07:33 +0000 Subject: add -h option --- misc-utils/hardlink.1 | 39 +++++++++++++++++++++++++++++++++++++++ misc-utils/hardlink.c | 8 +++++--- 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 misc-utils/hardlink.1 (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 new file mode 100644 index 000000000..7ffc2b460 --- /dev/null +++ b/misc-utils/hardlink.1 @@ -0,0 +1,39 @@ +.TH "hardlink" "1" +.SH "NAME" +hardlink \- Consolidate duplicate files via hardlinks +.SH "SYNOPSIS" +.PP +\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-h\fP] directory1 [ directory2 ... ] +.SH "DESCRIPTION" +.PP +This manual page documents \fBhardlink\fP, a +program which consolidates duplicate files in one or more directories +using hardlinks. +.PP +\fBhardlink\fP traverses one +or more directories searching for duplicate files. When it finds duplicate +files, it uses one of them as the master. It then removes all other +duplicates and places a hardlink for each one pointing to the master file. +This allows for conservation of disk space where multiple directories +on a single filesystem contain many duplicate files. +.PP +Since hard links can only span a single filesystem, \fBhardlink\fP +is only useful when all directories specified are on the same filesystem. +.SH "OPTIONS" +.PP +.IP "\fB-c\fP" 10 +Compare only the contents of the files being considered for consolidation. +Disregards permission, ownership and other differences. +.IP "\fB-n\fP" 10 +Do not perform the consolidation; only print what would be changed. +.IP "\fB-v\fP" 10 +Enable verbose logging. +.IP "\fB-h\fP" 10 +Show help. +.SH "AUTHOR" +.PP +\fBhardlink\fP was written by Jakub Jelinek . +.PP +Man page written by Brian Long. +.PP +Man page updated by Jindrich Novy diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 1e102101f..fd511c894 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -96,10 +96,11 @@ void doexit(int i) void usage(char *prog) { - fprintf (stderr, "Usage: %s [-cnv] directories...\n", prog); + fprintf (stderr, "Usage: %s [-cnvh] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); fprintf (stderr, " -v Operate in verbose mode.\n"); + fprintf (stderr, " -h Show help.\n"); exit(255); } @@ -125,7 +126,7 @@ void rf (char *name) int fd, i; f * fp, * fp2; h * hp; - char *p, *q; + char *p = NULL, *q; char *n1, *n2; int cksumsize = sizeof(buf); unsigned int cksum; @@ -292,7 +293,7 @@ int main(int argc, char **argv) d * dp; DIR *dh; struct dirent *di; - while ((ch = getopt (argc, argv, "cnv")) != -1) { + while ((ch = getopt (argc, argv, "cnvh")) != -1) { switch (ch) { case 'n': no_link++; @@ -303,6 +304,7 @@ int main(int argc, char **argv) case 'c': content_only++; break; + case 'h': default: usage(argv[0]); } -- cgit v1.2.3-55-g7522 From b4ece7768f292758e99de782ba248d3b0a9751d7 Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Mon, 14 Nov 2005 08:31:25 +0000 Subject: mostly spec cleanup --- misc-utils/hardlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index fd511c894..4625f1ea3 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -17,7 +17,8 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* Changes by Rémy Card to use constants and add option -n. */ +/* Changes by Rémy Card to use constants and add option -n. */ +/* Changes by Jindrich Novy to add option -h. */ #define _GNU_SOURCE #include -- cgit v1.2.3-55-g7522 From a272f143e7daca35c3617ba02a9371dbe4b5a0ac Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Sun, 29 Oct 2006 07:19:35 +0000 Subject: - update docs to describe highest verbosity -vv option (#210816) - use dist Resolves: 210816 --- misc-utils/hardlink.1 | 6 ++++-- misc-utils/hardlink.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 7ffc2b460..09fd7ac90 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -3,7 +3,7 @@ hardlink \- Consolidate duplicate files via hardlinks .SH "SYNOPSIS" .PP -\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-h\fP] directory1 [ directory2 ... ] +\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-h\fP] directory1 [ directory2 ... ] .SH "DESCRIPTION" .PP This manual page documents \fBhardlink\fP, a @@ -27,7 +27,9 @@ Disregards permission, ownership and other differences. .IP "\fB-n\fP" 10 Do not perform the consolidation; only print what would be changed. .IP "\fB-v\fP" 10 -Enable verbose logging. +Print summary after hardlinking. +.IP "\fB-vv\fP" 10 +Print every hardlinked file and bytes saved. Also print summary after hardlinking. .IP "\fB-h\fP" 10 Show help. .SH "AUTHOR" diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 4625f1ea3..a07d90c16 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -100,7 +100,8 @@ void usage(char *prog) fprintf (stderr, "Usage: %s [-cnvh] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); - fprintf (stderr, " -v Operate in verbose mode.\n"); + fprintf (stderr, " -v Print summary after hardlinking.\n"); + fprintf (stderr, " -vv Print every hardlinked file and bytes saved + summary.\n"); fprintf (stderr, " -h Show help.\n"); exit(255); } -- cgit v1.2.3-55-g7522 From 0cd6b1d3539a25a8bd5513e3c7d9b092a5ca47d3 Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Thu, 3 Mar 2011 12:15:33 +0100 Subject: fix URL and remove mmap() (#676962, #672917) --- misc-utils/hardlink.c | 52 +++++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index a07d90c16..225c71f45 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -18,7 +18,7 @@ Boston, MA 02111-1307, USA. */ /* Changes by Rémy Card to use constants and add option -n. */ -/* Changes by Jindrich Novy to add option -h. */ +/* Changes by Jindrich Novy to add option -h, replace mmap(2) */ #define _GNU_SOURCE #include @@ -31,7 +31,8 @@ #include #include -#define NHASH 131072 /* Must be a power of 2! */ +#define NHASH (1<<17) /* Must be a power of 2! */ +#define NIOBUF (1<<12) #define NAMELEN 4096 #define NBUF 64 @@ -78,7 +79,7 @@ inline int stcmp(struct stat *st1, struct stat *st2, int content_only) st1->st_mtime != st2->st_mtime; } -long long ndirs, nobjects, nregfiles, nmmap, ncomp, nlinks, nsaved; +long long ndirs, nobjects, nregfiles, ncomp, nlinks, nsaved; void doexit(int i) { @@ -87,7 +88,6 @@ void doexit(int i) fprintf(stderr, "Directories %lld\n", ndirs); fprintf(stderr, "Objects %lld\n", nobjects); fprintf(stderr, "IFREG %lld\n", nregfiles); - fprintf(stderr, "Mmaps %lld\n", nmmap); fprintf(stderr, "Comparisons %lld\n", ncomp); fprintf(stderr, "%s %lld\n", (no_link ? "Would link" : "Linked"), nlinks); fprintf(stderr, "%s %lld\n", (no_link ? "Would save" : "saved"), nsaved); @@ -107,6 +107,7 @@ void usage(char *prog) } unsigned int buf[NBUF]; +char iobuf1[NIOBUF], iobuf2[NIOBUF]; char nambuf1[NAMELEN], nambuf2[NAMELEN]; void rf (char *name) @@ -128,12 +129,12 @@ void rf (char *name) int fd, i; f * fp, * fp2; h * hp; - char *p = NULL, *q; char *n1, *n2; int cksumsize = sizeof(buf); unsigned int cksum; time_t mtime = content_only ? 0 : st.st_mtime; unsigned int hsh = hash (st.st_size, mtime); + off_t fsize; nregfiles++; if (verbose > 1) fprintf(stderr, " %s", name); @@ -181,15 +182,6 @@ void rf (char *name) fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); return; } - if (fp && st.st_size > 0) { - p = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); - nmmap++; - if (p == (void *)-1) { - close(fd); - fprintf(stderr, "\nFailed to mmap %s\n", name); - return; - } - } for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) if (!lstat (fp2->name, &st2) && S_ISREG (st2.st_mode) && !stcmp (&st, &st2, content_only) && @@ -202,29 +194,27 @@ void rf (char *name) continue; } ncomp++; - q = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd2, 0); - if (q == (void *)-1) { - close(fd2); - fprintf(stderr, "\nFailed to mmap %s\n", fp2->name); - continue; - } - if (memcmp (p, q, st.st_size)) { - munmap (q, st.st_size); - close(fd2); - continue; - } - munmap (q, st.st_size); - close(fd2); + lseek(fd, 0, SEEK_SET); + for (fsize = st.st_size; fsize > 0; fsize -= NIOBUF) { + off_t rsize = fsize >= NIOBUF ? NIOBUF : fsize; + if (read (fd, iobuf1, rsize) != rsize || read (fd2, iobuf2, rsize) != rsize) { + close(fd); + close(fd2); + fprintf(stderr, "\nReading error\n"); + return; + } + if (memcmp (iobuf1, iobuf2, rsize)) break; + } + close(fd2); + if (fsize > 0) continue; if (lstat (name, &st3)) { fprintf(stderr, "\nCould not stat %s again\n", name); - munmap (p, st.st_size); close(fd); return; } st3.st_atime = st.st_atime; if (stcmp (&st, &st3, 0)) { fprintf(stderr, "\nFile %s changed underneath us\n", name); - munmap (p, st.st_size); close(fd); return; } @@ -241,7 +231,6 @@ void rf (char *name) if (rename (nambuf2, n2)) { fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nambuf2, n2); } - munmap (p, st.st_size); close(fd); return; } @@ -258,12 +247,9 @@ void rf (char *name) if (verbose > 1) fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); } - munmap (p, st.st_size); close(fd); return; } - if (fp) - munmap (p, st.st_size); fp2 = malloc(sizeof(f) + 1 + strlen (name)); if (!fp2) { fprintf(stderr, "\nOut of memory 2\n"); -- cgit v1.2.3-55-g7522 From 94b040b06ba018af0799e4117fe5c8dc99f70ef4 Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Fri, 21 Oct 2011 02:49:08 +0200 Subject: fix possible buffer overflows, integer overflows, update man page --- misc-utils/hardlink.1 | 13 ++++-- misc-utils/hardlink.c | 125 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 95 insertions(+), 43 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 09fd7ac90..0590e8464 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -6,11 +6,11 @@ hardlink \- Consolidate duplicate files via hardlinks \fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-h\fP] directory1 [ directory2 ... ] .SH "DESCRIPTION" .PP -This manual page documents \fBhardlink\fP, a +This manual page documents \fBhardlink\fP, a program which consolidates duplicate files in one or more directories using hardlinks. .PP -\fBhardlink\fP traverses one +\fBhardlink\fP traverses one or more directories searching for duplicate files. When it finds duplicate files, it uses one of them as the master. It then removes all other duplicates and places a hardlink for each one pointing to the master file. @@ -34,8 +34,15 @@ Print every hardlinked file and bytes saved. Also print summary after hardlinkin Show help. .SH "AUTHOR" .PP -\fBhardlink\fP was written by Jakub Jelinek . +\fBhardlink\fP was written by Jakub Jelinek . .PP Man page written by Brian Long. .PP Man page updated by Jindrich Novy +.SH "BUGS" +.PP +\fBhardlink\fP assumes that its target directory trees do not change from under +it. If a directory tree does change, this may result in \fBhardlink\fP +accessing files and/or directories outside of the intended directory tree. +Thus, you must avoid running \fBhardlink\fP on potentially changing directory +trees, and especially on directory trees under control of another user. diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 225c71f45..51a71cf97 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -1,24 +1,24 @@ /* Copyright (C) 2001 Red Hat, Inc. Written by Jakub Jelinek . - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - + /* Changes by Rémy Card to use constants and add option -n. */ -/* Changes by Jindrich Novy to add option -h, replace mmap(2) */ +/* Changes by Jindrich Novy to add option -h, replace mmap(2), fix overflows */ #define _GNU_SOURCE #include @@ -83,7 +83,7 @@ long long ndirs, nobjects, nregfiles, ncomp, nlinks, nsaved; void doexit(int i) { - if (verbose) { + if (verbose) { fprintf(stderr, "\n\n"); fprintf(stderr, "Directories %lld\n", ndirs); fprintf(stderr, "Objects %lld\n", nobjects); @@ -108,28 +108,59 @@ void usage(char *prog) unsigned int buf[NBUF]; char iobuf1[NIOBUF], iobuf2[NIOBUF]; -char nambuf1[NAMELEN], nambuf2[NAMELEN]; -void rf (char *name) +inline size_t add2(size_t a, size_t b) +{ + size_t sum = a + b; + if (sum < a) { + fprintf(stderr, "\nInteger overflow\n"); + doexit(5); + } + return sum; +} + +inline size_t add3(size_t a, size_t b, size_t c) +{ + return add2(add2(a, b), c); +} + +typedef struct { + char *buf; + size_t alloc; +} dynstr; + +void growstr(dynstr *str, size_t newlen) +{ + if (newlen < str->alloc) + return; + str->buf = realloc(str->buf, str->alloc = add2(newlen, 1)); + if (!str->buf) { + fprintf(stderr, "\nOut of memory 4\n"); + doexit(4); + } +} + +void rf (const char *name) { struct stat st, st2, st3; + const size_t namelen = strlen(name); nobjects++; if (lstat (name, &st)) return; if (S_ISDIR (st.st_mode)) { - d * dp = malloc(sizeof(d) + 1 + strlen (name)); + d * dp = malloc(add3(sizeof(d), namelen, 1)); if (!dp) { fprintf(stderr, "\nOut of memory 3\n"); doexit(3); } - strcpy (dp->name, name); + memcpy(dp->name, name, namelen + 1); dp->next = dirs; dirs = dp; } else if (S_ISREG (st.st_mode)) { int fd, i; f * fp, * fp2; h * hp; - char *n1, *n2; + const char *n1, *n2; int cksumsize = sizeof(buf); unsigned int cksum; time_t mtime = content_only ? 0 : st.st_mtime; @@ -146,8 +177,8 @@ void rf (char *name) } if (read (fd, buf, cksumsize) != cksumsize) { close(fd); - if (verbose > 1) - fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); return; } cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]); @@ -178,8 +209,8 @@ void rf (char *name) for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) { close(fd); - if (verbose > 1) - fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); return; } for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) @@ -221,36 +252,45 @@ void rf (char *name) n1 = fp2->name; n2 = name; if (!no_link) { - strcpy (stpcpy (nambuf2, n2), ".$$$___cleanit___$$$"); - if (rename (n2, nambuf2)) { - fprintf(stderr, "\nFailed to rename %s to %s\n", n2, nambuf2); + const char *suffix = ".$$$___cleanit___$$$"; + const size_t suffixlen = strlen(suffix); + size_t n2len = strlen(n2); + dynstr nam2 = {NULL, 0}; + growstr(&nam2, add2(n2len, suffixlen)); + memcpy(nam2.buf, n2, n2len); + memcpy(&nam2.buf[n2len], suffix, suffixlen + 1); + if (rename (n2, nam2.buf)) { + fprintf(stderr, "\nFailed to rename %s to %s\n", n2, nam2.buf); + free(nam2.buf); continue; } if (link (n1, n2)) { fprintf(stderr, "\nFailed to hardlink %s to %s\n", n1, n2); - if (rename (nambuf2, n2)) { - fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nambuf2, n2); + if (rename (nam2.buf, n2)) { + fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nam2.buf, n2); } close(fd); + free(nam2.buf); return; } - unlink (nambuf2); + unlink (nam2.buf); + free(nam2.buf); } nlinks++; if (st3.st_nlink > 1) { /* We actually did not save anything this time, since the link second argument had some other links as well. */ if (verbose > 1) - fprintf(stderr, "\r%*s\r%s %s to %s\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2); + fprintf(stderr, "\r%*s\r%s %s to %s\n", (int)(((namelen > NAMELEN) ? 0 : namelen) + 2), "", (no_link ? "Would link" : "Linked"), n1, n2); } else { nsaved+=((st.st_size+4095)/4096)*4096; if (verbose > 1) - fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)strlen(name)+2, "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); + fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)(((namelen > NAMELEN) ? 0 : namelen) + 2), "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); } close(fd); return; } - fp2 = malloc(sizeof(f) + 1 + strlen (name)); + fp2 = malloc(add3(sizeof(f), namelen, 1)); if (!fp2) { fprintf(stderr, "\nOut of memory 2\n"); doexit(2); @@ -259,7 +299,7 @@ void rf (char *name) fp2->ino = st.st_ino; fp2->dev = st.st_dev; fp2->cksum = cksum; - strcpy(fp2->name, name); + memcpy(fp2->name, name, namelen + 1); if (fp) { fp2->next = fp->next; fp->next = fp2; @@ -267,8 +307,8 @@ void rf (char *name) fp2->next = hp->chain; hp->chain = fp2; } - if (verbose > 1) - fprintf(stderr, "\r%*s\r", (int)strlen(name)+2, ""); + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); return; } } @@ -277,10 +317,7 @@ int main(int argc, char **argv) { int ch; int i; - char *p; - d * dp; - DIR *dh; - struct dirent *di; + dynstr nam1 = {NULL, 0}; while ((ch = getopt (argc, argv, "cnvh")) != -1) { switch (ch) { case 'n': @@ -302,13 +339,17 @@ int main(int argc, char **argv) for (i = optind; i < argc; i++) rf(argv[i]); while (dirs) { - dp = dirs; + DIR *dh; + struct dirent *di; + d * dp = dirs; + size_t nam1baselen = strlen(dp->name); dirs = dp->next; - strcpy (nambuf1, dp->name); + growstr(&nam1, add2(nam1baselen, 1)); + memcpy(nam1.buf, dp->name, nam1baselen); free (dp); - strcat (nambuf1, "/"); - p = strchr (nambuf1, 0); - dh = opendir (nambuf1); + nam1.buf[nam1baselen++] = '/'; + nam1.buf[nam1baselen] = 0; + dh = opendir (nam1.buf); if (dh == NULL) continue; ndirs++; @@ -321,14 +362,18 @@ int main(int argc, char **argv) continue; q = strrchr (di->d_name, '.'); if (q && strlen (q) == 7 && q != di->d_name) { - *p = 0; + nam1.buf[nam1baselen] = 0; if (verbose) - fprintf(stderr, "Skipping %s%s\n", nambuf1, di->d_name); + fprintf(stderr, "Skipping %s%s\n", nam1.buf, di->d_name); continue; } } - strcpy (p, di->d_name); - rf(nambuf1); + { + size_t subdirlen; + growstr(&nam1, add2(nam1baselen, subdirlen = strlen(di->d_name))); + memcpy(&nam1.buf[nam1baselen], di->d_name, add2(subdirlen, 1)); + } + rf(nam1.buf); } closedir(dh); } -- cgit v1.2.3-55-g7522 From c23b4a230531da663fc35a478754c641866cb6df Mon Sep 17 00:00:00 2001 From: Jindrich Novy Date: Sun, 15 Apr 2012 11:22:10 +0200 Subject: do not allow to hardlink files across filesystems by default (#786719) (use -f option to override) --- misc-utils/hardlink.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 51a71cf97..a7c72492d 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -18,7 +18,7 @@ Boston, MA 02111-1307, USA. */ /* Changes by Rémy Card to use constants and add option -n. */ -/* Changes by Jindrich Novy to add option -h, replace mmap(2), fix overflows */ +/* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ #define _GNU_SOURCE #include @@ -56,6 +56,7 @@ h *hps[NHASH]; int no_link = 0; int verbose = 0; int content_only = 0; +int force = 0; typedef struct _f { struct _f *next; @@ -97,11 +98,12 @@ void doexit(int i) void usage(char *prog) { - fprintf (stderr, "Usage: %s [-cnvh] directories...\n", prog); + fprintf (stderr, "Usage: %s [-cnvhf] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); fprintf (stderr, " -v Print summary after hardlinking.\n"); fprintf (stderr, " -vv Print every hardlinked file and bytes saved + summary.\n"); + fprintf (stderr, " -f Force hardlinking across filesystems.\n"); fprintf (stderr, " -h Show help.\n"); exit(255); } @@ -139,7 +141,7 @@ void growstr(dynstr *str, size_t newlen) doexit(4); } } - +dev_t dev = 0; void rf (const char *name) { struct stat st, st2, st3; @@ -147,6 +149,13 @@ void rf (const char *name) nobjects++; if (lstat (name, &st)) return; + if (st.st_dev != dev && !force) { + if (dev) { + fprintf(stderr, "%s is on different filesystem than the rest.\nUse -f option to override.\n", name); + doexit(6); + } + dev = st.st_dev; + } if (S_ISDIR (st.st_mode)) { d * dp = malloc(add3(sizeof(d), namelen, 1)); if (!dp) { @@ -318,7 +327,7 @@ int main(int argc, char **argv) int ch; int i; dynstr nam1 = {NULL, 0}; - while ((ch = getopt (argc, argv, "cnvh")) != -1) { + while ((ch = getopt (argc, argv, "cnvhf")) != -1) { switch (ch) { case 'n': no_link++; @@ -329,6 +338,9 @@ int main(int argc, char **argv) case 'c': content_only++; break; + case 'f': + force=1; + break; case 'h': default: usage(argv[0]); -- cgit v1.2.3-55-g7522 From 348af99a6296d9d15de10700594e1403d23a9a37 Mon Sep 17 00:00:00 2001 From: Jan Zeleny Date: Wed, 10 Apr 2013 14:47:17 +0200 Subject: Mention -f option in the man page --- misc-utils/hardlink.1 | 2 ++ 1 file changed, 2 insertions(+) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 0590e8464..04228f4bb 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -24,6 +24,8 @@ is only useful when all directories specified are on the same filesystem. .IP "\fB-c\fP" 10 Compare only the contents of the files being considered for consolidation. Disregards permission, ownership and other differences. +.IP "\fB-f\fP" 10 +Force hardlinking across file systems. .IP "\fB-n\fP" 10 Do not perform the consolidation; only print what would be changed. .IP "\fB-v\fP" 10 -- cgit v1.2.3-55-g7522 From c11af66f0fce8ded25f92c8dde294afb2a64b735 Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Sun, 10 Jul 2016 03:57:43 +0200 Subject: spec file reflects the atomic hardlinking patch; removed cleaning buildroot (redundant); current FSF address at .c source file --- misc-utils/hardlink.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index a7c72492d..ef042deba 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -12,10 +12,9 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public - License along with this program; see the file COPYING. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* Changes by Rémy Card to use constants and add option -n. */ /* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ -- cgit v1.2.3-55-g7522 From cbb0524c7c8b4aa62d1dc2fd07c7392ede7cfc5f Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Sun, 10 Jul 2016 04:03:41 +0200 Subject: Revert "spec file reflects the atomic hardlinking patch; removed cleaning buildroot (redundant); current FSF address at .c source file" This reverts commit bb9e76ae339794c2243ae294207942b7ea278364. --- misc-utils/hardlink.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index ef042deba..3521cb3ea 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -12,9 +12,10 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ + You should have received a copy of the GNU General Public + License along with this program; see the file COPYING. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ /* Changes by Rémy Card to use constants and add option -n. */ /* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ @@ -29,6 +30,7 @@ #include #include #include +#include #define NHASH (1<<17) /* Must be a power of 2! */ #define NIOBUF (1<<12) @@ -267,21 +269,22 @@ void rf (const char *name) growstr(&nam2, add2(n2len, suffixlen)); memcpy(nam2.buf, n2, n2len); memcpy(&nam2.buf[n2len], suffix, suffixlen + 1); - if (rename (n2, nam2.buf)) { - fprintf(stderr, "\nFailed to rename %s to %s\n", n2, nam2.buf); + /* First create a temporary link to n1 under a new name */ + if (link(n1, nam2.buf)) { + fprintf(stderr, "\nFailed to hardlink %s to %s (create temporary link as %s failed - %s)\n", n1, n2, nam2.buf, strerror(errno)); free(nam2.buf); continue; } - if (link (n1, n2)) { - fprintf(stderr, "\nFailed to hardlink %s to %s\n", n1, n2); - if (rename (nam2.buf, n2)) { - fprintf(stderr, "\nBad bad - failed to rename back %s to %s\n", nam2.buf, n2); + /* Then rename into place over the existing n2 */ + if (rename (nam2.buf, n2)) { + fprintf(stderr, "\nFailed to hardlink %s to %s (rename temporary link to %s failed - %s)\n", n1, n2, n2, strerror(errno)); + /* Something went wrong, try to remove the now redundant temporary link */ + if (unlink(nam2.buf)) { + fprintf(stderr, "\nFailed to remove temporary link %s - %s\n", nam2.buf, strerror(errno)); } - close(fd); free(nam2.buf); - return; + continue; } - unlink (nam2.buf); free(nam2.buf); } nlinks++; -- cgit v1.2.3-55-g7522 From 4d072ba7155358ac50531bda512442f514b37e37 Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Sun, 10 Jul 2016 04:12:06 +0200 Subject: spec file reflects the atomic hardlinking patch; removed cleaning buildroot (redundant); update FSF address at .c source file --- misc-utils/hardlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 3521cb3ea..e661e5f0c 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -12,13 +12,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public - License along with this program; see the file COPYING. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* Changes by Rémy Card to use constants and add option -n. */ /* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ +/* Changes by Travers Carter to make atomic hardlinking */ #define _GNU_SOURCE #include -- cgit v1.2.3-55-g7522 From 92c79dc79e1154f15d6049961287e367fc43aab2 Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Thu, 16 Feb 2017 20:08:40 +0100 Subject: Fixed 32 bit build with gcc7 (RH Bugzilla ID 1422989) --- misc-utils/hardlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index e661e5f0c..16d8163e2 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -67,12 +67,12 @@ typedef struct _f { char name[0]; } f; -inline unsigned int hash(off_t size, time_t mtime) +__attribute__((always_inline)) inline unsigned int hash(off_t size, time_t mtime) { return (size ^ mtime) & (NHASH - 1); } -inline int stcmp(struct stat *st1, struct stat *st2, int content_only) +__attribute__((always_inline)) inline int stcmp(struct stat *st1, struct stat *st2, int content_only) { if (content_only) return st1->st_size != st2->st_size; @@ -112,7 +112,7 @@ void usage(char *prog) unsigned int buf[NBUF]; char iobuf1[NIOBUF], iobuf2[NIOBUF]; -inline size_t add2(size_t a, size_t b) +__attribute__((always_inline)) inline size_t add2(size_t a, size_t b) { size_t sum = a + b; if (sum < a) { @@ -122,7 +122,7 @@ inline size_t add2(size_t a, size_t b) return sum; } -inline size_t add3(size_t a, size_t b, size_t c) +__attribute__((always_inline)) inline size_t add3(size_t a, size_t b, size_t c) { return add2(add2(a, b), c); } -- cgit v1.2.3-55-g7522 From 551e8963f4ca7850a2d91bcb5ffb6032a690e93f Mon Sep 17 00:00:00 2001 From: Todd Lewis Date: Tue, 28 Mar 2017 15:16:56 -0400 Subject: exclude files via pcre --- misc-utils/hardlink.1 | 14 +++++++++++++- misc-utils/hardlink.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 10 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 04228f4bb..b8bfe9d22 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -3,7 +3,7 @@ hardlink \- Consolidate duplicate files via hardlinks .SH "SYNOPSIS" .PP -\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-h\fP] directory1 [ directory2 ... ] +\fBhardlink\fP [\fB-c\fP] [\fB-n\fP] [\fB-v\fP] [\fB-vv\fP] [\fB-x pattern\fP] [\fB-h\fP] directory1 [ directory2 ... ] .SH "DESCRIPTION" .PP This manual page documents \fBhardlink\fP, a @@ -32,8 +32,14 @@ Do not perform the consolidation; only print what would be changed. Print summary after hardlinking. .IP "\fB-vv\fP" 10 Print every hardlinked file and bytes saved. Also print summary after hardlinking. +.IP "\fB-x pattern\fP" 10 +Exclude files and directories matching pattern from hardlinking. .IP "\fB-h\fP" 10 Show help. +.PP +The optional pattern for excluding files and directories must be a PCRE2 +compatible regular expression. Only the basename of the file or directory +is checked, not its path. Excluded directories' contents will not be examined. .SH "AUTHOR" .PP \fBhardlink\fP was written by Jakub Jelinek . @@ -48,3 +54,9 @@ it. If a directory tree does change, this may result in \fBhardlink\fP accessing files and/or directories outside of the intended directory tree. Thus, you must avoid running \fBhardlink\fP on potentially changing directory trees, and especially on directory trees under control of another user. +.PP +Historically \fBhardlink\fP silently excluded any names beginning with +".in.", as well as any names beginning with "." followed by exactly 6 +other characters. That prior behavior can be achieved by specifying +.br +-x '^(\.in\.|\.[^.]{6}$)' diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 16d8163e2..69f6a464c 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -21,6 +21,7 @@ /* Changes by Travers Carter to make atomic hardlinking */ #define _GNU_SOURCE +#define PCRE2_CODE_UNIT_WIDTH 8 #include #include #include @@ -31,12 +32,17 @@ #include #include #include +#include #define NHASH (1<<17) /* Must be a power of 2! */ #define NIOBUF (1<<12) #define NAMELEN 4096 #define NBUF 64 +pcre2_code *re; +PCRE2_SPTR exclude_pattern; +pcre2_match_data *match_data; + struct _f; typedef struct _h { struct _h *next; @@ -99,12 +105,13 @@ void doexit(int i) void usage(char *prog) { - fprintf (stderr, "Usage: %s [-cnvhf] directories...\n", prog); + fprintf (stderr, "Usage: %s [-cnvhf] [-x pat] directories...\n", prog); fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); fprintf (stderr, " -v Print summary after hardlinking.\n"); fprintf (stderr, " -vv Print every hardlinked file and bytes saved + summary.\n"); fprintf (stderr, " -f Force hardlinking across filesystems.\n"); + fprintf (stderr, " -x pat Exclude files matching pattern.\n"); fprintf (stderr, " -h Show help.\n"); exit(255); } @@ -328,8 +335,10 @@ int main(int argc, char **argv) { int ch; int i; + int errornumber; + PCRE2_SIZE erroroffset; dynstr nam1 = {NULL, 0}; - while ((ch = getopt (argc, argv, "cnvhf")) != -1) { + while ((ch = getopt (argc, argv, "cnvhfx:")) != -1) { switch (ch) { case 'n': no_link++; @@ -343,6 +352,9 @@ int main(int argc, char **argv) case 'f': force=1; break; + case 'x': + exclude_pattern = (PCRE2_SPTR)optarg; + break; case 'h': default: usage(argv[0]); @@ -350,6 +362,22 @@ int main(int argc, char **argv) } if (optind >= argc) usage(argv[0]); + if (exclude_pattern) { + re = pcre2_compile( + exclude_pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminate */ + 0, /* default options */ + &errornumber, + &erroroffset, + NULL); /* use default compile context */ + if (!re) { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); + fprintf(stderr, "pattern error at offset %d: %s\n", (int)erroroffset, buffer); + usage(argv[0]); + } + match_data = pcre2_match_data_create_from_pattern(re, NULL); + } for (i = optind; i < argc; i++) rf(argv[i]); while (dirs) { @@ -371,16 +399,23 @@ int main(int argc, char **argv) if (!di->d_name[0]) continue; if (di->d_name[0] == '.') { - char *q; - if (!di->d_name[1] || !strcmp (di->d_name, "..") || !strncmp (di->d_name, ".in.", 4)) + if (!di->d_name[1] || !strcmp(di->d_name, "..")) continue; - q = strrchr (di->d_name, '.'); - if (q && strlen (q) == 7 && q != di->d_name) { + } + if (re && pcre2_match( + re, /* compiled regex */ + (PCRE2_SPTR)di->d_name, + strlen(di->d_name), + 0, /* start at offset 0 */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL) /* use default match context */ + >= 0) { + if (verbose) { nam1.buf[nam1baselen] = 0; - if (verbose) - fprintf(stderr, "Skipping %s%s\n", nam1.buf, di->d_name); - continue; + fprintf(stderr,"Skipping %s%s\n", nam1.buf, di->d_name); } + continue; } { size_t subdirlen; -- cgit v1.2.3-55-g7522 From c64d7e60b19fefbbffbab72ecb89b7b4c652d04c Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Mon, 24 Apr 2017 00:06:03 +0200 Subject: Fixed version number, added changelog about Todd Lewis' patch --- misc-utils/hardlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 69f6a464c..8e74ca021 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -19,6 +19,7 @@ /* Changes by Rémy Card to use constants and add option -n. */ /* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ /* Changes by Travers Carter to make atomic hardlinking */ +/* Changes by Todd Lewis that adds option -x to exclude files with pcre lib */ #define _GNU_SOURCE #define PCRE2_CODE_UNIT_WIDTH 8 -- cgit v1.2.3-55-g7522 From 88824694f68b3da23954bf3bae5db8e915164111 Mon Sep 17 00:00:00 2001 From: utoddl Date: Mon, 14 Aug 2017 14:46:25 +0000 Subject: Update hardlink.1 The example regex in the BUGS section needs its backslashes backslashed.--- misc-utils/hardlink.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index b8bfe9d22..5aa022a1f 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -59,4 +59,4 @@ Historically \fBhardlink\fP silently excluded any names beginning with ".in.", as well as any names beginning with "." followed by exactly 6 other characters. That prior behavior can be achieved by specifying .br --x '^(\.in\.|\.[^.]{6}$)' +-x '^(\\.in\\.|\\.[^.]{6}$)' -- cgit v1.2.3-55-g7522 From 3b9498938911cabd10130adbcda797d3fe8a2763 Mon Sep 17 00:00:00 2001 From: Francisco Javier Tsao Santín Date: Tue, 17 Oct 2017 23:21:36 +0200 Subject: temporal fix before re-patch (updates from Fedora repo) --- misc-utils/hardlink.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index 5aa022a1f..b8bfe9d22 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -59,4 +59,4 @@ Historically \fBhardlink\fP silently excluded any names beginning with ".in.", as well as any names beginning with "." followed by exactly 6 other characters. That prior behavior can be achieved by specifying .br --x '^(\\.in\\.|\\.[^.]{6}$)' +-x '^(\.in\.|\.[^.]{6}$)' -- cgit v1.2.3-55-g7522 From 5aad3c12f30819ceed1025e9d20e14bd47d5932b Mon Sep 17 00:00:00 2001 From: Kevin Fenzi Date: Sun, 17 Sep 2017 14:07:42 -0700 Subject: fixes for the fixes Signed-off-by: Francisco Javier Tsao Santín --- misc-utils/hardlink.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.1 b/misc-utils/hardlink.1 index b8bfe9d22..5aa022a1f 100644 --- a/misc-utils/hardlink.1 +++ b/misc-utils/hardlink.1 @@ -59,4 +59,4 @@ Historically \fBhardlink\fP silently excluded any names beginning with ".in.", as well as any names beginning with "." followed by exactly 6 other characters. That prior behavior can be achieved by specifying .br --x '^(\.in\.|\.[^.]{6}$)' +-x '^(\\.in\\.|\\.[^.]{6}$)' -- cgit v1.2.3-55-g7522 From 04ae85a7e57691983c25f0917cfb8ae4dcb462d5 Mon Sep 17 00:00:00 2001 From: Ruediger Meier Date: Tue, 12 Jun 2018 14:04:06 +0200 Subject: hardlink: enable build with and without pcre2 Signed-off-by: Ruediger Meier --- .gitignore | 1 + configure.ac | 12 ++++++++++++ misc-utils/Makemodule.am | 12 ++++++++++++ misc-utils/hardlink.c | 19 +++++++++++++++++-- 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'misc-utils') diff --git a/.gitignore b/.gitignore index 9f2fb17db..b582a8627 100644 --- a/.gitignore +++ b/.gitignore @@ -96,6 +96,7 @@ ylwrap /fsfreeze /fstrim /getopt +/hardlink /hexdump /hwclock /ionice diff --git a/configure.ac b/configure.ac index b3430dac4..260283cf2 100644 --- a/configure.ac +++ b/configure.ac @@ -1372,6 +1372,18 @@ UL_REQUIRES_HAVE([setpriv], [linux_securebits_h], [securebits.h header file]) UL_REQUIRES_HAVE([setpriv], [cap_ng], [libcap-ng library]) AM_CONDITIONAL([BUILD_SETPRIV], [test "x$build_setpriv" = xyes]) +PKG_CHECK_MODULES([PCRE], [libpcre2-8], [have_pcre=yes], [have_pcre=no]) +AS_IF([test "x$have_pcre" = xyes ], [ + AC_DEFINE([HAVE_PCRE], [1], [Define if libpcre2 is available]) +]) +AM_CONDITIONAL([HAVE_PCRE], [test "x$have_pcre" = xyes]) + +AC_ARG_ENABLE([hardlink], + AS_HELP_STRING([--disable-hardlink], [do not build hardlink]), + [], [UL_DEFAULT_ENABLE([hardlink], [check])] +) +UL_BUILD_INIT([hardlink]) +AM_CONDITIONAL([BUILD_HARDLINK], [test "x$build_hardlink" = xyes]) AC_ARG_ENABLE([eject], AS_HELP_STRING([--disable-eject], [do not build eject]), diff --git a/misc-utils/Makemodule.am b/misc-utils/Makemodule.am index 36195b7a3..30b7c2f0f 100644 --- a/misc-utils/Makemodule.am +++ b/misc-utils/Makemodule.am @@ -211,3 +211,15 @@ fincore_SOURCES = misc-utils/fincore.c fincore_LDADD = $(LDADD) libsmartcols.la libcommon.la fincore_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) endif + +if BUILD_HARDLINK +usrbin_exec_PROGRAMS += hardlink +hardlink_SOURCES = misc-utils/hardlink.c +hardlink_LDADD = $(LDADD) libcommon.la +hardlink_CFLAGS = $(AM_CFLAGS) +if HAVE_PCRE +hardlink_LDADD += $(PCRE_LIBS) +hardlink_CFLAGS += $(PCRE_CFLAGS) +endif +dist_man_MANS += misc-utils/hardlink.1 +endif diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 8e74ca021..ba519993a 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -22,7 +22,6 @@ /* Changes by Todd Lewis that adds option -x to exclude files with pcre lib */ #define _GNU_SOURCE -#define PCRE2_CODE_UNIT_WIDTH 8 #include #include #include @@ -33,16 +32,21 @@ #include #include #include -#include +#ifdef HAVE_PCRE +# define PCRE2_CODE_UNIT_WIDTH 8 +# include +#endif #define NHASH (1<<17) /* Must be a power of 2! */ #define NIOBUF (1<<12) #define NAMELEN 4096 #define NBUF 64 +#ifdef HAVE_PCRE pcre2_code *re; PCRE2_SPTR exclude_pattern; pcre2_match_data *match_data; +#endif struct _f; typedef struct _h { @@ -336,8 +340,10 @@ int main(int argc, char **argv) { int ch; int i; +#ifdef HAVE_PCRE int errornumber; PCRE2_SIZE erroroffset; +#endif dynstr nam1 = {NULL, 0}; while ((ch = getopt (argc, argv, "cnvhfx:")) != -1) { switch (ch) { @@ -354,7 +360,12 @@ int main(int argc, char **argv) force=1; break; case 'x': +#ifdef HAVE_PCRE exclude_pattern = (PCRE2_SPTR)optarg; +#else + fprintf(stderr, "option x not supported (built without pcre2)\n"); + exit(1); +#endif break; case 'h': default: @@ -363,6 +374,7 @@ int main(int argc, char **argv) } if (optind >= argc) usage(argv[0]); +#ifdef HAVE_PCRE if (exclude_pattern) { re = pcre2_compile( exclude_pattern, /* the pattern */ @@ -379,6 +391,7 @@ int main(int argc, char **argv) } match_data = pcre2_match_data_create_from_pattern(re, NULL); } +#endif for (i = optind; i < argc; i++) rf(argv[i]); while (dirs) { @@ -403,6 +416,7 @@ int main(int argc, char **argv) if (!di->d_name[1] || !strcmp(di->d_name, "..")) continue; } +#ifdef HAVE_PCRE if (re && pcre2_match( re, /* compiled regex */ (PCRE2_SPTR)di->d_name, @@ -418,6 +432,7 @@ int main(int argc, char **argv) } continue; } +#endif { size_t subdirlen; growstr(&nam1, add2(nam1baselen, subdirlen = strlen(di->d_name))); -- cgit v1.2.3-55-g7522 From 55c000e1ebb95ebb222206da692ceb77d0ba8409 Mon Sep 17 00:00:00 2001 From: Ruediger Meier Date: Tue, 12 Jun 2018 19:32:33 +0200 Subject: hardlink: style indentations and license header Signed-off-by: Ruediger Meier --- misc-utils/hardlink.c | 769 +++++++++++++++++++++++++++----------------------- 1 file changed, 413 insertions(+), 356 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index ba519993a..335c6c452 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -1,25 +1,23 @@ -/* Copyright (C) 2001 Red Hat, Inc. - - Written by Jakub Jelinek . - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ - -/* Changes by Rémy Card to use constants and add option -n. */ -/* Changes by Jindrich Novy to add option -h, -f, replace mmap(2), fix overflows */ -/* Changes by Travers Carter to make atomic hardlinking */ -/* Changes by Todd Lewis that adds option -x to exclude files with pcre lib */ +/* + * hardlink - consolidate duplicate files via hardlinks + * + * Copyright (C) 2018 Red Hat, Inc. All rights reserved. + * Written by Jakub Jelinek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ #define _GNU_SOURCE #include @@ -37,10 +35,10 @@ # include #endif -#define NHASH (1<<17) /* Must be a power of 2! */ -#define NIOBUF (1<<12) -#define NAMELEN 4096 -#define NBUF 64 +#define NHASH (1<<17) /* Must be a power of 2! */ +#define NIOBUF (1<<12) +#define NAMELEN 4096 +#define NBUF 64 #ifdef HAVE_PCRE pcre2_code *re; @@ -50,15 +48,15 @@ pcre2_match_data *match_data; struct _f; typedef struct _h { - struct _h *next; - struct _f *chain; - off_t size; - time_t mtime; + struct _h *next; + struct _f *chain; + off_t size; + time_t mtime; } h; typedef struct _d { - struct _d *next; - char name[0]; + struct _d *next; + char name[0]; } d; d *dirs; @@ -71,377 +69,436 @@ int content_only = 0; int force = 0; typedef struct _f { - struct _f *next; - ino_t ino; - dev_t dev; - unsigned int cksum; - char name[0]; + struct _f *next; + ino_t ino; + dev_t dev; + unsigned int cksum; + char name[0]; } f; -__attribute__((always_inline)) inline unsigned int hash(off_t size, time_t mtime) +__attribute__ ((always_inline)) +inline unsigned int hash(off_t size, time_t mtime) { - return (size ^ mtime) & (NHASH - 1); + return (size ^ mtime) & (NHASH - 1); } -__attribute__((always_inline)) inline int stcmp(struct stat *st1, struct stat *st2, int content_only) +__attribute__ ((always_inline)) +inline int stcmp(struct stat *st1, struct stat *st2, int content_only) { - if (content_only) - return st1->st_size != st2->st_size; - return st1->st_mode != st2->st_mode || st1->st_uid != st2->st_uid || - st1->st_gid != st2->st_gid || st1->st_size != st2->st_size || - st1->st_mtime != st2->st_mtime; + if (content_only) + return st1->st_size != st2->st_size; + return st1->st_mode != st2->st_mode || st1->st_uid != st2->st_uid || + st1->st_gid != st2->st_gid || st1->st_size != st2->st_size || + st1->st_mtime != st2->st_mtime; } long long ndirs, nobjects, nregfiles, ncomp, nlinks, nsaved; void doexit(int i) { - if (verbose) { - fprintf(stderr, "\n\n"); - fprintf(stderr, "Directories %lld\n", ndirs); - fprintf(stderr, "Objects %lld\n", nobjects); - fprintf(stderr, "IFREG %lld\n", nregfiles); - fprintf(stderr, "Comparisons %lld\n", ncomp); - fprintf(stderr, "%s %lld\n", (no_link ? "Would link" : "Linked"), nlinks); - fprintf(stderr, "%s %lld\n", (no_link ? "Would save" : "saved"), nsaved); - } - exit(i); + if (verbose) { + fprintf(stderr, "\n\n"); + fprintf(stderr, "Directories %lld\n", ndirs); + fprintf(stderr, "Objects %lld\n", nobjects); + fprintf(stderr, "IFREG %lld\n", nregfiles); + fprintf(stderr, "Comparisons %lld\n", ncomp); + fprintf(stderr, "%s %lld\n", + (no_link ? "Would link" : "Linked"), nlinks); + fprintf(stderr, "%s %lld\n", (no_link ? "Would save" : "saved"), + nsaved); + } + exit(i); } void usage(char *prog) { - fprintf (stderr, "Usage: %s [-cnvhf] [-x pat] directories...\n", prog); - fprintf (stderr, " -c When finding candidates for linking, compare only file contents.\n"); - fprintf (stderr, " -n Don't actually link anything, just report what would be done.\n"); - fprintf (stderr, " -v Print summary after hardlinking.\n"); - fprintf (stderr, " -vv Print every hardlinked file and bytes saved + summary.\n"); - fprintf (stderr, " -f Force hardlinking across filesystems.\n"); - fprintf (stderr, " -x pat Exclude files matching pattern.\n"); - fprintf (stderr, " -h Show help.\n"); - exit(255); + fprintf(stderr, "Usage: %s [-cnvhf] [-x pat] directories...\n", prog); + fprintf(stderr, + " -c When finding candidates for linking, compare only file contents.\n"); + fprintf(stderr, + " -n Don't actually link anything, just report what would be done.\n"); + fprintf(stderr, " -v Print summary after hardlinking.\n"); + fprintf(stderr, + " -vv Print every hardlinked file and bytes saved + summary.\n"); + fprintf(stderr, " -f Force hardlinking across filesystems.\n"); + fprintf(stderr, " -x pat Exclude files matching pattern.\n"); + fprintf(stderr, " -h Show help.\n"); + exit(255); } unsigned int buf[NBUF]; char iobuf1[NIOBUF], iobuf2[NIOBUF]; -__attribute__((always_inline)) inline size_t add2(size_t a, size_t b) +__attribute__ ((always_inline)) +inline size_t add2(size_t a, size_t b) { - size_t sum = a + b; - if (sum < a) { - fprintf(stderr, "\nInteger overflow\n"); - doexit(5); - } - return sum; + size_t sum = a + b; + if (sum < a) { + fprintf(stderr, "\nInteger overflow\n"); + doexit(5); + } + return sum; } -__attribute__((always_inline)) inline size_t add3(size_t a, size_t b, size_t c) +__attribute__ ((always_inline)) +inline size_t add3(size_t a, size_t b, size_t c) { - return add2(add2(a, b), c); + return add2(add2(a, b), c); } typedef struct { - char *buf; - size_t alloc; + char *buf; + size_t alloc; } dynstr; -void growstr(dynstr *str, size_t newlen) +void growstr(dynstr * str, size_t newlen) { - if (newlen < str->alloc) - return; - str->buf = realloc(str->buf, str->alloc = add2(newlen, 1)); - if (!str->buf) { - fprintf(stderr, "\nOut of memory 4\n"); - doexit(4); - } + if (newlen < str->alloc) + return; + str->buf = realloc(str->buf, str->alloc = add2(newlen, 1)); + if (!str->buf) { + fprintf(stderr, "\nOut of memory 4\n"); + doexit(4); + } } + dev_t dev = 0; -void rf (const char *name) +void rf(const char *name) { - struct stat st, st2, st3; - const size_t namelen = strlen(name); - nobjects++; - if (lstat (name, &st)) - return; - if (st.st_dev != dev && !force) { - if (dev) { - fprintf(stderr, "%s is on different filesystem than the rest.\nUse -f option to override.\n", name); - doexit(6); - } - dev = st.st_dev; - } - if (S_ISDIR (st.st_mode)) { - d * dp = malloc(add3(sizeof(d), namelen, 1)); - if (!dp) { - fprintf(stderr, "\nOut of memory 3\n"); - doexit(3); - } - memcpy(dp->name, name, namelen + 1); - dp->next = dirs; - dirs = dp; - } else if (S_ISREG (st.st_mode)) { - int fd, i; - f * fp, * fp2; - h * hp; - const char *n1, *n2; - int cksumsize = sizeof(buf); - unsigned int cksum; - time_t mtime = content_only ? 0 : st.st_mtime; - unsigned int hsh = hash (st.st_size, mtime); - off_t fsize; - nregfiles++; - if (verbose > 1) - fprintf(stderr, " %s", name); - fd = open (name, O_RDONLY); - if (fd < 0) return; - if (st.st_size < sizeof(buf)) { - cksumsize = st.st_size; - memset (((char *)buf) + cksumsize, 0, (sizeof(buf) - cksumsize) % sizeof(buf[0])); - } - if (read (fd, buf, cksumsize) != cksumsize) { - close(fd); - if (verbose > 1 && namelen <= NAMELEN) - fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); - return; - } - cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]); - for (i = 0, cksum = 0; i < cksumsize; i++) { - if (cksum + buf[i] < cksum) - cksum += buf[i] + 1; - else - cksum += buf[i]; - } - for (hp = hps[hsh]; hp; hp = hp->next) - if (hp->size == st.st_size && hp->mtime == mtime) - break; - if (!hp) { - hp = malloc(sizeof(h)); - if (!hp) { - fprintf(stderr, "\nOut of memory 1\n"); - doexit(1); - } - hp->size = st.st_size; - hp->mtime = mtime; - hp->chain = NULL; - hp->next = hps[hsh]; - hps[hsh] = hp; - } - for (fp = hp->chain; fp; fp = fp->next) - if (fp->cksum == cksum) - break; - for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) - if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) { - close(fd); - if (verbose > 1 && namelen <= NAMELEN) - fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); - return; - } - for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) - if (!lstat (fp2->name, &st2) && S_ISREG (st2.st_mode) && - !stcmp (&st, &st2, content_only) && - st2.st_ino != st.st_ino && - st2.st_dev == st.st_dev) { - int fd2 = open (fp2->name, O_RDONLY); - if (fd2 < 0) continue; - if (fstat (fd2, &st2) || !S_ISREG (st2.st_mode) || st2.st_size == 0) { - close (fd2); - continue; - } - ncomp++; - lseek(fd, 0, SEEK_SET); - for (fsize = st.st_size; fsize > 0; fsize -= NIOBUF) { - off_t rsize = fsize >= NIOBUF ? NIOBUF : fsize; - if (read (fd, iobuf1, rsize) != rsize || read (fd2, iobuf2, rsize) != rsize) { - close(fd); - close(fd2); - fprintf(stderr, "\nReading error\n"); - return; - } - if (memcmp (iobuf1, iobuf2, rsize)) break; + struct stat st, st2, st3; + const size_t namelen = strlen(name); + nobjects++; + if (lstat(name, &st)) + return; + if (st.st_dev != dev && !force) { + if (dev) { + fprintf(stderr, + "%s is on different filesystem than the rest.\nUse -f option to override.\n", + name); + doexit(6); + } + dev = st.st_dev; } - close(fd2); - if (fsize > 0) continue; - if (lstat (name, &st3)) { - fprintf(stderr, "\nCould not stat %s again\n", name); - close(fd); - return; - } - st3.st_atime = st.st_atime; - if (stcmp (&st, &st3, 0)) { - fprintf(stderr, "\nFile %s changed underneath us\n", name); - close(fd); - return; - } - n1 = fp2->name; - n2 = name; - if (!no_link) { - const char *suffix = ".$$$___cleanit___$$$"; - const size_t suffixlen = strlen(suffix); - size_t n2len = strlen(n2); - dynstr nam2 = {NULL, 0}; - growstr(&nam2, add2(n2len, suffixlen)); - memcpy(nam2.buf, n2, n2len); - memcpy(&nam2.buf[n2len], suffix, suffixlen + 1); - /* First create a temporary link to n1 under a new name */ - if (link(n1, nam2.buf)) { - fprintf(stderr, "\nFailed to hardlink %s to %s (create temporary link as %s failed - %s)\n", n1, n2, nam2.buf, strerror(errno)); - free(nam2.buf); - continue; - } - /* Then rename into place over the existing n2 */ - if (rename (nam2.buf, n2)) { - fprintf(stderr, "\nFailed to hardlink %s to %s (rename temporary link to %s failed - %s)\n", n1, n2, n2, strerror(errno)); - /* Something went wrong, try to remove the now redundant temporary link */ - if (unlink(nam2.buf)) { - fprintf(stderr, "\nFailed to remove temporary link %s - %s\n", nam2.buf, strerror(errno)); - } - free(nam2.buf); - continue; - } - free(nam2.buf); - } - nlinks++; - if (st3.st_nlink > 1) { - /* We actually did not save anything this time, since the link second argument - had some other links as well. */ - if (verbose > 1) - fprintf(stderr, "\r%*s\r%s %s to %s\n", (int)(((namelen > NAMELEN) ? 0 : namelen) + 2), "", (no_link ? "Would link" : "Linked"), n1, n2); - } else { - nsaved+=((st.st_size+4095)/4096)*4096; - if (verbose > 1) - fprintf(stderr, "\r%*s\r%s %s to %s, %s %ld\n", (int)(((namelen > NAMELEN) ? 0 : namelen) + 2), "", (no_link ? "Would link" : "Linked"), n1, n2, (no_link ? "would save" : "saved"), st.st_size); + if (S_ISDIR(st.st_mode)) { + d *dp = malloc(add3(sizeof(d), namelen, 1)); + if (!dp) { + fprintf(stderr, "\nOut of memory 3\n"); + doexit(3); + } + memcpy(dp->name, name, namelen + 1); + dp->next = dirs; + dirs = dp; + } else if (S_ISREG(st.st_mode)) { + int fd, i; + f *fp, *fp2; + h *hp; + const char *n1, *n2; + int cksumsize = sizeof(buf); + unsigned int cksum; + time_t mtime = content_only ? 0 : st.st_mtime; + unsigned int hsh = hash(st.st_size, mtime); + off_t fsize; + nregfiles++; + if (verbose > 1) + fprintf(stderr, " %s", name); + fd = open(name, O_RDONLY); + if (fd < 0) + return; + if (st.st_size < sizeof(buf)) { + cksumsize = st.st_size; + memset(((char *)buf) + cksumsize, 0, + (sizeof(buf) - cksumsize) % sizeof(buf[0])); + } + if (read(fd, buf, cksumsize) != cksumsize) { + close(fd); + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", (int)(namelen + 2), + ""); + return; + } + cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]); + for (i = 0, cksum = 0; i < cksumsize; i++) { + if (cksum + buf[i] < cksum) + cksum += buf[i] + 1; + else + cksum += buf[i]; + } + for (hp = hps[hsh]; hp; hp = hp->next) + if (hp->size == st.st_size && hp->mtime == mtime) + break; + if (!hp) { + hp = malloc(sizeof(h)); + if (!hp) { + fprintf(stderr, "\nOut of memory 1\n"); + doexit(1); + } + hp->size = st.st_size; + hp->mtime = mtime; + hp->chain = NULL; + hp->next = hps[hsh]; + hps[hsh] = hp; + } + for (fp = hp->chain; fp; fp = fp->next) + if (fp->cksum == cksum) + break; + for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) + if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) { + close(fd); + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", + (int)(namelen + 2), ""); + return; + } + for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) + if (!lstat(fp2->name, &st2) && S_ISREG(st2.st_mode) && + !stcmp(&st, &st2, content_only) && + st2.st_ino != st.st_ino && + st2.st_dev == st.st_dev) { + int fd2 = open(fp2->name, O_RDONLY); + if (fd2 < 0) + continue; + if (fstat(fd2, &st2) || !S_ISREG(st2.st_mode) + || st2.st_size == 0) { + close(fd2); + continue; + } + ncomp++; + lseek(fd, 0, SEEK_SET); + for (fsize = st.st_size; fsize > 0; + fsize -= NIOBUF) { + off_t rsize = + fsize >= NIOBUF ? NIOBUF : fsize; + if (read(fd, iobuf1, rsize) != rsize + || read(fd2, iobuf2, + rsize) != rsize) { + close(fd); + close(fd2); + fprintf(stderr, + "\nReading error\n"); + return; + } + if (memcmp(iobuf1, iobuf2, rsize)) + break; + } + close(fd2); + if (fsize > 0) + continue; + if (lstat(name, &st3)) { + fprintf(stderr, + "\nCould not stat %s again\n", + name); + close(fd); + return; + } + st3.st_atime = st.st_atime; + if (stcmp(&st, &st3, 0)) { + fprintf(stderr, + "\nFile %s changed underneath us\n", + name); + close(fd); + return; + } + n1 = fp2->name; + n2 = name; + if (!no_link) { + const char *suffix = + ".$$$___cleanit___$$$"; + const size_t suffixlen = strlen(suffix); + size_t n2len = strlen(n2); + dynstr nam2 = { NULL, 0 }; + growstr(&nam2, add2(n2len, suffixlen)); + memcpy(nam2.buf, n2, n2len); + memcpy(&nam2.buf[n2len], suffix, + suffixlen + 1); + /* First create a temporary link to n1 under a new name */ + if (link(n1, nam2.buf)) { + fprintf(stderr, + "\nFailed to hardlink %s to %s (create temporary link as %s failed - %s)\n", + n1, n2, nam2.buf, + strerror(errno)); + free(nam2.buf); + continue; + } + /* Then rename into place over the existing n2 */ + if (rename(nam2.buf, n2)) { + fprintf(stderr, + "\nFailed to hardlink %s to %s (rename temporary link to %s failed - %s)\n", + n1, n2, n2, + strerror(errno)); + /* Something went wrong, try to remove the now redundant temporary link */ + if (unlink(nam2.buf)) { + fprintf(stderr, + "\nFailed to remove temporary link %s - %s\n", + nam2.buf, + strerror + (errno)); + } + free(nam2.buf); + continue; + } + free(nam2.buf); + } + nlinks++; + if (st3.st_nlink > 1) { + /* We actually did not save anything this time, since the link second argument + had some other links as well. */ + if (verbose > 1) + fprintf(stderr, + "\r%*s\r%s %s to %s\n", + (int)(((namelen > + NAMELEN) ? 0 : + namelen) + 2), + "", + (no_link ? "Would link" + : "Linked"), n1, n2); + } else { + nsaved += + ((st.st_size + 4095) / 4096) * 4096; + if (verbose > 1) + fprintf(stderr, + "\r%*s\r%s %s to %s, %s %ld\n", + (int)(((namelen > + NAMELEN) ? 0 : + namelen) + 2), + "", + (no_link ? "Would link" + : "Linked"), n1, n2, + (no_link ? "would save" + : "saved"), + st.st_size); + } + close(fd); + return; + } + fp2 = malloc(add3(sizeof(f), namelen, 1)); + if (!fp2) { + fprintf(stderr, "\nOut of memory 2\n"); + doexit(2); + } + close(fd); + fp2->ino = st.st_ino; + fp2->dev = st.st_dev; + fp2->cksum = cksum; + memcpy(fp2->name, name, namelen + 1); + if (fp) { + fp2->next = fp->next; + fp->next = fp2; + } else { + fp2->next = hp->chain; + hp->chain = fp2; + } + if (verbose > 1 && namelen <= NAMELEN) + fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); + return; } - close(fd); - return; - } - fp2 = malloc(add3(sizeof(f), namelen, 1)); - if (!fp2) { - fprintf(stderr, "\nOut of memory 2\n"); - doexit(2); - } - close(fd); - fp2->ino = st.st_ino; - fp2->dev = st.st_dev; - fp2->cksum = cksum; - memcpy(fp2->name, name, namelen + 1); - if (fp) { - fp2->next = fp->next; - fp->next = fp2; - } else { - fp2->next = hp->chain; - hp->chain = fp2; - } - if (verbose > 1 && namelen <= NAMELEN) - fprintf(stderr, "\r%*s\r", (int)(namelen + 2), ""); - return; - } } int main(int argc, char **argv) { - int ch; - int i; + int ch; + int i; #ifdef HAVE_PCRE - int errornumber; - PCRE2_SIZE erroroffset; + int errornumber; + PCRE2_SIZE erroroffset; #endif - dynstr nam1 = {NULL, 0}; - while ((ch = getopt (argc, argv, "cnvhfx:")) != -1) { - switch (ch) { - case 'n': - no_link++; - break; - case 'v': - verbose++; - break; - case 'c': - content_only++; - break; - case 'f': - force=1; - break; - case 'x': + dynstr nam1 = { NULL, 0 }; + while ((ch = getopt(argc, argv, "cnvhfx:")) != -1) { + switch (ch) { + case 'n': + no_link++; + break; + case 'v': + verbose++; + break; + case 'c': + content_only++; + break; + case 'f': + force = 1; + break; + case 'x': #ifdef HAVE_PCRE - exclude_pattern = (PCRE2_SPTR)optarg; + exclude_pattern = (PCRE2_SPTR) optarg; #else - fprintf(stderr, "option x not supported (built without pcre2)\n"); - exit(1); + fprintf(stderr, "option x not supported (built without pcre2)\n"); + exit(1); #endif - break; - case 'h': - default: - usage(argv[0]); - } - } - if (optind >= argc) - usage(argv[0]); + break; + case 'h': + default: + usage(argv[0]); + } + } + if (optind >= argc) + usage(argv[0]); #ifdef HAVE_PCRE - if (exclude_pattern) { - re = pcre2_compile( - exclude_pattern, /* the pattern */ - PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminate */ - 0, /* default options */ - &errornumber, - &erroroffset, - NULL); /* use default compile context */ - if (!re) { - PCRE2_UCHAR buffer[256]; - pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); - fprintf(stderr, "pattern error at offset %d: %s\n", (int)erroroffset, buffer); - usage(argv[0]); - } - match_data = pcre2_match_data_create_from_pattern(re, NULL); - } + if (exclude_pattern) { + re = pcre2_compile(exclude_pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminate */ + 0, /* default options */ + &errornumber, &erroroffset, NULL); /* use default compile context */ + if (!re) { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, + sizeof(buffer)); + fprintf(stderr, "pattern error at offset %d: %s\n", + (int)erroroffset, buffer); + usage(argv[0]); + } + match_data = pcre2_match_data_create_from_pattern(re, NULL); + } #endif - for (i = optind; i < argc; i++) - rf(argv[i]); - while (dirs) { - DIR *dh; - struct dirent *di; - d * dp = dirs; - size_t nam1baselen = strlen(dp->name); - dirs = dp->next; - growstr(&nam1, add2(nam1baselen, 1)); - memcpy(nam1.buf, dp->name, nam1baselen); - free (dp); - nam1.buf[nam1baselen++] = '/'; - nam1.buf[nam1baselen] = 0; - dh = opendir (nam1.buf); - if (dh == NULL) - continue; - ndirs++; - while ((di = readdir (dh)) != NULL) { - if (!di->d_name[0]) - continue; - if (di->d_name[0] == '.') { - if (!di->d_name[1] || !strcmp(di->d_name, "..")) - continue; - } + for (i = optind; i < argc; i++) + rf(argv[i]); + while (dirs) { + DIR *dh; + struct dirent *di; + d *dp = dirs; + size_t nam1baselen = strlen(dp->name); + dirs = dp->next; + growstr(&nam1, add2(nam1baselen, 1)); + memcpy(nam1.buf, dp->name, nam1baselen); + free(dp); + nam1.buf[nam1baselen++] = '/'; + nam1.buf[nam1baselen] = 0; + dh = opendir(nam1.buf); + if (dh == NULL) + continue; + ndirs++; + while ((di = readdir(dh)) != NULL) { + if (!di->d_name[0]) + continue; + if (di->d_name[0] == '.') { + if (!di->d_name[1] || !strcmp(di->d_name, "..")) + continue; + } #ifdef HAVE_PCRE - if (re && pcre2_match( - re, /* compiled regex */ - (PCRE2_SPTR)di->d_name, - strlen(di->d_name), - 0, /* start at offset 0 */ - 0, /* default options */ - match_data, /* block for storing the result */ - NULL) /* use default match context */ - >= 0) { - if (verbose) { - nam1.buf[nam1baselen] = 0; - fprintf(stderr,"Skipping %s%s\n", nam1.buf, di->d_name); - } - continue; - } + if (re && pcre2_match(re, /* compiled regex */ + (PCRE2_SPTR) di->d_name, strlen(di->d_name), 0, /* start at offset 0 */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL) /* use default match context */ + >=0) { + if (verbose) { + nam1.buf[nam1baselen] = 0; + fprintf(stderr, "Skipping %s%s\n", + nam1.buf, di->d_name); + } + continue; + } #endif - { - size_t subdirlen; - growstr(&nam1, add2(nam1baselen, subdirlen = strlen(di->d_name))); - memcpy(&nam1.buf[nam1baselen], di->d_name, add2(subdirlen, 1)); - } - rf(nam1.buf); - } - closedir(dh); - } - doexit(0); - return 0; + { + size_t subdirlen; + growstr(&nam1, + add2(nam1baselen, subdirlen = + strlen(di->d_name))); + memcpy(&nam1.buf[nam1baselen], di->d_name, + add2(subdirlen, 1)); + } + rf(nam1.buf); + } + closedir(dh); + } + doexit(0); + return 0; } -- cgit v1.2.3-55-g7522 From 53071734aa19a7d1ee4cf71590e3521aa89aaa54 Mon Sep 17 00:00:00 2001 From: Ruediger Meier Date: Tue, 12 Jun 2018 20:19:21 +0200 Subject: hardlink: fix compiler warnings Signed-off-by: Ruediger Meier --- misc-utils/hardlink.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 335c6c452..0bd33db38 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -19,7 +19,6 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#define _GNU_SOURCE #include #include #include @@ -77,13 +76,13 @@ typedef struct _f { } f; __attribute__ ((always_inline)) -inline unsigned int hash(off_t size, time_t mtime) +static inline unsigned int hash(off_t size, time_t mtime) { return (size ^ mtime) & (NHASH - 1); } __attribute__ ((always_inline)) -inline int stcmp(struct stat *st1, struct stat *st2, int content_only) +static inline int stcmp(struct stat *st1, struct stat *st2, int content_only) { if (content_only) return st1->st_size != st2->st_size; @@ -94,7 +93,7 @@ inline int stcmp(struct stat *st1, struct stat *st2, int content_only) long long ndirs, nobjects, nregfiles, ncomp, nlinks, nsaved; -void doexit(int i) +static void doexit(int i) { if (verbose) { fprintf(stderr, "\n\n"); @@ -110,7 +109,7 @@ void doexit(int i) exit(i); } -void usage(char *prog) +static void usage(char *prog) { fprintf(stderr, "Usage: %s [-cnvhf] [-x pat] directories...\n", prog); fprintf(stderr, @@ -130,7 +129,7 @@ unsigned int buf[NBUF]; char iobuf1[NIOBUF], iobuf2[NIOBUF]; __attribute__ ((always_inline)) -inline size_t add2(size_t a, size_t b) +static inline size_t add2(size_t a, size_t b) { size_t sum = a + b; if (sum < a) { @@ -141,7 +140,7 @@ inline size_t add2(size_t a, size_t b) } __attribute__ ((always_inline)) -inline size_t add3(size_t a, size_t b, size_t c) +static inline size_t add3(size_t a, size_t b, size_t c) { return add2(add2(a, b), c); } @@ -151,7 +150,7 @@ typedef struct { size_t alloc; } dynstr; -void growstr(dynstr * str, size_t newlen) +static void growstr(dynstr * str, size_t newlen) { if (newlen < str->alloc) return; @@ -163,7 +162,7 @@ void growstr(dynstr * str, size_t newlen) } dev_t dev = 0; -void rf(const char *name) +static void rf(const char *name) { struct stat st, st2, st3; const size_t namelen = strlen(name); @@ -204,7 +203,7 @@ void rf(const char *name) fd = open(name, O_RDONLY); if (fd < 0) return; - if (st.st_size < sizeof(buf)) { + if ((size_t)st.st_size < sizeof(buf)) { cksumsize = st.st_size; memset(((char *)buf) + cksumsize, 0, (sizeof(buf) - cksumsize) % sizeof(buf[0])); @@ -356,7 +355,7 @@ void rf(const char *name) ((st.st_size + 4095) / 4096) * 4096; if (verbose > 1) fprintf(stderr, - "\r%*s\r%s %s to %s, %s %ld\n", + "\r%*s\r%s %s to %s, %s %jd\n", (int)(((namelen > NAMELEN) ? 0 : namelen) + 2), @@ -365,7 +364,7 @@ void rf(const char *name) : "Linked"), n1, n2, (no_link ? "would save" : "saved"), - st.st_size); + (intmax_t)st.st_size); } close(fd); return; -- cgit v1.2.3-55-g7522 From 7d50d3612163833f0863f4c007159bc8edf65663 Mon Sep 17 00:00:00 2001 From: Ruediger Meier Date: Thu, 14 Jun 2018 01:13:56 +0200 Subject: hardlink: util-linux usage Signed-off-by: Ruediger Meier --- misc-utils/hardlink.c | 71 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 20 deletions(-) (limited to 'misc-utils') diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 0bd33db38..56edaa686 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -34,6 +35,10 @@ # include #endif +#include "c.h" +#include "nls.h" +#include "closestream.h" + #define NHASH (1<<17) /* Must be a power of 2! */ #define NIOBUF (1<<12) #define NAMELEN 4096 @@ -109,22 +114,28 @@ static void doexit(int i) exit(i); } -static void usage(char *prog) +static void __attribute__((__noreturn__)) usage(void) { - fprintf(stderr, "Usage: %s [-cnvhf] [-x pat] directories...\n", prog); - fprintf(stderr, - " -c When finding candidates for linking, compare only file contents.\n"); - fprintf(stderr, - " -n Don't actually link anything, just report what would be done.\n"); - fprintf(stderr, " -v Print summary after hardlinking.\n"); - fprintf(stderr, - " -vv Print every hardlinked file and bytes saved + summary.\n"); - fprintf(stderr, " -f Force hardlinking across filesystems.\n"); - fprintf(stderr, " -x pat Exclude files matching pattern.\n"); - fprintf(stderr, " -h Show help.\n"); - exit(255); + fputs(USAGE_HEADER, stdout); + printf(_(" %s [options] directory...\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, stdout); + puts(_("Consolidate duplicate files using hardlinks.")); + + fputs(USAGE_OPTIONS, stdout); + puts(_(" -c when finding candidates for linking, compare only file contents")); + puts(_(" -n don't actually link anything, just report what would be done")); + puts(_(" -v print summary after hardlinking")); + puts(_(" -vv print every hardlinked file and bytes saved + summary")); + puts(_(" -f force hardlinking across filesystems")); + puts(_(" -x exclude files matching pattern")); + fputs(USAGE_SEPARATOR, stdout); + printf(USAGE_HELP_OPTIONS(16)); /* char offset to align option descriptions */ + printf(USAGE_MAN_TAIL("hardlink(1)")); + exit(EXIT_SUCCESS); } + unsigned int buf[NBUF]; char iobuf1[NIOBUF], iobuf2[NIOBUF]; @@ -401,7 +412,19 @@ int main(int argc, char **argv) PCRE2_SIZE erroroffset; #endif dynstr nam1 = { NULL, 0 }; - while ((ch = getopt(argc, argv, "cnvhfx:")) != -1) { + + static const struct option longopts[] = { + { "version", no_argument, NULL, 'V' }, + { "help", no_argument, NULL, 'h' }, + { NULL, 0, NULL, 0 }, + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + atexit(close_stdout); + + while ((ch = getopt_long(argc, argv, "cnvfx:Vh", longopts, NULL)) != -1) { switch (ch) { case 'n': no_link++; @@ -419,17 +442,26 @@ int main(int argc, char **argv) #ifdef HAVE_PCRE exclude_pattern = (PCRE2_SPTR) optarg; #else - fprintf(stderr, "option x not supported (built without pcre2)\n"); + errx(EXIT_FAILURE, + _("option -x not supported (built without pcre2)")); exit(1); #endif break; + case 'V': + printf(UTIL_LINUX_VERSION); + return EXIT_SUCCESS; case 'h': + usage(); default: - usage(argv[0]); + errtryhelp(EXIT_FAILURE); } } - if (optind >= argc) - usage(argv[0]); + + if (optind == argc) { + warnx(_("no directory specified")); + errtryhelp(EXIT_FAILURE); + } + #ifdef HAVE_PCRE if (exclude_pattern) { re = pcre2_compile(exclude_pattern, /* the pattern */ @@ -440,9 +472,8 @@ int main(int argc, char **argv) PCRE2_UCHAR buffer[256]; pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); - fprintf(stderr, "pattern error at offset %d: %s\n", + errx(EXIT_FAILURE, _("pattern error at offset %d: %s"), (int)erroroffset, buffer); - usage(argv[0]); } match_data = pcre2_match_data_create_from_pattern(re, NULL); } -- cgit v1.2.3-55-g7522