Annotation of embedaddon/rsync/patches/link-by-hash.diff, revision 1.1
1.1 ! misho 1: Jason M. Felice wrote:
! 2:
! 3: This patch adds the --link-by-hash=DIR option, which hard links received files
! 4: in a link farm arranged by MD4 or MD5 file hash. The result is that the system
! 5: will only store one copy of the unique contents of each file, regardless of the
! 6: file's name.
! 7:
! 8: To use this patch, run these commands for a successful build:
! 9:
! 10: patch -p1 <patches/link-by-hash.diff
! 11: ./prepare-source
! 12: ./configure
! 13: make
! 14:
! 15: based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
! 16: diff --git a/Makefile.in b/Makefile.in
! 17: --- a/Makefile.in
! 18: +++ b/Makefile.in
! 19: @@ -44,7 +44,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \
! 20: util.o util2.o main.o checksum.o match.o syscall.o log.o backup.o delete.o
! 21: OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \
! 22: usage.o fileio.o batch.o clientname.o chmod.o acls.o xattrs.o
! 23: -OBJS3=progress.o pipe.o @ASM@
! 24: +OBJS3=progress.o pipe.o hashlink.o @ASM@
! 25: DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
! 26: popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
! 27: popt/popthelp.o popt/poptparse.o
! 28: diff --git a/checksum.c b/checksum.c
! 29: --- a/checksum.c
! 30: +++ b/checksum.c
! 31: @@ -40,6 +40,8 @@ extern int whole_file;
! 32: extern int checksum_seed;
! 33: extern int protocol_version;
! 34: extern int proper_seed_order;
! 35: +extern char *link_by_hash_dir;
! 36: +extern char link_by_hash_extra_sum[MAX_DIGEST_LEN];
! 37: extern const char *checksum_choice;
! 38:
! 39: struct name_num_obj valid_checksums = {
! 40: @@ -444,7 +446,7 @@ static union {
! 41: MD4_CTX m4;
! 42: #endif
! 43: MD5_CTX m5;
! 44: -} ctx;
! 45: +} ctx, ctx2;
! 46: #ifdef SUPPORT_XXHASH
! 47: static XXH64_state_t* xxh64_state;
! 48: #endif
! 49: @@ -483,6 +485,8 @@ void sum_init(int csum_type, int seed)
! 50: #endif
! 51: case CSUM_MD5:
! 52: MD5_Init(&ctx.m5);
! 53: + if (link_by_hash_dir)
! 54: + MD5_Init(&ctx2.m5);
! 55: break;
! 56: case CSUM_MD4:
! 57: #ifdef USE_OPENSSL
! 58: @@ -533,6 +537,8 @@ void sum_update(const char *p, int32 len)
! 59: #endif
! 60: case CSUM_MD5:
! 61: MD5_Update(&ctx.m5, (uchar *)p, len);
! 62: + if (link_by_hash_dir)
! 63: + MD5_Update(&ctx2.m5, (uchar *)p, len);
! 64: break;
! 65: case CSUM_MD4:
! 66: #ifdef USE_OPENSSL
! 67: @@ -598,6 +604,8 @@ int sum_end(char *sum)
! 68: #endif
! 69: case CSUM_MD5:
! 70: MD5_Final((uchar *)sum, &ctx.m5);
! 71: + if (link_by_hash_dir)
! 72: + MD5_Final((uchar *)link_by_hash_extra_sum, &ctx2.m5);
! 73: break;
! 74: case CSUM_MD4:
! 75: #ifdef USE_OPENSSL
! 76: diff --git a/clientserver.c b/clientserver.c
! 77: --- a/clientserver.c
! 78: +++ b/clientserver.c
! 79: @@ -52,6 +52,7 @@ extern int logfile_format_has_i;
! 80: extern int logfile_format_has_o_or_i;
! 81: extern char *bind_address;
! 82: extern char *config_file;
! 83: +extern char *link_by_hash_dir;
! 84: extern char *logfile_format;
! 85: extern char *files_from;
! 86: extern char *tmpdir;
! 87: @@ -665,6 +666,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char
! 88: return -1;
! 89: }
! 90:
! 91: + if (*lp_link_by_hash_dir(i))
! 92: + link_by_hash_dir = lp_link_by_hash_dir(i);
! 93: +
! 94: if (am_daemon > 0) {
! 95: rprintf(FLOG, "rsync allowed access on module %s from %s (%s)\n",
! 96: name, host, addr);
! 97: diff --git a/daemon-parm.txt b/daemon-parm.txt
! 98: --- a/daemon-parm.txt
! 99: +++ b/daemon-parm.txt
! 100: @@ -29,6 +29,7 @@ STRING hosts_deny NULL
! 101: STRING include NULL
! 102: STRING include_from NULL
! 103: STRING incoming_chmod NULL
! 104: +STRING link_by_hash_dir NULL
! 105: STRING lock_file DEFAULT_LOCK_FILE
! 106: STRING log_file NULL
! 107: STRING log_format "%o %h [%a] %m (%u) %f %l"
! 108: diff --git a/hashlink.c b/hashlink.c
! 109: new file mode 100644
! 110: --- /dev/null
! 111: +++ b/hashlink.c
! 112: @@ -0,0 +1,92 @@
! 113: +/*
! 114: + Copyright (C) Cronosys, LLC 2004
! 115: +
! 116: + This program is free software; you can redistribute it and/or modify
! 117: + it under the terms of the GNU General Public License as published by
! 118: + the Free Software Foundation; either version 2 of the License, or
! 119: + (at your option) any later version.
! 120: +
! 121: + This program is distributed in the hope that it will be useful,
! 122: + but WITHOUT ANY WARRANTY; without even the implied warranty of
! 123: + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 124: + GNU General Public License for more details.
! 125: +
! 126: + You should have received a copy of the GNU General Public License
! 127: + along with this program; if not, write to the Free Software
! 128: + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! 129: +*/
! 130: +
! 131: +/* This file contains code used by the --link-by-hash option. */
! 132: +
! 133: +#include "rsync.h"
! 134: +#include "inums.h"
! 135: +
! 136: +extern int protocol_version;
! 137: +extern char *link_by_hash_dir;
! 138: +extern char sender_file_sum[MAX_DIGEST_LEN];
! 139: +
! 140: +char link_by_hash_extra_sum[MAX_DIGEST_LEN]; /* Only used when md4 sums are in the transfer */
! 141: +
! 142: +#ifdef HAVE_LINK
! 143: +
! 144: +/* This function is always called after a file is received, so the
! 145: + * sender_file_sum buffer has whatever the last checksum was for the
! 146: + * transferred file. */
! 147: +void link_by_hash(const char *fname, const char *fnametmp, struct file_struct *file)
! 148: +{
! 149: + STRUCT_STAT st;
! 150: + char *hashname, *last_slash, *num_str;
! 151: + const char *hex;
! 152: + int num = 0;
! 153: +
! 154: + /* We don't bother to hard-link 0-length files. */
! 155: + if (F_LENGTH(file) == 0)
! 156: + return;
! 157: +
! 158: + hex = sum_as_hex(5, protocol_version >= 30 ? sender_file_sum : link_by_hash_extra_sum, 0);
! 159: + if (asprintf(&hashname, "%s/%.3s/%.3s/%.3s/%s.%s.000000",
! 160: + link_by_hash_dir, hex, hex+3, hex+6, hex+9, big_num(F_LENGTH(file))) < 0)
! 161: + {
! 162: + out_of_memory("make_hash_name");
! 163: + }
! 164: +
! 165: + last_slash = strrchr(hashname, '/');
! 166: + num_str = strrchr(last_slash, '.') + 1;
! 167: +
! 168: + while (1) {
! 169: + if (num >= 999999) { /* Surely we'll never reach this... */
! 170: + if (DEBUG_GTE(HASHLINK, 1))
! 171: + rprintf(FINFO, "link-by-hash: giving up after \"%s\".\n", hashname);
! 172: + goto cleanup;
! 173: + }
! 174: + if (num > 0 && DEBUG_GTE(HASHLINK, 1))
! 175: + rprintf(FINFO, "link-by-hash: max link count exceeded, starting new file \"%s\".\n", hashname);
! 176: +
! 177: + snprintf(num_str, 7, "%d", num++);
! 178: + if (do_stat(hashname, &st) < 0)
! 179: + break;
! 180: +
! 181: + if (do_link(hashname, fnametmp) < 0) {
! 182: + if (errno == EMLINK)
! 183: + continue;
! 184: + rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"", hashname, full_fname(fname));
! 185: + } else {
! 186: + if (DEBUG_GTE(HASHLINK, 2))
! 187: + rprintf(FINFO, "link-by-hash (existing): \"%s\" -> %s\n", hashname, full_fname(fname));
! 188: + robust_rename(fnametmp, fname, NULL, 0644);
! 189: + }
! 190: +
! 191: + goto cleanup;
! 192: + }
! 193: +
! 194: + if (DEBUG_GTE(HASHLINK, 2))
! 195: + rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n", full_fname(fname), hashname);
! 196: +
! 197: + if (do_link(fname, hashname) < 0
! 198: + && (errno != ENOENT || make_path(hashname, MKP_DROP_NAME) < 0 || do_link(fname, hashname) < 0))
! 199: + rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"", full_fname(fname), hashname);
! 200: +
! 201: + cleanup:
! 202: + free(hashname);
! 203: +}
! 204: +#endif
! 205: diff --git a/options.c b/options.c
! 206: --- a/options.c
! 207: +++ b/options.c
! 208: @@ -164,6 +164,7 @@ char *backup_suffix = NULL;
! 209: char *tmpdir = NULL;
! 210: char *partial_dir = NULL;
! 211: char *basis_dir[MAX_BASIS_DIRS+1];
! 212: +char *link_by_hash_dir = NULL;
! 213: char *config_file = NULL;
! 214: char *shell_cmd = NULL;
! 215: char *logfile_name = NULL;
! 216: @@ -221,7 +222,7 @@ static const char *debug_verbosity[] = {
! 217: /*2*/ "BIND,CMD,CONNECT,DEL,DELTASUM,DUP,FILTER,FLIST,ICONV",
! 218: /*3*/ "ACL,BACKUP,CONNECT2,DELTASUM2,DEL2,EXIT,FILTER2,FLIST2,FUZZY,GENR,OWN,RECV,SEND,TIME",
! 219: /*4*/ "CMD2,DELTASUM3,DEL3,EXIT2,FLIST3,ICONV2,OWN2,PROTO,TIME2",
! 220: - /*5*/ "CHDIR,DELTASUM4,FLIST4,FUZZY2,HASH,HLINK",
! 221: + /*5*/ "CHDIR,DELTASUM4,FLIST4,FUZZY2,HASH,HASHLINK,HLINK",
! 222: };
! 223:
! 224: #define MAX_VERBOSITY ((int)(sizeof debug_verbosity / sizeof debug_verbosity[0]) - 1)
! 225: @@ -291,6 +292,7 @@ static struct output_struct debug_words[COUNT_DEBUG+1] = {
! 226: DEBUG_WORD(FUZZY, W_REC, "Debug fuzzy scoring (levels 1-2)"),
! 227: DEBUG_WORD(GENR, W_REC, "Debug generator functions"),
! 228: DEBUG_WORD(HASH, W_SND|W_REC, "Debug hashtable code"),
! 229: + DEBUG_WORD(HASHLINK, W_REC, "Debug hashlink code (levels 1-2)"),
! 230: DEBUG_WORD(HLINK, W_SND|W_REC, "Debug hard-link actions (levels 1-3)"),
! 231: DEBUG_WORD(ICONV, W_CLI|W_SRV, "Debug iconv character conversions (levels 1-2)"),
! 232: DEBUG_WORD(IO, W_CLI|W_SRV, "Debug I/O routines (levels 1-4)"),
! 233: @@ -573,7 +575,7 @@ enum {OPT_SERVER = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
! 234: OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
! 235: OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
! 236: OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_BLOCK_SIZE,
! 237: - OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR,
! 238: + OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR, OPT_LINK_BY_HASH,
! 239: OPT_OLD_COMPRESS, OPT_NEW_COMPRESS, OPT_NO_COMPRESS,
! 240: OPT_STOP_AFTER, OPT_STOP_AT,
! 241: OPT_REFUSED_BASE = 9000};
! 242: @@ -733,6 +735,7 @@ static struct poptOption long_options[] = {
! 243: {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
! 244: {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
! 245: {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
! 246: + {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
! 247: {"fuzzy", 'y', POPT_ARG_NONE, 0, 'y', 0, 0 },
! 248: {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
! 249: {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
! 250: @@ -972,6 +975,9 @@ static void set_refuse_options(void)
! 251: ref = cp + 1;
! 252: }
! 253:
! 254: + if (*lp_link_by_hash_dir(module_id))
! 255: + parse_one_refuse_match(0, "link-by-hash", list_end);
! 256: +
! 257: if (am_daemon) {
! 258: #ifdef ICONV_OPTION
! 259: if (!*lp_charset(module_id))
! 260: @@ -1834,6 +1840,20 @@ int parse_arguments(int *argc_p, const char ***argv_p)
! 261: return 0;
! 262: #endif
! 263:
! 264: + case OPT_LINK_BY_HASH:
! 265: +#ifdef HAVE_LINK
! 266: + arg = poptGetOptArg(pc);
! 267: + if (sanitize_paths)
! 268: + arg = sanitize_path(NULL, arg, NULL, 0, SP_DEFAULT);
! 269: + link_by_hash_dir = (char *)arg;
! 270: + break;
! 271: +#else
! 272: + snprintf(err_buf, sizeof err_buf,
! 273: + "hard links are not supported on this %s\n",
! 274: + am_server ? "server" : "client");
! 275: + return 0;
! 276: +#endif
! 277: +
! 278: case OPT_STOP_AFTER: {
! 279: long val;
! 280: arg = poptGetOptArg(pc);
! 281: @@ -2186,6 +2206,8 @@ int parse_arguments(int *argc_p, const char ***argv_p)
! 282: tmpdir = sanitize_path(NULL, tmpdir, NULL, 0, SP_DEFAULT);
! 283: if (backup_dir)
! 284: backup_dir = sanitize_path(NULL, backup_dir, NULL, 0, SP_DEFAULT);
! 285: + if (link_by_hash_dir)
! 286: + link_by_hash_dir = sanitize_path(NULL, link_by_hash_dir, NULL, 0, SP_DEFAULT);
! 287: }
! 288: if (daemon_filter_list.head && !am_sender) {
! 289: filter_rule_list *elp = &daemon_filter_list;
! 290: @@ -2870,6 +2892,12 @@ void server_options(char **args, int *argc_p)
! 291: } else if (inplace)
! 292: args[ac++] = "--inplace";
! 293:
! 294: + if (link_by_hash_dir && am_sender) {
! 295: + args[ac++] = "--link-by-hash";
! 296: + args[ac++] = link_by_hash_dir;
! 297: + link_by_hash_dir = NULL; /* optimize sending-side checksums */
! 298: + }
! 299: +
! 300: if (files_from && (!am_sender || filesfrom_host)) {
! 301: if (filesfrom_host) {
! 302: args[ac++] = "--files-from";
! 303: diff --git a/rsync.1.md b/rsync.1.md
! 304: --- a/rsync.1.md
! 305: +++ b/rsync.1.md
! 306: @@ -424,6 +424,7 @@ detailed description below for a complete description.
! 307: --compare-dest=DIR also compare destination files relative to DIR
! 308: --copy-dest=DIR ... and include copies of unchanged files
! 309: --link-dest=DIR hardlink to files in DIR when unchanged
! 310: +--link-by-hash=DIR create hardlinks by hash into DIR
! 311: --compress, -z compress file data during the transfer
! 312: --compress-choice=STR choose the compression algorithm (aka --zc)
! 313: --compress-level=NUM explicitly set compression level (aka --zl)
! 314: @@ -2331,6 +2332,50 @@ your home directory (remove the '=' for that).
! 315: specified (or implied by `-a`). You can work-around this bug by avoiding
! 316: the `-o` option when sending to an old rsync.
! 317:
! 318: +0. `--link-by-hash=DIR`
! 319: +
! 320: + This option hard links the destination files into `DIR`, a link farm
! 321: + arranged by MD5 file hash. The result is that the system will only store
! 322: + (usually) one copy of the unique contents of each file, regardless of the
! 323: + file's name (it will use extra files if the links overflow the available
! 324: + maximum).
! 325: +
! 326: + This patch does not take into account file permissions, extended
! 327: + attributes, or ACLs when linking things together, so you should only use
! 328: + this if you don't care about preserving those extra file attributes (or if
! 329: + they are always the same for identical files).
! 330: +
! 331: + The DIR is relative to the destination directory, so either specify a full
! 332: + path to the hash hierarchy, or specify a relative path that puts the links
! 333: + outside the destination (e.g. "../links").
! 334: +
! 335: + Keep in mind that the hierarchy is never pruned, so if you need to reclaim
! 336: + space, you should remove any files that have just one link (since they are
! 337: + not linked into any destination dirs anymore):
! 338: +
! 339: + > find $DIR -links 1 -delete
! 340: +
! 341: + The link farm's directory hierarchy is determined by the file's (32-char)
! 342: + MD5 hash and the file-length. The hash is split up into directory shards.
! 343: + For example, if a file is 54321 bytes long, it could be stored like this:
! 344: +
! 345: + > $DIR/123/456/789/01234567890123456789012.54321.0
! 346: +
! 347: + Note that the directory layout in this patch was modified for version
! 348: + 3.1.0, so anyone using an older version of this patch should move their
! 349: + existing link hierarchy out of the way and then use the newer rsync to copy
! 350: + the saved hierarchy into its new layout. Assuming that no files have
! 351: + overflowed their link limits, this would work:
! 352: +
! 353: + > mv $DIR $DIR.old
! 354: + > rsync -aiv --link-by-hash=$DIR $DIR.old/ $DIR.tmp/
! 355: + > rm -rf $DIR.tmp
! 356: + > rm -rf $DIR.old
! 357: +
! 358: + If some of your files are at their link limit, you'd be better off using a
! 359: + script to calculate the md5 sum of each file in the hierarchy and move it
! 360: + to its new location.
! 361: +
! 362: 0. `--compress`, `-z`
! 363:
! 364: With this option, rsync compresses the file data as it is sent to the
! 365: diff --git a/rsync.c b/rsync.c
! 366: --- a/rsync.c
! 367: +++ b/rsync.c
! 368: @@ -50,6 +50,7 @@ extern int flist_eof;
! 369: extern int file_old_total;
! 370: extern int keep_dirlinks;
! 371: extern int make_backups;
! 372: +extern char *link_by_hash_dir;
! 373: extern int sanitize_paths;
! 374: extern struct file_list *cur_flist, *first_flist, *dir_flist;
! 375: extern struct chmod_mode_struct *daemon_chmod_modes;
! 376: @@ -748,6 +749,10 @@ int finish_transfer(const char *fname, const char *fnametmp,
! 377: }
! 378: if (ret == 0) {
! 379: /* The file was moved into place (not copied), so it's done. */
! 380: +#ifdef HAVE_LINK
! 381: + if (link_by_hash_dir)
! 382: + link_by_hash(fname, fnametmp, file);
! 383: +#endif
! 384: return 1;
! 385: }
! 386: /* The file was copied, so tweak the perms of the copied file. If it
! 387: diff --git a/rsync.h b/rsync.h
! 388: --- a/rsync.h
! 389: +++ b/rsync.h
! 390: @@ -1428,7 +1428,8 @@ extern short info_levels[], debug_levels[];
! 391: #define DEBUG_FUZZY (DEBUG_FLIST+1)
! 392: #define DEBUG_GENR (DEBUG_FUZZY+1)
! 393: #define DEBUG_HASH (DEBUG_GENR+1)
! 394: -#define DEBUG_HLINK (DEBUG_HASH+1)
! 395: +#define DEBUG_HASHLINK (DEBUG_HASH+1)
! 396: +#define DEBUG_HLINK (DEBUG_HASHLINK+1)
! 397: #define DEBUG_ICONV (DEBUG_HLINK+1)
! 398: #define DEBUG_IO (DEBUG_ICONV+1)
! 399: #define DEBUG_NSTR (DEBUG_IO+1)
! 400: diff --git a/rsyncd.conf.5.md b/rsyncd.conf.5.md
! 401: --- a/rsyncd.conf.5.md
! 402: +++ b/rsyncd.conf.5.md
! 403: @@ -354,6 +354,23 @@ the values of parameters. See the GLOBAL PARAMETERS section for more details.
! 404: is 0, which means no limit. A negative value disables the module. See
! 405: also the "lock file" parameter.
! 406:
! 407: +0. `link by hash dir`
! 408: +
! 409: + When the "link by hash dir" parameter is set to a non-empty string,
! 410: + received files will be hard linked into **DIR**, a link farm arranged by
! 411: + MD5 file hash. See the `--link-by-hash` option for a full explanation.
! 412: +
! 413: + The **DIR** must be accessible inside any chroot restrictions for the
! 414: + module, but can exist outside the transfer location if there is an
! 415: + inside-the-chroot path to the module (see "use chroot"). Note that a
! 416: + user-specified option does not allow this outside-the-transfer-area
! 417: + placement.
! 418: +
! 419: + If this parameter is set, it will disable the `--link-by-hash` command-line
! 420: + option for copies into the module.
! 421: +
! 422: +The default is for this parameter to be unset.
! 423: +
! 424: 0. `log file`
! 425:
! 426: When the "log file" parameter is set to a non-empty string, the rsync
! 427: diff -Nurp a/rsync.1 b/rsync.1
! 428: --- a/rsync.1
! 429: +++ b/rsync.1
! 430: @@ -500,6 +500,7 @@ detailed description below for a complet
! 431: --compare-dest=DIR also compare destination files relative to DIR
! 432: --copy-dest=DIR ... and include copies of unchanged files
! 433: --link-dest=DIR hardlink to files in DIR when unchanged
! 434: +--link-by-hash=DIR create hardlinks by hash into DIR
! 435: --compress, -z compress file data during the transfer
! 436: --compress-choice=STR choose the compression algorithm (aka --zc)
! 437: --compress-level=NUM explicitly set compression level (aka --zl)
! 438: @@ -2372,6 +2373,60 @@ Note that rsync versions prior to 2.6.1
! 439: \fB\-\-link-dest\fP from working properly for a non-super-user when \fB\-o\fP was
! 440: specified (or implied by \fB\-a\fP). You can work-around this bug by avoiding
! 441: the \fB\-o\fP option when sending to an old rsync.
! 442: +.IP "\fB\-\-link-by-hash=DIR\fP"
! 443: +This option hard links the destination files into \fBDIR\fP, a link farm
! 444: +arranged by MD5 file hash. The result is that the system will only store
! 445: +(usually) one copy of the unique contents of each file, regardless of the
! 446: +file's name (it will use extra files if the links overflow the available
! 447: +maximum).
! 448: +.IP
! 449: +This patch does not take into account file permissions, extended
! 450: +attributes, or ACLs when linking things together, so you should only use
! 451: +this if you don't care about preserving those extra file attributes (or if
! 452: +they are always the same for identical files).
! 453: +.IP
! 454: +The DIR is relative to the destination directory, so either specify a full
! 455: +path to the hash hierarchy, or specify a relative path that puts the links
! 456: +outside the destination (e.g. "../links").
! 457: +.IP
! 458: +Keep in mind that the hierarchy is never pruned, so if you need to reclaim
! 459: +space, you should remove any files that have just one link (since they are
! 460: +not linked into any destination dirs anymore):
! 461: +.RS 4
! 462: +.IP
! 463: +.nf
! 464: +find $DIR -links 1 -delete
! 465: +.fi
! 466: +.RE
! 467: +.IP
! 468: +The link farm's directory hierarchy is determined by the file's (32-char)
! 469: +MD5 hash and the file-length. The hash is split up into directory shards.
! 470: +For example, if a file is 54321 bytes long, it could be stored like this:
! 471: +.RS 4
! 472: +.IP
! 473: +.nf
! 474: +$DIR/123/456/789/01234567890123456789012.54321.0
! 475: +.fi
! 476: +.RE
! 477: +.IP
! 478: +Note that the directory layout in this patch was modified for version
! 479: +3.1.0, so anyone using an older version of this patch should move their
! 480: +existing link hierarchy out of the way and then use the newer rsync to copy
! 481: +the saved hierarchy into its new layout. Assuming that no files have
! 482: +overflowed their link limits, this would work:
! 483: +.RS 4
! 484: +.IP
! 485: +.nf
! 486: +mv $DIR $DIR.old
! 487: +rsync -aiv --link-by-hash=$DIR $DIR.old/ $DIR.tmp/
! 488: +rm -rf $DIR.tmp
! 489: +rm -rf $DIR.old
! 490: +.fi
! 491: +.RE
! 492: +.IP
! 493: +If some of your files are at their link limit, you'd be better off using a
! 494: +script to calculate the md5 sum of each file in the hierarchy and move it
! 495: +to its new location.
! 496: .IP "\fB\-\-compress\fP, \fB\-z\fP"
! 497: With this option, rsync compresses the file data as it is sent to the
! 498: destination machine, which reduces the amount of data being transmitted\ \-\-
! 499: diff -Nurp a/rsync.1.html b/rsync.1.html
! 500: --- a/rsync.1.html
! 501: +++ b/rsync.1.html
! 502: @@ -415,6 +415,7 @@ detailed description below for a complet
! 503: --compare-dest=DIR also compare destination files relative to DIR
! 504: --copy-dest=DIR ... and include copies of unchanged files
! 505: --link-dest=DIR hardlink to files in DIR when unchanged
! 506: +--link-by-hash=DIR create hardlinks by hash into DIR
! 507: --compress, -z compress file data during the transfer
! 508: --compress-choice=STR choose the compression algorithm (aka --zc)
! 509: --compress-level=NUM explicitly set compression level (aka --zl)
! 510: @@ -2210,6 +2211,50 @@ specified (or implied by <code>-a</code>
! 511: the <code>-o</code> option when sending to an old rsync.</p>
! 512: </dd>
! 513:
! 514: +<dt><code>--link-by-hash=DIR</code></dt><dd>
! 515: +<p>This option hard links the destination files into <code>DIR</code>, a link farm
! 516: +arranged by MD5 file hash. The result is that the system will only store
! 517: +(usually) one copy of the unique contents of each file, regardless of the
! 518: +file's name (it will use extra files if the links overflow the available
! 519: +maximum).</p>
! 520: +<p>This patch does not take into account file permissions, extended
! 521: +attributes, or ACLs when linking things together, so you should only use
! 522: +this if you don't care about preserving those extra file attributes (or if
! 523: +they are always the same for identical files).</p>
! 524: +<p>The DIR is relative to the destination directory, so either specify a full
! 525: +path to the hash hierarchy, or specify a relative path that puts the links
! 526: +outside the destination (e.g. "../links").</p>
! 527: +<p>Keep in mind that the hierarchy is never pruned, so if you need to reclaim
! 528: +space, you should remove any files that have just one link (since they are
! 529: +not linked into any destination dirs anymore):</p>
! 530: +<blockquote>
! 531: +<pre><code>find $DIR -links 1 -delete
! 532: +</code></pre>
! 533: +</blockquote>
! 534: +<p>The link farm's directory hierarchy is determined by the file's (32-char)
! 535: +MD5 hash and the file-length. The hash is split up into directory shards.
! 536: +For example, if a file is 54321 bytes long, it could be stored like this:</p>
! 537: +<blockquote>
! 538: +<pre><code>$DIR/123/456/789/01234567890123456789012.54321.0
! 539: +</code></pre>
! 540: +</blockquote>
! 541: +<p>Note that the directory layout in this patch was modified for version
! 542: +3.1.0, so anyone using an older version of this patch should move their
! 543: +existing link hierarchy out of the way and then use the newer rsync to copy
! 544: +the saved hierarchy into its new layout. Assuming that no files have
! 545: +overflowed their link limits, this would work:</p>
! 546: +<blockquote>
! 547: +<pre><code>mv $DIR $DIR.old
! 548: +rsync -aiv --link-by-hash=$DIR $DIR.old/ $DIR.tmp/
! 549: +rm -rf $DIR.tmp
! 550: +rm -rf $DIR.old
! 551: +</code></pre>
! 552: +</blockquote>
! 553: +<p>If some of your files are at their link limit, you'd be better off using a
! 554: +script to calculate the md5 sum of each file in the hierarchy and move it
! 555: +to its new location.</p>
! 556: +</dd>
! 557: +
! 558: <dt><code>--compress</code>, <code>-z</code></dt><dd>
! 559: <p>With this option, rsync compresses the file data as it is sent to the
! 560: destination machine, which reduces the amount of data being transmitted -⁠-⁠
! 561: diff -Nurp a/rsyncd.conf.5 b/rsyncd.conf.5
! 562: --- a/rsyncd.conf.5
! 563: +++ b/rsyncd.conf.5
! 564: @@ -335,6 +335,22 @@ connections you will allow. Any clients
! 565: been reached will receive a message telling them to try later. The default
! 566: is 0, which means no limit. A negative value disables the module. See
! 567: also the "lock file" parameter.
! 568: +.IP "\fBlink\ by\ hash\ dir\fP"
! 569: +When the "link by hash dir" parameter is set to a non-empty string,
! 570: +received files will be hard linked into \fBDIR\fP, a link farm arranged by
! 571: +MD5 file hash. See the \fB\-\-link-by-hash\fP option for a full explanation.
! 572: +.IP
! 573: +The \fBDIR\fP must be accessible inside any chroot restrictions for the
! 574: +module, but can exist outside the transfer location if there is an
! 575: +inside-the-chroot path to the module (see "use chroot"). Note that a
! 576: +user-specified option does not allow this outside-the-transfer-area
! 577: +placement.
! 578: +.IP
! 579: +If this parameter is set, it will disable the \fB\-\-link-by-hash\fP command-line
! 580: +option for copies into the module.
! 581: +.P
! 582: +The default is for this parameter to be unset.
! 583: +.P
! 584: .IP "\fBlog\ file\fP"
! 585: When the "log file" parameter is set to a non-empty string, the rsync
! 586: daemon will log messages to the indicated file rather than using syslog.
! 587: diff -Nurp a/rsyncd.conf.5.html b/rsyncd.conf.5.html
! 588: --- a/rsyncd.conf.5.html
! 589: +++ b/rsyncd.conf.5.html
! 590: @@ -342,6 +342,22 @@ is 0, which means no limit. A negative
! 591: also the "lock file" parameter.</p>
! 592: </dd>
! 593:
! 594: +<dt><code>link by hash dir</code></dt><dd>
! 595: +<p>When the "link by hash dir" parameter is set to a non-empty string,
! 596: +received files will be hard linked into <strong>DIR</strong>, a link farm arranged by
! 597: +MD5 file hash. See the <code>--link-by-hash</code> option for a full explanation.</p>
! 598: +<p>The <strong>DIR</strong> must be accessible inside any chroot restrictions for the
! 599: +module, but can exist outside the transfer location if there is an
! 600: +inside-the-chroot path to the module (see "use chroot"). Note that a
! 601: +user-specified option does not allow this outside-the-transfer-area
! 602: +placement.</p>
! 603: +<p>If this parameter is set, it will disable the <code>--link-by-hash</code> command-line
! 604: +option for copies into the module.</p>
! 605: +</dd>
! 606: +</dl>
! 607: +<p>The default is for this parameter to be unset.</p>
! 608: +<dl>
! 609: +
! 610: <dt><code>log file</code></dt><dd>
! 611: <p>When the "log file" parameter is set to a non-empty string, the rsync
! 612: daemon will log messages to the indicated file rather than using syslog.
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>