Annotation of embedaddon/rsync/patches/link-by-hash.diff, revision 1.1

1.1     ! misho       1: Jason M. Felice wrote:
        !             2: 
        !             3: This patch adds the --link-by-hash=DIR option, which hard links received files
        !             4: in a link farm arranged by MD4 or MD5 file hash.  The result is that the system
        !             5: will only store one copy of the unique contents of each file, regardless of the
        !             6: file's name.
        !             7: 
        !             8: To use this patch, run these commands for a successful build:
        !             9: 
        !            10:     patch -p1 <patches/link-by-hash.diff
        !            11:     ./prepare-source
        !            12:     ./configure
        !            13:     make
        !            14: 
        !            15: based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
        !            16: diff --git a/Makefile.in b/Makefile.in
        !            17: --- a/Makefile.in
        !            18: +++ b/Makefile.in
        !            19: @@ -44,7 +44,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \
        !            20:        util.o util2.o main.o checksum.o match.o syscall.o log.o backup.o delete.o
        !            21:  OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \
        !            22:        usage.o fileio.o batch.o clientname.o chmod.o acls.o xattrs.o
        !            23: -OBJS3=progress.o pipe.o @ASM@
        !            24: +OBJS3=progress.o pipe.o hashlink.o @ASM@
        !            25:  DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
        !            26:  popt_OBJS=popt/findme.o  popt/popt.o  popt/poptconfig.o \
        !            27:        popt/popthelp.o popt/poptparse.o
        !            28: diff --git a/checksum.c b/checksum.c
        !            29: --- a/checksum.c
        !            30: +++ b/checksum.c
        !            31: @@ -40,6 +40,8 @@ extern int whole_file;
        !            32:  extern int checksum_seed;
        !            33:  extern int protocol_version;
        !            34:  extern int proper_seed_order;
        !            35: +extern char *link_by_hash_dir;
        !            36: +extern char link_by_hash_extra_sum[MAX_DIGEST_LEN];
        !            37:  extern const char *checksum_choice;
        !            38:  
        !            39:  struct name_num_obj valid_checksums = {
        !            40: @@ -444,7 +446,7 @@ static union {
        !            41:        MD4_CTX m4;
        !            42:  #endif
        !            43:        MD5_CTX m5;
        !            44: -} ctx;
        !            45: +} ctx, ctx2;
        !            46:  #ifdef SUPPORT_XXHASH
        !            47:  static XXH64_state_t* xxh64_state;
        !            48:  #endif
        !            49: @@ -483,6 +485,8 @@ void sum_init(int csum_type, int seed)
        !            50:  #endif
        !            51:          case CSUM_MD5:
        !            52:                MD5_Init(&ctx.m5);
        !            53: +              if (link_by_hash_dir)
        !            54: +                      MD5_Init(&ctx2.m5);
        !            55:                break;
        !            56:          case CSUM_MD4:
        !            57:  #ifdef USE_OPENSSL
        !            58: @@ -533,6 +537,8 @@ void sum_update(const char *p, int32 len)
        !            59:  #endif
        !            60:          case CSUM_MD5:
        !            61:                MD5_Update(&ctx.m5, (uchar *)p, len);
        !            62: +              if (link_by_hash_dir)
        !            63: +                      MD5_Update(&ctx2.m5, (uchar *)p, len);
        !            64:                break;
        !            65:          case CSUM_MD4:
        !            66:  #ifdef USE_OPENSSL
        !            67: @@ -598,6 +604,8 @@ int sum_end(char *sum)
        !            68:  #endif
        !            69:          case CSUM_MD5:
        !            70:                MD5_Final((uchar *)sum, &ctx.m5);
        !            71: +              if (link_by_hash_dir)
        !            72: +                      MD5_Final((uchar *)link_by_hash_extra_sum, &ctx2.m5);
        !            73:                break;
        !            74:          case CSUM_MD4:
        !            75:  #ifdef USE_OPENSSL
        !            76: diff --git a/clientserver.c b/clientserver.c
        !            77: --- a/clientserver.c
        !            78: +++ b/clientserver.c
        !            79: @@ -52,6 +52,7 @@ extern int logfile_format_has_i;
        !            80:  extern int logfile_format_has_o_or_i;
        !            81:  extern char *bind_address;
        !            82:  extern char *config_file;
        !            83: +extern char *link_by_hash_dir;
        !            84:  extern char *logfile_format;
        !            85:  extern char *files_from;
        !            86:  extern char *tmpdir;
        !            87: @@ -665,6 +666,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char
        !            88:                return -1;
        !            89:        }
        !            90:  
        !            91: +      if (*lp_link_by_hash_dir(i))
        !            92: +              link_by_hash_dir = lp_link_by_hash_dir(i);
        !            93: +
        !            94:        if (am_daemon > 0) {
        !            95:                rprintf(FLOG, "rsync allowed access on module %s from %s (%s)\n",
        !            96:                        name, host, addr);
        !            97: diff --git a/daemon-parm.txt b/daemon-parm.txt
        !            98: --- a/daemon-parm.txt
        !            99: +++ b/daemon-parm.txt
        !           100: @@ -29,6 +29,7 @@ STRING       hosts_deny              NULL
        !           101:  STRING        include                 NULL
        !           102:  STRING        include_from            NULL
        !           103:  STRING        incoming_chmod          NULL
        !           104: +STRING        link_by_hash_dir        NULL
        !           105:  STRING        lock_file               DEFAULT_LOCK_FILE
        !           106:  STRING        log_file                NULL
        !           107:  STRING        log_format              "%o %h [%a] %m (%u) %f %l"
        !           108: diff --git a/hashlink.c b/hashlink.c
        !           109: new file mode 100644
        !           110: --- /dev/null
        !           111: +++ b/hashlink.c
        !           112: @@ -0,0 +1,92 @@
        !           113: +/*
        !           114: +   Copyright (C) Cronosys, LLC 2004
        !           115: +
        !           116: +   This program is free software; you can redistribute it and/or modify
        !           117: +   it under the terms of the GNU General Public License as published by
        !           118: +   the Free Software Foundation; either version 2 of the License, or
        !           119: +   (at your option) any later version.
        !           120: +
        !           121: +   This program is distributed in the hope that it will be useful,
        !           122: +   but WITHOUT ANY WARRANTY; without even the implied warranty of
        !           123: +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !           124: +   GNU General Public License for more details.
        !           125: +
        !           126: +   You should have received a copy of the GNU General Public License
        !           127: +   along with this program; if not, write to the Free Software
        !           128: +   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
        !           129: +*/
        !           130: +
        !           131: +/* This file contains code used by the --link-by-hash option. */
        !           132: +
        !           133: +#include "rsync.h"
        !           134: +#include "inums.h"
        !           135: +
        !           136: +extern int protocol_version;
        !           137: +extern char *link_by_hash_dir;
        !           138: +extern char sender_file_sum[MAX_DIGEST_LEN];
        !           139: +
        !           140: +char link_by_hash_extra_sum[MAX_DIGEST_LEN]; /* Only used when md4 sums are in the transfer */
        !           141: +
        !           142: +#ifdef HAVE_LINK
        !           143: +
        !           144: +/* This function is always called after a file is received, so the
        !           145: + * sender_file_sum buffer has whatever the last checksum was for the
        !           146: + * transferred file. */
        !           147: +void link_by_hash(const char *fname, const char *fnametmp, struct file_struct *file)
        !           148: +{
        !           149: +      STRUCT_STAT st;
        !           150: +      char *hashname, *last_slash, *num_str;
        !           151: +      const char *hex;
        !           152: +      int num = 0;
        !           153: +
        !           154: +      /* We don't bother to hard-link 0-length files. */
        !           155: +      if (F_LENGTH(file) == 0)
        !           156: +              return;
        !           157: +
        !           158: +      hex = sum_as_hex(5, protocol_version >= 30 ? sender_file_sum : link_by_hash_extra_sum, 0);
        !           159: +      if (asprintf(&hashname, "%s/%.3s/%.3s/%.3s/%s.%s.000000",
        !           160: +                   link_by_hash_dir, hex, hex+3, hex+6, hex+9, big_num(F_LENGTH(file))) < 0)
        !           161: +      {
        !           162: +              out_of_memory("make_hash_name");
        !           163: +      }
        !           164: +
        !           165: +      last_slash = strrchr(hashname, '/');
        !           166: +      num_str = strrchr(last_slash, '.') + 1;
        !           167: +
        !           168: +      while (1) {
        !           169: +              if (num >= 999999) { /* Surely we'll never reach this... */
        !           170: +                      if (DEBUG_GTE(HASHLINK, 1))
        !           171: +                              rprintf(FINFO, "link-by-hash: giving up after \"%s\".\n", hashname);
        !           172: +                      goto cleanup;
        !           173: +              }
        !           174: +              if (num > 0 && DEBUG_GTE(HASHLINK, 1))
        !           175: +                      rprintf(FINFO, "link-by-hash: max link count exceeded, starting new file \"%s\".\n", hashname);
        !           176: +
        !           177: +              snprintf(num_str, 7, "%d", num++);
        !           178: +              if (do_stat(hashname, &st) < 0)
        !           179: +                      break;
        !           180: +
        !           181: +              if (do_link(hashname, fnametmp) < 0) {
        !           182: +                      if (errno == EMLINK)
        !           183: +                              continue;
        !           184: +                      rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"", hashname, full_fname(fname));
        !           185: +              } else {
        !           186: +                      if (DEBUG_GTE(HASHLINK, 2))
        !           187: +                              rprintf(FINFO, "link-by-hash (existing): \"%s\" -> %s\n", hashname, full_fname(fname));
        !           188: +                      robust_rename(fnametmp, fname, NULL, 0644);
        !           189: +              }
        !           190: +
        !           191: +              goto cleanup;
        !           192: +      }
        !           193: +
        !           194: +      if (DEBUG_GTE(HASHLINK, 2))
        !           195: +              rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n", full_fname(fname), hashname);
        !           196: +
        !           197: +      if (do_link(fname, hashname) < 0
        !           198: +       && (errno != ENOENT || make_path(hashname, MKP_DROP_NAME) < 0 || do_link(fname, hashname) < 0))
        !           199: +              rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"", full_fname(fname), hashname);
        !           200: +
        !           201: +  cleanup:
        !           202: +      free(hashname);
        !           203: +}
        !           204: +#endif
        !           205: diff --git a/options.c b/options.c
        !           206: --- a/options.c
        !           207: +++ b/options.c
        !           208: @@ -164,6 +164,7 @@ char *backup_suffix = NULL;
        !           209:  char *tmpdir = NULL;
        !           210:  char *partial_dir = NULL;
        !           211:  char *basis_dir[MAX_BASIS_DIRS+1];
        !           212: +char *link_by_hash_dir = NULL;
        !           213:  char *config_file = NULL;
        !           214:  char *shell_cmd = NULL;
        !           215:  char *logfile_name = NULL;
        !           216: @@ -221,7 +222,7 @@ static const char *debug_verbosity[] = {
        !           217:        /*2*/ "BIND,CMD,CONNECT,DEL,DELTASUM,DUP,FILTER,FLIST,ICONV",
        !           218:        /*3*/ "ACL,BACKUP,CONNECT2,DELTASUM2,DEL2,EXIT,FILTER2,FLIST2,FUZZY,GENR,OWN,RECV,SEND,TIME",
        !           219:        /*4*/ "CMD2,DELTASUM3,DEL3,EXIT2,FLIST3,ICONV2,OWN2,PROTO,TIME2",
        !           220: -      /*5*/ "CHDIR,DELTASUM4,FLIST4,FUZZY2,HASH,HLINK",
        !           221: +      /*5*/ "CHDIR,DELTASUM4,FLIST4,FUZZY2,HASH,HASHLINK,HLINK",
        !           222:  };
        !           223:  
        !           224:  #define MAX_VERBOSITY ((int)(sizeof debug_verbosity / sizeof debug_verbosity[0]) - 1)
        !           225: @@ -291,6 +292,7 @@ static struct output_struct debug_words[COUNT_DEBUG+1] = {
        !           226:        DEBUG_WORD(FUZZY, W_REC, "Debug fuzzy scoring (levels 1-2)"),
        !           227:        DEBUG_WORD(GENR, W_REC, "Debug generator functions"),
        !           228:        DEBUG_WORD(HASH, W_SND|W_REC, "Debug hashtable code"),
        !           229: +      DEBUG_WORD(HASHLINK, W_REC, "Debug hashlink code (levels 1-2)"),
        !           230:        DEBUG_WORD(HLINK, W_SND|W_REC, "Debug hard-link actions (levels 1-3)"),
        !           231:        DEBUG_WORD(ICONV, W_CLI|W_SRV, "Debug iconv character conversions (levels 1-2)"),
        !           232:        DEBUG_WORD(IO, W_CLI|W_SRV, "Debug I/O routines (levels 1-4)"),
        !           233: @@ -573,7 +575,7 @@ enum {OPT_SERVER = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
        !           234:        OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
        !           235:        OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
        !           236:        OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_BLOCK_SIZE,
        !           237: -      OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR,
        !           238: +      OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR, OPT_LINK_BY_HASH,
        !           239:        OPT_OLD_COMPRESS, OPT_NEW_COMPRESS, OPT_NO_COMPRESS,
        !           240:        OPT_STOP_AFTER, OPT_STOP_AT,
        !           241:        OPT_REFUSED_BASE = 9000};
        !           242: @@ -733,6 +735,7 @@ static struct poptOption long_options[] = {
        !           243:    {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
        !           244:    {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
        !           245:    {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
        !           246: +  {"link-by-hash",     0,  POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
        !           247:    {"fuzzy",           'y', POPT_ARG_NONE,   0, 'y', 0, 0 },
        !           248:    {"no-fuzzy",         0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
        !           249:    {"no-y",             0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
        !           250: @@ -972,6 +975,9 @@ static void set_refuse_options(void)
        !           251:                ref = cp + 1;
        !           252:        }
        !           253:  
        !           254: +      if (*lp_link_by_hash_dir(module_id))
        !           255: +              parse_one_refuse_match(0, "link-by-hash", list_end);
        !           256: +
        !           257:        if (am_daemon) {
        !           258:  #ifdef ICONV_OPTION
        !           259:                if (!*lp_charset(module_id))
        !           260: @@ -1834,6 +1840,20 @@ int parse_arguments(int *argc_p, const char ***argv_p)
        !           261:                        return 0;
        !           262:  #endif
        !           263:  
        !           264: +                case OPT_LINK_BY_HASH:
        !           265: +#ifdef HAVE_LINK
        !           266: +                      arg = poptGetOptArg(pc);
        !           267: +                      if (sanitize_paths)
        !           268: +                              arg = sanitize_path(NULL, arg, NULL, 0, SP_DEFAULT);
        !           269: +                      link_by_hash_dir = (char *)arg;
        !           270: +                      break;
        !           271: +#else
        !           272: +                      snprintf(err_buf, sizeof err_buf,
        !           273: +                               "hard links are not supported on this %s\n",
        !           274: +                               am_server ? "server" : "client");
        !           275: +                      return 0;
        !           276: +#endif
        !           277: +
        !           278:                case OPT_STOP_AFTER: {
        !           279:                        long val;
        !           280:                        arg = poptGetOptArg(pc);
        !           281: @@ -2186,6 +2206,8 @@ int parse_arguments(int *argc_p, const char ***argv_p)
        !           282:                        tmpdir = sanitize_path(NULL, tmpdir, NULL, 0, SP_DEFAULT);
        !           283:                if (backup_dir)
        !           284:                        backup_dir = sanitize_path(NULL, backup_dir, NULL, 0, SP_DEFAULT);
        !           285: +              if (link_by_hash_dir)
        !           286: +                      link_by_hash_dir = sanitize_path(NULL, link_by_hash_dir, NULL, 0, SP_DEFAULT);
        !           287:        }
        !           288:        if (daemon_filter_list.head && !am_sender) {
        !           289:                filter_rule_list *elp = &daemon_filter_list;
        !           290: @@ -2870,6 +2892,12 @@ void server_options(char **args, int *argc_p)
        !           291:        } else if (inplace)
        !           292:                args[ac++] = "--inplace";
        !           293:  
        !           294: +      if (link_by_hash_dir && am_sender) {
        !           295: +              args[ac++] = "--link-by-hash";
        !           296: +              args[ac++] = link_by_hash_dir;
        !           297: +              link_by_hash_dir = NULL; /* optimize sending-side checksums */
        !           298: +      }
        !           299: +
        !           300:        if (files_from && (!am_sender || filesfrom_host)) {
        !           301:                if (filesfrom_host) {
        !           302:                        args[ac++] = "--files-from";
        !           303: diff --git a/rsync.1.md b/rsync.1.md
        !           304: --- a/rsync.1.md
        !           305: +++ b/rsync.1.md
        !           306: @@ -424,6 +424,7 @@ detailed description below for a complete description.
        !           307:  --compare-dest=DIR       also compare destination files relative to DIR
        !           308:  --copy-dest=DIR          ... and include copies of unchanged files
        !           309:  --link-dest=DIR          hardlink to files in DIR when unchanged
        !           310: +--link-by-hash=DIR       create hardlinks by hash into DIR
        !           311:  --compress, -z           compress file data during the transfer
        !           312:  --compress-choice=STR    choose the compression algorithm (aka --zc)
        !           313:  --compress-level=NUM     explicitly set compression level (aka --zl)
        !           314: @@ -2331,6 +2332,50 @@ your home directory (remove the '=' for that).
        !           315:      specified (or implied by `-a`).  You can work-around this bug by avoiding
        !           316:      the `-o` option when sending to an old rsync.
        !           317:  
        !           318: +0.  `--link-by-hash=DIR`
        !           319: +
        !           320: +    This option hard links the destination files into `DIR`, a link farm
        !           321: +    arranged by MD5 file hash. The result is that the system will only store
        !           322: +    (usually) one copy of the unique contents of each file, regardless of the
        !           323: +    file's name (it will use extra files if the links overflow the available
        !           324: +    maximum).
        !           325: +
        !           326: +    This patch does not take into account file permissions, extended
        !           327: +    attributes, or ACLs when linking things together, so you should only use
        !           328: +    this if you don't care about preserving those extra file attributes (or if
        !           329: +    they are always the same for identical files).
        !           330: +
        !           331: +    The DIR is relative to the destination directory, so either specify a full
        !           332: +    path to the hash hierarchy, or specify a relative path that puts the links
        !           333: +    outside the destination (e.g. "../links").
        !           334: +
        !           335: +    Keep in mind that the hierarchy is never pruned, so if you need to reclaim
        !           336: +    space, you should remove any files that have just one link (since they are
        !           337: +    not linked into any destination dirs anymore):
        !           338: +
        !           339: +    >     find $DIR -links 1 -delete
        !           340: +
        !           341: +    The link farm's directory hierarchy is determined by the file's (32-char)
        !           342: +    MD5 hash and the file-length.  The hash is split up into directory shards.
        !           343: +    For example, if a file is 54321 bytes long, it could be stored like this:
        !           344: +
        !           345: +    >     $DIR/123/456/789/01234567890123456789012.54321.0
        !           346: +
        !           347: +    Note that the directory layout in this patch was modified for version
        !           348: +    3.1.0, so anyone using an older version of this patch should move their
        !           349: +    existing link hierarchy out of the way and then use the newer rsync to copy
        !           350: +    the saved hierarchy into its new layout.  Assuming that no files have
        !           351: +    overflowed their link limits, this would work:
        !           352: +
        !           353: +    >     mv $DIR $DIR.old
        !           354: +    >     rsync -aiv --link-by-hash=$DIR $DIR.old/ $DIR.tmp/
        !           355: +    >     rm -rf $DIR.tmp
        !           356: +    >     rm -rf $DIR.old
        !           357: +
        !           358: +    If some of your files are at their link limit, you'd be better off using a
        !           359: +    script to calculate the md5 sum of each file in the hierarchy and move it
        !           360: +    to its new location.
        !           361: +
        !           362:  0.  `--compress`, `-z`
        !           363:  
        !           364:      With this option, rsync compresses the file data as it is sent to the
        !           365: diff --git a/rsync.c b/rsync.c
        !           366: --- a/rsync.c
        !           367: +++ b/rsync.c
        !           368: @@ -50,6 +50,7 @@ extern int flist_eof;
        !           369:  extern int file_old_total;
        !           370:  extern int keep_dirlinks;
        !           371:  extern int make_backups;
        !           372: +extern char *link_by_hash_dir;
        !           373:  extern int sanitize_paths;
        !           374:  extern struct file_list *cur_flist, *first_flist, *dir_flist;
        !           375:  extern struct chmod_mode_struct *daemon_chmod_modes;
        !           376: @@ -748,6 +749,10 @@ int finish_transfer(const char *fname, const char *fnametmp,
        !           377:        }
        !           378:        if (ret == 0) {
        !           379:                /* The file was moved into place (not copied), so it's done. */
        !           380: +#ifdef HAVE_LINK
        !           381: +              if (link_by_hash_dir)
        !           382: +                      link_by_hash(fname, fnametmp, file);
        !           383: +#endif
        !           384:                return 1;
        !           385:        }
        !           386:        /* The file was copied, so tweak the perms of the copied file.  If it
        !           387: diff --git a/rsync.h b/rsync.h
        !           388: --- a/rsync.h
        !           389: +++ b/rsync.h
        !           390: @@ -1428,7 +1428,8 @@ extern short info_levels[], debug_levels[];
        !           391:  #define DEBUG_FUZZY (DEBUG_FLIST+1)
        !           392:  #define DEBUG_GENR (DEBUG_FUZZY+1)
        !           393:  #define DEBUG_HASH (DEBUG_GENR+1)
        !           394: -#define DEBUG_HLINK (DEBUG_HASH+1)
        !           395: +#define DEBUG_HASHLINK (DEBUG_HASH+1)
        !           396: +#define DEBUG_HLINK (DEBUG_HASHLINK+1)
        !           397:  #define DEBUG_ICONV (DEBUG_HLINK+1)
        !           398:  #define DEBUG_IO (DEBUG_ICONV+1)
        !           399:  #define DEBUG_NSTR (DEBUG_IO+1)
        !           400: diff --git a/rsyncd.conf.5.md b/rsyncd.conf.5.md
        !           401: --- a/rsyncd.conf.5.md
        !           402: +++ b/rsyncd.conf.5.md
        !           403: @@ -354,6 +354,23 @@ the values of parameters.  See the GLOBAL PARAMETERS section for more details.
        !           404:      is 0, which means no limit.  A negative value disables the module.  See
        !           405:      also the "lock file" parameter.
        !           406:  
        !           407: +0.  `link by hash dir`
        !           408: +
        !           409: +    When the "link by hash dir" parameter is set to a non-empty string,
        !           410: +    received files will be hard linked into **DIR**, a link farm arranged by
        !           411: +    MD5 file hash. See the `--link-by-hash` option for a full explanation.
        !           412: +
        !           413: +    The **DIR** must be accessible inside any chroot restrictions for the
        !           414: +    module, but can exist outside the transfer location if there is an
        !           415: +    inside-the-chroot path to the module (see "use chroot").  Note that a
        !           416: +    user-specified option does not allow this outside-the-transfer-area
        !           417: +    placement.
        !           418: +
        !           419: +    If this parameter is set, it will disable the `--link-by-hash` command-line
        !           420: +    option for copies into the module.
        !           421: +
        !           422: +    The default is for this parameter to be unset.
        !           423: +
        !           424:  0.  `log file`
        !           425:  
        !           426:      When the "log file" parameter is set to a non-empty string, the rsync
        !           427: diff -Nurp a/rsync.1 b/rsync.1
        !           428: --- a/rsync.1
        !           429: +++ b/rsync.1
        !           430: @@ -500,6 +500,7 @@ detailed description below for a complet
        !           431:  --compare-dest=DIR       also compare destination files relative to DIR
        !           432:  --copy-dest=DIR          ... and include copies of unchanged files
        !           433:  --link-dest=DIR          hardlink to files in DIR when unchanged
        !           434: +--link-by-hash=DIR       create hardlinks by hash into DIR
        !           435:  --compress, -z           compress file data during the transfer
        !           436:  --compress-choice=STR    choose the compression algorithm (aka --zc)
        !           437:  --compress-level=NUM     explicitly set compression level (aka --zl)
        !           438: @@ -2372,6 +2373,60 @@ Note that rsync versions prior to 2.6.1
        !           439:  \fB\-\-link-dest\fP from working properly for a non-super-user when \fB\-o\fP was
        !           440:  specified (or implied by \fB\-a\fP).  You can work-around this bug by avoiding
        !           441:  the \fB\-o\fP option when sending to an old rsync.
        !           442: +.IP "\fB\-\-link-by-hash=DIR\fP"
        !           443: +This option hard links the destination files into \fBDIR\fP, a link farm
        !           444: +arranged by MD5 file hash. The result is that the system will only store
        !           445: +(usually) one copy of the unique contents of each file, regardless of the
        !           446: +file's name (it will use extra files if the links overflow the available
        !           447: +maximum).
        !           448: +.IP
        !           449: +This patch does not take into account file permissions, extended
        !           450: +attributes, or ACLs when linking things together, so you should only use
        !           451: +this if you don't care about preserving those extra file attributes (or if
        !           452: +they are always the same for identical files).
        !           453: +.IP
        !           454: +The DIR is relative to the destination directory, so either specify a full
        !           455: +path to the hash hierarchy, or specify a relative path that puts the links
        !           456: +outside the destination (e.g. "../links").
        !           457: +.IP
        !           458: +Keep in mind that the hierarchy is never pruned, so if you need to reclaim
        !           459: +space, you should remove any files that have just one link (since they are
        !           460: +not linked into any destination dirs anymore):
        !           461: +.RS 4
        !           462: +.IP
        !           463: +.nf
        !           464: +find $DIR -links 1 -delete
        !           465: +.fi
        !           466: +.RE
        !           467: +.IP
        !           468: +The link farm's directory hierarchy is determined by the file's (32-char)
        !           469: +MD5 hash and the file-length.  The hash is split up into directory shards.
        !           470: +For example, if a file is 54321 bytes long, it could be stored like this:
        !           471: +.RS 4
        !           472: +.IP
        !           473: +.nf
        !           474: +$DIR/123/456/789/01234567890123456789012.54321.0
        !           475: +.fi
        !           476: +.RE
        !           477: +.IP
        !           478: +Note that the directory layout in this patch was modified for version
        !           479: +3.1.0, so anyone using an older version of this patch should move their
        !           480: +existing link hierarchy out of the way and then use the newer rsync to copy
        !           481: +the saved hierarchy into its new layout.  Assuming that no files have
        !           482: +overflowed their link limits, this would work:
        !           483: +.RS 4
        !           484: +.IP
        !           485: +.nf
        !           486: +mv $DIR $DIR.old
        !           487: +rsync -aiv --link-by-hash=$DIR $DIR.old/ $DIR.tmp/
        !           488: +rm -rf $DIR.tmp
        !           489: +rm -rf $DIR.old
        !           490: +.fi
        !           491: +.RE
        !           492: +.IP
        !           493: +If some of your files are at their link limit, you'd be better off using a
        !           494: +script to calculate the md5 sum of each file in the hierarchy and move it
        !           495: +to its new location.
        !           496:  .IP "\fB\-\-compress\fP, \fB\-z\fP"
        !           497:  With this option, rsync compresses the file data as it is sent to the
        !           498:  destination machine, which reduces the amount of data being transmitted\ \-\-
        !           499: diff -Nurp a/rsync.1.html b/rsync.1.html
        !           500: --- a/rsync.1.html
        !           501: +++ b/rsync.1.html
        !           502: @@ -415,6 +415,7 @@ detailed description below for a complet
        !           503:  --compare-dest=DIR       also compare destination files relative to DIR
        !           504:  --copy-dest=DIR          ... and include copies of unchanged files
        !           505:  --link-dest=DIR          hardlink to files in DIR when unchanged
        !           506: +--link-by-hash=DIR       create hardlinks by hash into DIR
        !           507:  --compress, -z           compress file data during the transfer
        !           508:  --compress-choice=STR    choose the compression algorithm (aka --zc)
        !           509:  --compress-level=NUM     explicitly set compression level (aka --zl)
        !           510: @@ -2210,6 +2211,50 @@ specified (or implied by <code>-a</code>
        !           511:  the <code>-o</code> option when sending to an old rsync.</p>
        !           512:  </dd>
        !           513:  
        !           514: +<dt><code>--link-by-hash=DIR</code></dt><dd>
        !           515: +<p>This option hard links the destination files into <code>DIR</code>, a link farm
        !           516: +arranged by MD5 file hash. The result is that the system will only store
        !           517: +(usually) one copy of the unique contents of each file, regardless of the
        !           518: +file's name (it will use extra files if the links overflow the available
        !           519: +maximum).</p>
        !           520: +<p>This patch does not take into account file permissions, extended
        !           521: +attributes, or ACLs when linking things together, so you should only use
        !           522: +this if you don't care about preserving those extra file attributes (or if
        !           523: +they are always the same for identical files).</p>
        !           524: +<p>The DIR is relative to the destination directory, so either specify a full
        !           525: +path to the hash hierarchy, or specify a relative path that puts the links
        !           526: +outside the destination (e.g. &quot;../links&quot;).</p>
        !           527: +<p>Keep in mind that the hierarchy is never pruned, so if you need to reclaim
        !           528: +space, you should remove any files that have just one link (since they are
        !           529: +not linked into any destination dirs anymore):</p>
        !           530: +<blockquote>
        !           531: +<pre><code>find $DIR -links 1 -delete
        !           532: +</code></pre>
        !           533: +</blockquote>
        !           534: +<p>The link farm's directory hierarchy is determined by the file's (32-char)
        !           535: +MD5 hash and the file-length.  The hash is split up into directory shards.
        !           536: +For example, if a file is 54321 bytes long, it could be stored like this:</p>
        !           537: +<blockquote>
        !           538: +<pre><code>$DIR/123/456/789/01234567890123456789012.54321.0
        !           539: +</code></pre>
        !           540: +</blockquote>
        !           541: +<p>Note that the directory layout in this patch was modified for version
        !           542: +3.1.0, so anyone using an older version of this patch should move their
        !           543: +existing link hierarchy out of the way and then use the newer rsync to copy
        !           544: +the saved hierarchy into its new layout.  Assuming that no files have
        !           545: +overflowed their link limits, this would work:</p>
        !           546: +<blockquote>
        !           547: +<pre><code>mv $DIR $DIR.old
        !           548: +rsync -aiv --link-by-hash=$DIR $DIR.old/ $DIR.tmp/
        !           549: +rm -rf $DIR.tmp
        !           550: +rm -rf $DIR.old
        !           551: +</code></pre>
        !           552: +</blockquote>
        !           553: +<p>If some of your files are at their link limit, you'd be better off using a
        !           554: +script to calculate the md5 sum of each file in the hierarchy and move it
        !           555: +to its new location.</p>
        !           556: +</dd>
        !           557: +
        !           558:  <dt><code>--compress</code>, <code>-z</code></dt><dd>
        !           559:  <p>With this option, rsync compresses the file data as it is sent to the
        !           560:  destination machine, which reduces the amount of data being transmitted&nbsp;-&#8288;-&#8288;
        !           561: diff -Nurp a/rsyncd.conf.5 b/rsyncd.conf.5
        !           562: --- a/rsyncd.conf.5
        !           563: +++ b/rsyncd.conf.5
        !           564: @@ -335,6 +335,22 @@ connections you will allow.  Any clients
        !           565:  been reached will receive a message telling them to try later.  The default
        !           566:  is 0, which means no limit.  A negative value disables the module.  See
        !           567:  also the "lock file" parameter.
        !           568: +.IP "\fBlink\ by\ hash\ dir\fP"
        !           569: +When the "link by hash dir" parameter is set to a non-empty string,
        !           570: +received files will be hard linked into \fBDIR\fP, a link farm arranged by
        !           571: +MD5 file hash. See the \fB\-\-link-by-hash\fP option for a full explanation.
        !           572: +.IP
        !           573: +The \fBDIR\fP must be accessible inside any chroot restrictions for the
        !           574: +module, but can exist outside the transfer location if there is an
        !           575: +inside-the-chroot path to the module (see "use chroot").  Note that a
        !           576: +user-specified option does not allow this outside-the-transfer-area
        !           577: +placement.
        !           578: +.IP
        !           579: +If this parameter is set, it will disable the \fB\-\-link-by-hash\fP command-line
        !           580: +option for copies into the module.
        !           581: +.P
        !           582: +The default is for this parameter to be unset.
        !           583: +.P
        !           584:  .IP "\fBlog\ file\fP"
        !           585:  When the "log file" parameter is set to a non-empty string, the rsync
        !           586:  daemon will log messages to the indicated file rather than using syslog.
        !           587: diff -Nurp a/rsyncd.conf.5.html b/rsyncd.conf.5.html
        !           588: --- a/rsyncd.conf.5.html
        !           589: +++ b/rsyncd.conf.5.html
        !           590: @@ -342,6 +342,22 @@ is 0, which means no limit.  A negative
        !           591:  also the &quot;lock file&quot; parameter.</p>
        !           592:  </dd>
        !           593:  
        !           594: +<dt><code>link by hash dir</code></dt><dd>
        !           595: +<p>When the &quot;link by hash dir&quot; parameter is set to a non-empty string,
        !           596: +received files will be hard linked into <strong>DIR</strong>, a link farm arranged by
        !           597: +MD5 file hash. See the <code>--link-by-hash</code> option for a full explanation.</p>
        !           598: +<p>The <strong>DIR</strong> must be accessible inside any chroot restrictions for the
        !           599: +module, but can exist outside the transfer location if there is an
        !           600: +inside-the-chroot path to the module (see &quot;use chroot&quot;).  Note that a
        !           601: +user-specified option does not allow this outside-the-transfer-area
        !           602: +placement.</p>
        !           603: +<p>If this parameter is set, it will disable the <code>--link-by-hash</code> command-line
        !           604: +option for copies into the module.</p>
        !           605: +</dd>
        !           606: +</dl>
        !           607: +<p>The default is for this parameter to be unset.</p>
        !           608: +<dl>
        !           609: +
        !           610:  <dt><code>log file</code></dt><dd>
        !           611:  <p>When the &quot;log file&quot; parameter is set to a non-empty string, the rsync
        !           612:  daemon will log messages to the indicated file rather than using syslog.

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>