Annotation of embedaddon/rsync/patches/detect-renamed.diff, revision 1.1

1.1     ! misho       1: This patch adds the --detect-renamed option which makes rsync notice files
        !             2: that either (1) match in size & modify-time (plus the basename, if possible)
        !             3: or (2) match in size & checksum (when --checksum was also specified) and use
        !             4: each match as an alternate basis file to speed up the transfer.
        !             5: 
        !             6: The algorithm attempts to scan the receiving-side's files in an efficient
        !             7: manner.  If --delete[-before] is enabled, we'll take advantage of the
        !             8: pre-transfer delete pass to prepare any alternate-basis-file matches we
        !             9: might find.  If --delete-before is not enabled, rsync does the rename scan
        !            10: during the regular file-sending scan (scanning each directory right before
        !            11: the generator starts updating files from that dir).  In this latter mode,
        !            12: rsync might delay the updating of a file (if no alternate-basis match was
        !            13: yet found) until the full scan of the receiving side is complete, at which
        !            14: point any delayed files are processed.
        !            15: 
        !            16: I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
        !            17: takes advantage of rsync's pre-existing partial-dir logic.  This uses less
        !            18: memory than trying to keep track of the matches internally, and also allows
        !            19: any deletions or file-updates to occur normally without interfering with
        !            20: these alternate-basis discoveries.
        !            21: 
        !            22: To use this patch, run these commands for a successful build:
        !            23: 
        !            24:     patch -p1 <patches/detect-renamed.diff
        !            25:     ./configure                                 (optional if already run)
        !            26:     make
        !            27: 
        !            28: TODO:
        !            29: 
        !            30:   The routine that makes missing directories for files that get renamed
        !            31:   down into a new sub-hierarchy doesn't properly handle the case where some
        !            32:   path elements might exist but not be a dir yet.  We need to either change
        !            33:   our stash-ahead algorithm (to not require unknown path elements) or we
        !            34:   need to create a better path-making routine.
        !            35: 
        !            36:   fattr_find() should never return a match that already has a basis
        !            37:   file.  This will ensure that we don't try to give a renamed file to
        !            38:   an entry that can't use it, while missing out on giving it to an
        !            39:   entry that could use it.
        !            40: 
        !            41: based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
        !            42: diff --git a/backup.c b/backup.c
        !            43: --- a/backup.c
        !            44: +++ b/backup.c
        !            45: @@ -162,7 +162,7 @@ char *get_backup_name(const char *fname)
        !            46:                        int ret;
        !            47:                        if (backup_dir_len > 1)
        !            48:                                backup_dir_buf[backup_dir_len-1] = '\0';
        !            49: -                      ret = make_path(backup_dir_buf, 0);
        !            50: +                      ret = make_path(backup_dir_buf, ACCESSPERMS, 0);
        !            51:                        if (backup_dir_len > 1)
        !            52:                                backup_dir_buf[backup_dir_len-1] = '/';
        !            53:                        if (ret < 0)
        !            54: diff --git a/compat.c b/compat.c
        !            55: --- a/compat.c
        !            56: +++ b/compat.c
        !            57: @@ -39,6 +39,7 @@ extern int checksum_seed;
        !            58:  extern int basis_dir_cnt;
        !            59:  extern int prune_empty_dirs;
        !            60:  extern int protocol_version;
        !            61: +extern int detect_renamed;
        !            62:  extern int protect_args;
        !            63:  extern int preserve_uid;
        !            64:  extern int preserve_gid;
        !            65: @@ -159,6 +160,7 @@ void set_allow_inc_recurse(void)
        !            66:                allow_inc_recurse = 0;
        !            67:        else if (!am_sender
        !            68:         && (delete_before || delete_after
        !            69: +        || detect_renamed
        !            70:          || delay_updates || prune_empty_dirs))
        !            71:                allow_inc_recurse = 0;
        !            72:        else if (am_server && !local_server
        !            73: diff --git a/delete.c b/delete.c
        !            74: --- a/delete.c
        !            75: +++ b/delete.c
        !            76: @@ -25,6 +25,7 @@
        !            77:  extern int am_root;
        !            78:  extern int make_backups;
        !            79:  extern int max_delete;
        !            80: +extern int detect_renamed;
        !            81:  extern char *backup_dir;
        !            82:  extern char *backup_suffix;
        !            83:  extern int backup_suffix_len;
        !            84: @@ -44,6 +45,8 @@ static inline int is_backup_file(char *fn)
        !            85:   * its contents, otherwise just checks for content.  Returns DR_SUCCESS or
        !            86:   * DR_NOT_EMPTY.  Note that fname must point to a MAXPATHLEN buffer!  (The
        !            87:   * buffer is used for recursion, but returned unchanged.)
        !            88: + *
        !            89: + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
        !            90:   */
        !            91:  static enum delret delete_dir_contents(char *fname, uint16 flags)
        !            92:  {
        !            93: @@ -63,7 +66,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
        !            94:        save_filters = push_local_filters(fname, dlen);
        !            95:  
        !            96:        non_perishable_cnt = 0;
        !            97: +      file_extra_cnt += SUM_EXTRA_CNT;
        !            98:        dirlist = get_dirlist(fname, dlen, 0);
        !            99: +      file_extra_cnt -= SUM_EXTRA_CNT;
        !           100:        ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
        !           101:  
        !           102:        if (!dirlist->used)
        !           103: @@ -103,7 +108,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
        !           104:                if (S_ISDIR(fp->mode)) {
        !           105:                        if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
        !           106:                                ret = DR_NOT_EMPTY;
        !           107: -              }
        !           108: +              } else if (detect_renamed && S_ISREG(fp->mode))
        !           109: +                      look_for_rename(fp, fname);
        !           110:                if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
        !           111:                        ret = DR_NOT_EMPTY;
        !           112:        }
        !           113: @@ -126,6 +132,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
        !           114:   *
        !           115:   * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
        !           116:   * a directory! (The buffer is used for recursion, but returned unchanged.)
        !           117: + *
        !           118: + * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
        !           119:   */
        !           120:  enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
        !           121:  {
        !           122: @@ -153,6 +161,9 @@ enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
        !           123:                /* OK: try to delete the directory. */
        !           124:        }
        !           125:  
        !           126: +      if (flags & DEL_NO_DELETIONS)
        !           127: +              return DR_SUCCESS;
        !           128: +
        !           129:        if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && stats.deleted_files >= max_delete) {
        !           130:                skipped_deletes++;
        !           131:                return DR_AT_LIMIT;
        !           132: diff --git a/flist.c b/flist.c
        !           133: --- a/flist.c
        !           134: +++ b/flist.c
        !           135: @@ -64,6 +64,7 @@ extern int non_perishable_cnt;
        !           136:  extern int prune_empty_dirs;
        !           137:  extern int copy_links;
        !           138:  extern int copy_unsafe_links;
        !           139: +extern int detect_renamed;
        !           140:  extern int protocol_version;
        !           141:  extern int sanitize_paths;
        !           142:  extern int munge_symlinks;
        !           143: @@ -130,6 +131,8 @@ static int64 tmp_dev = -1, tmp_ino;
        !           144:  #endif
        !           145:  static char tmp_sum[MAX_DIGEST_LEN];
        !           146:  
        !           147: +struct file_list the_fattr_list;
        !           148: +
        !           149:  static char empty_sum[MAX_DIGEST_LEN];
        !           150:  static int flist_count_offset; /* for --delete --progress */
        !           151:  static int show_filelist_progress;
        !           152: @@ -277,6 +280,45 @@ static inline int is_excluded(const char *fname, int is_dir, int filter_level)
        !           153:        return name_is_excluded(fname, is_dir ? NAME_IS_DIR : NAME_IS_FILE, filter_level);
        !           154:  }
        !           155:  
        !           156: +static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
        !           157: +{
        !           158: +      struct file_struct *f1 = *file1;
        !           159: +      struct file_struct *f2 = *file2;
        !           160: +      int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
        !           161: +      int diff;
        !           162: +
        !           163: +      if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
        !           164: +              if (!f2->basename || !S_ISREG(f2->mode) || !len2)
        !           165: +                      return 0;
        !           166: +              return 1;
        !           167: +      }
        !           168: +      if (!f2->basename || !S_ISREG(f2->mode) || !len2)
        !           169: +              return -1;
        !           170: +
        !           171: +      /* Don't use diff for values that are longer than an int. */
        !           172: +      if (len1 != len2)
        !           173: +              return len1 < len2 ? -1 : 1;
        !           174: +
        !           175: +      if (always_checksum) {
        !           176: +              diff = u_memcmp(F_SUM(f1), F_SUM(f2), flist_csum_len);
        !           177: +              if (diff)
        !           178: +                      return diff;
        !           179: +      } else if (f1->modtime != f2->modtime)
        !           180: +              return f1->modtime < f2->modtime ? -1 : 1;
        !           181: +
        !           182: +      diff = u_strcmp(f1->basename, f2->basename);
        !           183: +      if (diff)
        !           184: +              return diff;
        !           185: +
        !           186: +      if (f1->dirname == f2->dirname)
        !           187: +              return 0;
        !           188: +      if (!f1->dirname)
        !           189: +              return -1;
        !           190: +      if (!f2->dirname)
        !           191: +              return 1;
        !           192: +      return u_strcmp(f1->dirname, f2->dirname);
        !           193: +}
        !           194: +
        !           195:  static void send_directory(int f, struct file_list *flist,
        !           196:                           char *fbuf, int len, int flags);
        !           197:  
        !           198: @@ -2675,6 +2717,23 @@ struct file_list *recv_file_list(int f, int dir_ndx)
        !           199:         * for a non-relative transfer in recv_file_entry(). */
        !           200:        flist_sort_and_clean(flist, relative_paths);
        !           201:  
        !           202: +      if (detect_renamed) {
        !           203: +              int j = flist->used;
        !           204: +              the_fattr_list.used = j;
        !           205: +              the_fattr_list.files = new_array(struct file_struct *, j);
        !           206: +              memcpy(the_fattr_list.files, flist->files,
        !           207: +                     j * sizeof (struct file_struct *));
        !           208: +              qsort(the_fattr_list.files, j,
        !           209: +                    sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
        !           210: +              the_fattr_list.low = 0;
        !           211: +              while (j-- > 0) {
        !           212: +                      struct file_struct *fp = the_fattr_list.files[j];
        !           213: +                      if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
        !           214: +                              break;
        !           215: +              }
        !           216: +              the_fattr_list.high = j;
        !           217: +      }
        !           218: +
        !           219:        if (protocol_version < 30) {
        !           220:                /* Recv the io_error flag */
        !           221:                int err = read_int(f);
        !           222: diff --git a/generator.c b/generator.c
        !           223: --- a/generator.c
        !           224: +++ b/generator.c
        !           225: @@ -79,6 +79,7 @@ extern int always_checksum;
        !           226:  extern int flist_csum_len;
        !           227:  extern char *partial_dir;
        !           228:  extern int alt_dest_type;
        !           229: +extern int detect_renamed;
        !           230:  extern int whole_file;
        !           231:  extern int list_only;
        !           232:  extern int read_batch;
        !           233: @@ -97,11 +98,13 @@ extern char *tmpdir;
        !           234:  extern char *basis_dir[MAX_BASIS_DIRS+1];
        !           235:  extern struct file_list *cur_flist, *first_flist, *dir_flist;
        !           236:  extern filter_rule_list filter_list, daemon_filter_list;
        !           237: +extern struct file_list the_fattr_list;
        !           238:  
        !           239:  int maybe_ATTRS_REPORT = 0;
        !           240:  int maybe_ATTRS_ACCURATE_TIME = 0;
        !           241:  
        !           242:  static dev_t dev_zero;
        !           243: +static int unexplored_dirs = 1;
        !           244:  static int deldelay_size = 0, deldelay_cnt = 0;
        !           245:  static char *deldelay_buf = NULL;
        !           246:  static int deldelay_fd = -1;
        !           247: @@ -269,14 +272,19 @@ static void do_delayed_deletions(char *delbuf)
        !           248:   * all the --delete-WHEN options.  Note that the fbuf pointer must point to a
        !           249:   * MAXPATHLEN buffer with the name of the directory in it (the functions we
        !           250:   * call will append names onto the end, but the old dir value will be restored
        !           251: - * on exit). */
        !           252: -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
        !           253: + * on exit).
        !           254: + *
        !           255: + * Note:  --detect-renamed may use this routine with DEL_NO_DELETIONS set!
        !           256: + */
        !           257: +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
        !           258: +                        int del_flags)
        !           259:  {
        !           260:        static int already_warned = 0;
        !           261:        static struct hashtable *dev_tbl;
        !           262:        struct file_list *dirlist;
        !           263: -      char delbuf[MAXPATHLEN];
        !           264: -      int dlen, i;
        !           265: +      char *p, delbuf[MAXPATHLEN];
        !           266: +      unsigned remainder;
        !           267: +      int dlen, i, restore_dot = 0;
        !           268:  
        !           269:        if (!fbuf) {
        !           270:                change_local_filter_dir(NULL, 0, 0);
        !           271: @@ -290,17 +298,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
        !           272:                maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
        !           273:  
        !           274:        if (io_error & IOERR_GENERAL && !ignore_errors) {
        !           275: -              if (already_warned)
        !           276: +              if (!already_warned) {
        !           277: +                      rprintf(FINFO,
        !           278: +                          "IO error encountered -- skipping file deletion\n");
        !           279: +                      already_warned = 1;
        !           280: +              }
        !           281: +              if (!detect_renamed)
        !           282:                        return;
        !           283: -              rprintf(FINFO,
        !           284: -                      "IO error encountered -- skipping file deletion\n");
        !           285: -              already_warned = 1;
        !           286: -              return;
        !           287: +              del_flags |= DEL_NO_DELETIONS;
        !           288:        }
        !           289:  
        !           290:        dlen = strlen(fbuf);
        !           291:        change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
        !           292:  
        !           293: +      if (detect_renamed)
        !           294: +              unexplored_dirs--;
        !           295: +
        !           296:        if (one_file_system) {
        !           297:                if (!dev_tbl)
        !           298:                        dev_tbl = hashtable_create(16, HT_KEY64);
        !           299: @@ -316,6 +329,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
        !           300:  
        !           301:        dirlist = get_dirlist(fbuf, dlen, 0);
        !           302:  
        !           303: +      p = fbuf + dlen;
        !           304: +      if (dlen == 1 && *fbuf == '.') {
        !           305: +              restore_dot = 1;
        !           306: +              p = fbuf;
        !           307: +      } else if (dlen != 1 || *fbuf != '/')
        !           308: +              *p++ = '/';
        !           309: +      remainder = MAXPATHLEN - (p - fbuf);
        !           310: +
        !           311:        /* If an item in dirlist is not found in flist, delete it
        !           312:         * from the filesystem. */
        !           313:        for (i = dirlist->used; i--; ) {
        !           314: @@ -328,6 +349,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
        !           315:                                        f_name(fp, NULL));
        !           316:                        continue;
        !           317:                }
        !           318: +              if (detect_renamed && S_ISREG(fp->mode)) {
        !           319: +                      strlcpy(p, fp->basename, remainder);
        !           320: +                      look_for_rename(fp, fbuf);
        !           321: +              }
        !           322:                /* Here we want to match regardless of file type.  Replacement
        !           323:                 * of a file with one of another type is handled separately by
        !           324:                 * a delete_item call with a DEL_MAKE_ROOM flag. */
        !           325: @@ -336,14 +361,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
        !           326:                        if (!(fp->mode & S_IWUSR) && !am_root && fp->flags & FLAG_OWNED_BY_US)
        !           327:                                flags |= DEL_NO_UID_WRITE;
        !           328:                        f_name(fp, delbuf);
        !           329: -                      if (delete_during == 2) {
        !           330: -                              if (!remember_delete(fp, delbuf, flags))
        !           331: +                      if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) {
        !           332: +                              if (!remember_delete(fp, delbuf, del_flags | flags))
        !           333:                                        break;
        !           334:                        } else
        !           335: -                              delete_item(delbuf, fp->mode, flags);
        !           336: -              }
        !           337: +                              delete_item(delbuf, fp->mode, del_flags | flags);
        !           338: +              } else if (detect_renamed && S_ISDIR(fp->mode))
        !           339: +                      unexplored_dirs++;
        !           340:        }
        !           341:  
        !           342: +      if (restore_dot)
        !           343: +              fbuf[0] = '.';
        !           344: +      fbuf[dlen] = '\0';
        !           345: +
        !           346:        flist_free(dirlist);
        !           347:  }
        !           348:  
        !           349: @@ -379,14 +409,125 @@ static void do_delete_pass(void)
        !           350:                 || !S_ISDIR(st.st_mode))
        !           351:                        continue;
        !           352:  
        !           353: -              delete_in_dir(fbuf, file, &st.st_dev);
        !           354: +              delete_in_dir(fbuf, file, &st.st_dev, 0);
        !           355:        }
        !           356: -      delete_in_dir(NULL, NULL, &dev_zero);
        !           357: +      delete_in_dir(NULL, NULL, &dev_zero, 0);
        !           358:  
        !           359:        if (INFO_GTE(FLIST, 2) && !am_server)
        !           360:                rprintf(FINFO, "                    \r");
        !           361:  }
        !           362:  
        !           363: +/* Search for a regular file that matches either (1) the size & modified
        !           364: + * time (plus the basename, if possible) or (2) the size & checksum.  If
        !           365: + * we find an exact match down to the dirname, return -1 because we found
        !           366: + * an up-to-date file in the transfer, not a renamed file. */
        !           367: +static int fattr_find(struct file_struct *f, char *fname)
        !           368: +{
        !           369: +      int low = the_fattr_list.low, high = the_fattr_list.high;
        !           370: +      int mid, ok_match = -1, good_match = -1;
        !           371: +      struct file_struct *fmid;
        !           372: +      int diff;
        !           373: +
        !           374: +      while (low <= high) {
        !           375: +              mid = (low + high) / 2;
        !           376: +              fmid = the_fattr_list.files[mid];
        !           377: +              if (F_LENGTH(fmid) != F_LENGTH(f)) {
        !           378: +                      if (F_LENGTH(fmid) < F_LENGTH(f))
        !           379: +                              low = mid + 1;
        !           380: +                      else
        !           381: +                              high = mid - 1;
        !           382: +                      continue;
        !           383: +              }
        !           384: +              if (always_checksum) {
        !           385: +                      /* We use the FLAG_FILE_SENT flag to indicate when we
        !           386: +                       * have computed the checksum for an entry. */
        !           387: +                      if (!(f->flags & FLAG_FILE_SENT)) {
        !           388: +                              STRUCT_STAT st;
        !           389: +                              if (fmid->modtime == f->modtime
        !           390: +                               && f_name_cmp(fmid, f) == 0)
        !           391: +                                      return -1; /* assume we can't help */
        !           392: +                              st.st_size = F_LENGTH(f);
        !           393: +                              st.st_mtime = f->modtime;
        !           394: +                              file_checksum(fname, &st, F_SUM(f));
        !           395: +                              f->flags |= FLAG_FILE_SENT;
        !           396: +                      }
        !           397: +                      diff = u_memcmp(F_SUM(fmid), F_SUM(f), flist_csum_len);
        !           398: +                      if (diff) {
        !           399: +                              if (diff < 0)
        !           400: +                                      low = mid + 1;
        !           401: +                              else
        !           402: +                                      high = mid - 1;
        !           403: +                              continue;
        !           404: +                      }
        !           405: +              } else {
        !           406: +                      if (fmid->modtime != f->modtime) {
        !           407: +                              if (fmid->modtime < f->modtime)
        !           408: +                                      low = mid + 1;
        !           409: +                              else
        !           410: +                                      high = mid - 1;
        !           411: +                              continue;
        !           412: +                      }
        !           413: +              }
        !           414: +              ok_match = mid;
        !           415: +              diff = u_strcmp(fmid->basename, f->basename);
        !           416: +              if (diff == 0) {
        !           417: +                      good_match = mid;
        !           418: +                      if (fmid->dirname == f->dirname)
        !           419: +                              return -1; /* file is up-to-date */
        !           420: +                      if (!fmid->dirname) {
        !           421: +                              low = mid + 1;
        !           422: +                              continue;
        !           423: +                      }
        !           424: +                      if (!f->dirname) {
        !           425: +                              high = mid - 1;
        !           426: +                              continue;
        !           427: +                      }
        !           428: +                      diff = u_strcmp(fmid->dirname, f->dirname);
        !           429: +                      if (diff == 0)
        !           430: +                              return -1; /* file is up-to-date */
        !           431: +              }
        !           432: +              if (diff < 0)
        !           433: +                      low = mid + 1;
        !           434: +              else
        !           435: +                      high = mid - 1;
        !           436: +      }
        !           437: +
        !           438: +      return good_match >= 0 ? good_match : ok_match;
        !           439: +}
        !           440: +
        !           441: +void look_for_rename(struct file_struct *file, char *fname)
        !           442: +{
        !           443: +      struct file_struct *fp;
        !           444: +      char *partialptr, *fn;
        !           445: +      STRUCT_STAT st;
        !           446: +      int ndx;
        !           447: +
        !           448: +      if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
        !           449: +              return;
        !           450: +
        !           451: +      fp = the_fattr_list.files[ndx];
        !           452: +      fn = f_name(fp, NULL);
        !           453: +      /* We don't provide an alternate-basis file if there is a basis file. */
        !           454: +      if (link_stat(fn, &st, 0) == 0)
        !           455: +              return;
        !           456: +
        !           457: +      if (!dry_run) {
        !           458: +              if ((partialptr = partial_dir_fname(fn)) == NULL
        !           459: +               || !handle_partial_dir(partialptr, PDIR_CREATE))
        !           460: +                      return;
        !           461: +              /* We only use the file if we can hard-link it into our tmp dir. */
        !           462: +              if (link(fname, partialptr) != 0) {
        !           463: +                      if (errno != EEXIST)
        !           464: +                              handle_partial_dir(partialptr, PDIR_DELETE);
        !           465: +                      return;
        !           466: +              }
        !           467: +      }
        !           468: +
        !           469: +      /* I think this falls into the -vv category with "%s is uptodate", etc. */
        !           470: +      if (INFO_GTE(MISC, 2))
        !           471: +              rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
        !           472: +}
        !           473: +
        !           474:  static inline int mtime_differs(STRUCT_STAT *stp, struct file_struct *file)
        !           475:  {
        !           476:  #ifdef ST_MTIME_NSEC
        !           477: @@ -1187,6 +1328,7 @@ static void list_file_entry(struct file_struct *f)
        !           478:        }
        !           479:  }
        !           480:  
        !           481: +static struct bitbag *delayed_bits = NULL;
        !           482:  static int phase = 0;
        !           483:  static int dflt_perms;
        !           484:  
        !           485: @@ -1323,7 +1465,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        !           486:                         && do_stat(dn, &sx.st) < 0) {
        !           487:                                if (dry_run)
        !           488:                                        goto parent_is_dry_missing;
        !           489: -                              if (make_path(fname, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0) {
        !           490: +                              if (make_path(fname, ACCESSPERMS, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0) {
        !           491:                                        rsyserr(FERROR_XFER, errno,
        !           492:                                                "recv_generator: mkdir %s failed",
        !           493:                                                full_fname(dn));
        !           494: @@ -1459,7 +1601,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        !           495:                }
        !           496:                if (real_ret != 0 && do_mkdir(fname,file->mode|added_perms) < 0 && errno != EEXIST) {
        !           497:                        if (!relative_paths || errno != ENOENT
        !           498: -                       || make_path(fname, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0
        !           499: +                       || make_path(fname, ACCESSPERMS, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0
        !           500:                         || (do_mkdir(fname, file->mode|added_perms) < 0 && errno != EEXIST)) {
        !           501:                                rsyserr(FERROR_XFER, errno,
        !           502:                                        "recv_generator: mkdir %s failed",
        !           503: @@ -1507,9 +1649,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        !           504:                }
        !           505:                else if (delete_during && f_out != -1 && !phase
        !           506:                    && !(file->flags & FLAG_MISSING_DIR)) {
        !           507: -                      if (file->flags & FLAG_CONTENT_DIR)
        !           508: -                              delete_in_dir(fname, file, &real_sx.st.st_dev);
        !           509: -                      else
        !           510: +                      if (file->flags & FLAG_CONTENT_DIR) {
        !           511: +                              if (detect_renamed && real_ret != 0)
        !           512: +                                      unexplored_dirs++;
        !           513: +                              delete_in_dir(fname, file, &real_sx.st.st_dev,
        !           514: +                                            delete_during < 0 ? DEL_NO_DELETIONS : 0);
        !           515: +                      } else
        !           516:                                change_local_filter_dir(fname, strlen(fname), F_DEPTH(file));
        !           517:                }
        !           518:                prior_dir_file = file;
        !           519: @@ -1786,8 +1931,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        !           520:                        goto cleanup;
        !           521:                }
        !           522:  #endif
        !           523: -              if (stat_errno == ENOENT)
        !           524: +              if (stat_errno == ENOENT) {
        !           525: +                      if (detect_renamed && unexplored_dirs > 0
        !           526: +                       && F_LENGTH(file)) {
        !           527: +                              bitbag_set_bit(delayed_bits, ndx);
        !           528: +                              return;
        !           529: +                      }
        !           530:                        goto notify_others;
        !           531: +              }
        !           532:                rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
        !           533:                        full_fname(fname));
        !           534:                goto cleanup;
        !           535: @@ -2251,6 +2402,12 @@ void generate_files(int f_out, const char *local_name)
        !           536:        if (DEBUG_GTE(GENR, 1))
        !           537:                rprintf(FINFO, "generator starting pid=%d\n", (int)getpid());
        !           538:  
        !           539: +      if (detect_renamed) {
        !           540: +              delayed_bits = bitbag_create(cur_flist->used);
        !           541: +              if (!delete_before && !delete_during)
        !           542: +                      delete_during = -1;
        !           543: +      }
        !           544: +
        !           545:        if (delete_before && !solo_file && cur_flist->used > 0)
        !           546:                do_delete_pass();
        !           547:        if (delete_during == 2) {
        !           548: @@ -2259,7 +2416,7 @@ void generate_files(int f_out, const char *local_name)
        !           549:        }
        !           550:        info_levels[INFO_FLIST] = info_levels[INFO_PROGRESS] = 0;
        !           551:  
        !           552: -      if (append_mode > 0 || whole_file < 0)
        !           553: +      if (append_mode > 0 || detect_renamed || whole_file < 0)
        !           554:                whole_file = 0;
        !           555:        if (DEBUG_GTE(FLIST, 1)) {
        !           556:                rprintf(FINFO, "delta-transmission %s\n",
        !           557: @@ -2295,7 +2452,7 @@ void generate_files(int f_out, const char *local_name)
        !           558:                                                dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
        !           559:                                        } else
        !           560:                                                dirdev = MAKEDEV(0, 0);
        !           561: -                                      delete_in_dir(fbuf, fp, &dirdev);
        !           562: +                                      delete_in_dir(fbuf, fp, &dirdev, 0);
        !           563:                                } else
        !           564:                                        change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp));
        !           565:                        }
        !           566: @@ -2342,7 +2499,21 @@ void generate_files(int f_out, const char *local_name)
        !           567:        } while ((cur_flist = cur_flist->next) != NULL);
        !           568:  
        !           569:        if (delete_during)
        !           570: -              delete_in_dir(NULL, NULL, &dev_zero);
        !           571: +              delete_in_dir(NULL, NULL, &dev_zero, 0);
        !           572: +      if (detect_renamed) {
        !           573: +              if (delete_during < 0)
        !           574: +                      delete_during = 0;
        !           575: +              detect_renamed = 0;
        !           576: +
        !           577: +              for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
        !           578: +                      struct file_struct *file = cur_flist->files[i];
        !           579: +                      if (local_name)
        !           580: +                              strlcpy(fbuf, local_name, sizeof fbuf);
        !           581: +                      else
        !           582: +                              f_name(file, fbuf);
        !           583: +                      recv_generator(fbuf, file, i, itemizing, code, f_out);
        !           584: +              }
        !           585: +      }
        !           586:        phase++;
        !           587:        if (DEBUG_GTE(GENR, 1))
        !           588:                rprintf(FINFO, "generate_files phase=%d\n", phase);
        !           589: diff --git a/main.c b/main.c
        !           590: --- a/main.c
        !           591: +++ b/main.c
        !           592: @@ -721,7 +721,7 @@ static char *get_local_name(struct file_list *flist, char *dest_path)
        !           593:        trailing_slash = cp && !cp[1];
        !           594:  
        !           595:        if (mkpath_dest_arg && statret < 0 && (cp || file_total > 1)) {
        !           596: -              int ret = make_path(dest_path, file_total > 1 && !trailing_slash ? 0 : MKP_DROP_NAME);
        !           597: +              int ret = make_path(dest_path, ACCESSPERMS, file_total > 1 && !trailing_slash ? 0 : MKP_DROP_NAME);
        !           598:                if (ret < 0)
        !           599:                        goto mkdir_error;
        !           600:                if (INFO_GTE(NAME, 1)) {
        !           601: diff --git a/options.c b/options.c
        !           602: --- a/options.c
        !           603: +++ b/options.c
        !           604: @@ -84,6 +84,7 @@ int am_server = 0;
        !           605:  int am_sender = 0;
        !           606:  int am_starting_up = 1;
        !           607:  int relative_paths = -1;
        !           608: +int detect_renamed = 0;
        !           609:  int implied_dirs = 1;
        !           610:  int missing_args = 0; /* 0 = FERROR_XFER, 1 = ignore, 2 = delete */
        !           611:  int numeric_ids = 0;
        !           612: @@ -733,6 +734,7 @@ static struct poptOption long_options[] = {
        !           613:    {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
        !           614:    {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
        !           615:    {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
        !           616: +  {"detect-renamed",   0,  POPT_ARG_NONE,   &detect_renamed, 0, 0, 0 },
        !           617:    {"fuzzy",           'y', POPT_ARG_NONE,   0, 'y', 0, 0 },
        !           618:    {"no-fuzzy",         0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
        !           619:    {"no-y",             0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
        !           620: @@ -2346,7 +2348,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
        !           621:                inplace = 1;
        !           622:        }
        !           623:  
        !           624: -      if (delay_updates && !partial_dir)
        !           625: +      if ((delay_updates || detect_renamed) && !partial_dir)
        !           626:                partial_dir = tmp_partialdir;
        !           627:  
        !           628:        if (inplace) {
        !           629: @@ -2355,6 +2357,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
        !           630:                        snprintf(err_buf, sizeof err_buf,
        !           631:                                 "--%s cannot be used with --%s\n",
        !           632:                                 append_mode ? "append" : "inplace",
        !           633: +                               detect_renamed ? "detect-renamed" :
        !           634:                                 delay_updates ? "delay-updates" : "partial-dir");
        !           635:                        return 0;
        !           636:                }
        !           637: @@ -2760,6 +2763,8 @@ void server_options(char **args, int *argc_p)
        !           638:                        args[ac++] = "--super";
        !           639:                if (size_only)
        !           640:                        args[ac++] = "--size-only";
        !           641: +              if (detect_renamed)
        !           642: +                      args[ac++] = "--detect-renamed";
        !           643:                if (do_stats)
        !           644:                        args[ac++] = "--stats";
        !           645:        } else {
        !           646: diff --git a/receiver.c b/receiver.c
        !           647: --- a/receiver.c
        !           648: +++ b/receiver.c
        !           649: @@ -217,7 +217,7 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file)
        !           650:         * information should have been previously transferred, but that may
        !           651:         * not be the case with -R */
        !           652:        if (fd == -1 && relative_paths && errno == ENOENT
        !           653: -       && make_path(fnametmp, MKP_SKIP_SLASH | MKP_DROP_NAME) == 0) {
        !           654: +       && make_path(fnametmp, ACCESSPERMS, MKP_SKIP_SLASH | MKP_DROP_NAME) == 0) {
        !           655:                /* Get back to name with XXXXXX in it. */
        !           656:                get_tmpname(fnametmp, fname, False);
        !           657:                fd = do_mkstemp(fnametmp, (file->mode|added_perms) & INITACCESSPERMS);
        !           658: diff --git a/rsync.1.md b/rsync.1.md
        !           659: --- a/rsync.1.md
        !           660: +++ b/rsync.1.md
        !           661: @@ -421,6 +421,7 @@ detailed description below for a complete description.
        !           662:  --modify-window=NUM, -@  set the accuracy for mod-time comparisons
        !           663:  --temp-dir=DIR, -T       create temporary files in directory DIR
        !           664:  --fuzzy, -y              find similar file for basis if no dest file
        !           665: +--detect-renamed         try to find renamed files to speed the xfer
        !           666:  --compare-dest=DIR       also compare destination files relative to DIR
        !           667:  --copy-dest=DIR          ... and include copies of unchanged files
        !           668:  --link-dest=DIR          hardlink to files in DIR when unchanged
        !           669: @@ -2247,6 +2248,22 @@ your home directory (remove the '=' for that).
        !           670:      fuzzy-match files, so either use `--delete-after` or specify some filename
        !           671:      exclusions if you need to prevent this.
        !           672:  
        !           673: +0.  `--detect-renamed`
        !           674: +
        !           675: +    With this option, for each new source file (call it `src/S`), rsync looks
        !           676: +    for a file `dest/D` anywhere in the destination that passes the quick check
        !           677: +    with `src/S`.  If such a `dest/D` is found, rsync uses it as an alternate
        !           678: +    basis for transferring `S`.  The idea is that if `src/S` was renamed from
        !           679: +    `src/D` (as opposed to `src/S` passing the quick check with `dest/D` by
        !           680: +    coincidence), the delta-transfer algorithm will find that all the data
        !           681: +    matches between `src/S` and `dest/D`, and the transfer will be really fast.
        !           682: +
        !           683: +    By default, alternate-basis files are hard-linked into a directory named
        !           684: +    ".~tmp~" in each file's destination directory, but if you've specified the
        !           685: +    `--partial-dir` option, that directory will be used instead.  These
        !           686: +    potential alternate-basis files will be removed as the transfer progresses.
        !           687: +    This option conflicts with `--inplace` and `--append`.
        !           688: +
        !           689:  0.  `--compare-dest=DIR`
        !           690:  
        !           691:      This option instructs rsync to use _DIR_ on the destination machine as an
        !           692: diff --git a/rsync.h b/rsync.h
        !           693: --- a/rsync.h
        !           694: +++ b/rsync.h
        !           695: @@ -272,7 +272,7 @@ enum msgcode {
        !           696:  #define NDX_DEL_STATS -3
        !           697:  #define NDX_FLIST_OFFSET -101
        !           698:  
        !           699: -/* For calling delete_item() and delete_dir_contents(). */
        !           700: +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
        !           701:  #define DEL_NO_UID_WRITE      (1<<0) /* file/dir has our uid w/o write perm */
        !           702:  #define DEL_RECURSE           (1<<1) /* if dir, delete all contents */
        !           703:  #define DEL_DIR_IS_EMPTY      (1<<2) /* internal delete_FUNCTIONS use only */
        !           704: @@ -282,6 +282,7 @@ enum msgcode {
        !           705:  #define DEL_FOR_DEVICE                (1<<6) /* making room for a replacement device */
        !           706:  #define DEL_FOR_SPECIAL       (1<<7) /* making room for a replacement special */
        !           707:  #define DEL_FOR_BACKUP                (1<<8) /* the delete is for a backup operation */
        !           708: +#define DEL_NO_DELETIONS      (1<<9) /* just check for renames w/o deleting */
        !           709:  
        !           710:  #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
        !           711:  
        !           712: diff --git a/util.c b/util.c
        !           713: --- a/util.c
        !           714: +++ b/util.c
        !           715: @@ -182,7 +182,7 @@ int set_times(const char *fname, STRUCT_STAT *stp)
        !           716:  /* Create any necessary directories in fname.  Any missing directories are
        !           717:   * created with default permissions.  Returns < 0 on error, or the number
        !           718:   * of directories created. */
        !           719: -int make_path(char *fname, int flags)
        !           720: +int make_path(char *fname, mode_t mode, int flags)
        !           721:  {
        !           722:        char *end, *p;
        !           723:        int ret = 0;
        !           724: @@ -213,7 +213,7 @@ int make_path(char *fname, int flags)
        !           725:                                else
        !           726:                                        errno = ENOTDIR;
        !           727:                        }
        !           728: -              } else if (do_mkdir(fname, ACCESSPERMS) == 0) {
        !           729: +              } else if (do_mkdir(fname, mode) == 0) {
        !           730:                        ret++;
        !           731:                        break;
        !           732:                }
        !           733: @@ -252,7 +252,7 @@ int make_path(char *fname, int flags)
        !           734:                p += strlen(p);
        !           735:                if (ret < 0) /* Skip mkdir on error, but keep restoring the path. */
        !           736:                        continue;
        !           737: -              if (do_mkdir(fname, ACCESSPERMS) < 0)
        !           738: +              if (do_mkdir(fname, mode) < 0)
        !           739:                        ret = -ret - 1;
        !           740:                else
        !           741:                        ret++;
        !           742: @@ -1162,6 +1162,32 @@ char *normalize_path(char *path, BOOL force_newbuf, unsigned int *len_ptr)
        !           743:        return path;
        !           744:  }
        !           745:  
        !           746: +/* Compare two NUL-terminated strings one byte at a time, treating each byte
        !           747: + * as unsigned so file-list ordering does not depend on the host's char sign. */
        !           748: +int u_strcmp(const char *p1, const char *p2)
        !           749: +{
        !           750: +      while (*p1 && *p1 == *p2) {
        !           751: +              p1++;
        !           752: +              p2++;
        !           753: +      }
        !           754: +
        !           755: +      return (int)*(uchar*)p1 - (int)*(uchar*)p2;
        !           756: +}
        !           757: +
        !           758: +/* A memcmp() work-alike that compares len bytes as unsigned values; returns <0, 0, or >0. */
        !           759: +int u_memcmp(const void *p1, const void *p2, size_t len)
        !           760: +{
        !           761: +      const uchar *u1 = p1;
        !           762: +      const uchar *u2 = p2;
        !           763: +
        !           764: +      for ( ; len--; u1++, u2++) { /* step both cursors, or only byte 0 is ever compared */
        !           765: +              if (*u1 != *u2)
        !           766: +                      return (int)*u1 - (int)*u2;
        !           767: +      }
        !           768: +
        !           769: +      return 0;
        !           770: +}
        !           771: +
        !           772:  /**
        !           773:   * Return a quoted string with the full pathname of the indicated filename.
        !           774:   * The string " (in MODNAME)" may also be appended.  The returned pointer
        !           775: @@ -1255,7 +1281,7 @@ int handle_partial_dir(const char *fname, int create)
        !           776:                        }
        !           777:                        statret = -1;
        !           778:                }
        !           779: -              if (statret < 0 && do_mkdir(dir, 0700) < 0) {
        !           780: +              if (statret < 0 && make_path(dir, 0700, 0) < 0) {
        !           781:                        *fn = '/';
        !           782:                        return 0;
        !           783:                }
        !           784: diff -Nurp a/rsync.1 b/rsync.1
        !           785: --- a/rsync.1
        !           786: +++ b/rsync.1
        !           787: @@ -497,6 +497,7 @@ detailed description below for a complet
        !           788:  --modify-window=NUM, -@  set the accuracy for mod-time comparisons
        !           789:  --temp-dir=DIR, -T       create temporary files in directory DIR
        !           790:  --fuzzy, -y              find similar file for basis if no dest file
        !           791: +--detect-renamed         try to find renamed files to speed the xfer
        !           792:  --compare-dest=DIR       also compare destination files relative to DIR
        !           793:  --copy-dest=DIR          ... and include copies of unchanged files
        !           794:  --link-dest=DIR          hardlink to files in DIR when unchanged
        !           795: @@ -2290,6 +2291,20 @@ alternate destination directories that a
        !           796:  Note that the use of the \fB\-\-delete\fP option might get rid of any potential
        !           797:  fuzzy-match files, so either use \fB\-\-delete-after\fP or specify some filename
        !           798:  exclusions if you need to prevent this.
        !           799: +.IP "\fB\-\-detect-renamed\fP"
        !           800: +With this option, for each new source file (call it \fBsrc/S\fP), rsync looks
        !           801: +for a file \fBdest/D\fP anywhere in the destination that passes the quick check
        !           802: +with \fBsrc/S\fP.  If such a \fBdest/D\fP is found, rsync uses it as an alternate
        !           803: +basis for transferring \fBS\fP.  The idea is that if \fBsrc/S\fP was renamed from
        !           804: +\fBsrc/D\fP (as opposed to \fBsrc/S\fP passing the quick check with \fBdest/D\fP by
        !           805: +coincidence), the delta-transfer algorithm will find that all the data
        !           806: +matches between \fBsrc/S\fP and \fBdest/D\fP, and the transfer will be really fast.
        !           807: +.IP
        !           808: +By default, alternate-basis files are hard-linked into a directory named
        !           809: +".~tmp~" in each file's destination directory, but if you've specified the
        !           810: +\fB\-\-partial-dir\fP option, that directory will be used instead.  These
        !           811: +potential alternate-basis files will be removed as the transfer progresses.
        !           812: +This option conflicts with \fB\-\-inplace\fP and \fB\-\-append\fP.
        !           813:  .IP "\fB\-\-compare-dest=DIR\fP"
        !           814:  This option instructs rsync to use \fIDIR\fP on the destination machine as an
        !           815:  additional hierarchy to compare destination files against doing transfers
        !           816: diff -Nurp a/rsync.1.html b/rsync.1.html
        !           817: --- a/rsync.1.html
        !           818: +++ b/rsync.1.html
        !           819: @@ -412,6 +412,7 @@ detailed description below for a complet
        !           820:  --modify-window=NUM, -@  set the accuracy for mod-time comparisons
        !           821:  --temp-dir=DIR, -T       create temporary files in directory DIR
        !           822:  --fuzzy, -y              find similar file for basis if no dest file
        !           823: +--detect-renamed         try to find renamed files to speed the xfer
        !           824:  --compare-dest=DIR       also compare destination files relative to DIR
        !           825:  --copy-dest=DIR          ... and include copies of unchanged files
        !           826:  --link-dest=DIR          hardlink to files in DIR when unchanged
        !           827: @@ -2135,6 +2136,21 @@ fuzzy-match files, so either use <code>-
        !           828:  exclusions if you need to prevent this.</p>
        !           829:  </dd>
        !           830:  
        !           831: +<dt><code>--detect-renamed</code></dt><dd>
        !           832: +<p>With this option, for each new source file (call it <code>src/S</code>), rsync looks
        !           833: +for a file <code>dest/D</code> anywhere in the destination that passes the quick check
        !           834: +with <code>src/S</code>.  If such a <code>dest/D</code> is found, rsync uses it as an alternate
        !           835: +basis for transferring <code>S</code>.  The idea is that if <code>src/S</code> was renamed from
        !           836: +<code>src/D</code> (as opposed to <code>src/S</code> passing the quick check with <code>dest/D</code> by
        !           837: +coincidence), the delta-transfer algorithm will find that all the data
        !           838: +matches between <code>src/S</code> and <code>dest/D</code>, and the transfer will be really fast.</p>
        !           839: +<p>By default, alternate-basis files are hard-linked into a directory named
        !           840: +&quot;.~tmp~&quot; in each file's destination directory, but if you've specified the
        !           841: +<code>--partial-dir</code> option, that directory will be used instead.  These
        !           842: +potential alternate-basis files will be removed as the transfer progresses.
        !           843: +This option conflicts with <code>--inplace</code> and <code>--append</code>.</p>
        !           844: +</dd>
        !           845: +
        !           846:  <dt><code>--compare-dest=DIR</code></dt><dd>
        !           847:  <p>This option instructs rsync to use <u>DIR</u> on the destination machine as an
        !           848:  additional hierarchy to compare destination files against doing transfers

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>