Annotation of embedaddon/rsync/patches/detect-renamed.diff, revision 1.1
1.1 ! misho 1: This patch adds the --detect-renamed option which makes rsync notice files
! 2: that either (1) match in size & modify-time (plus the basename, if possible)
! 3: or (2) match in size & checksum (when --checksum was also specified) and use
! 4: each match as an alternate basis file to speed up the transfer.
! 5:
! 6: The algorithm attempts to scan the receiving-side's files in an efficient
! 7: manner. If --delete[-before] is enabled, we'll take advantage of the
! 8: pre-transfer delete pass to prepare any alternate-basis-file matches we
! 9: might find. If --delete-before is not enabled, rsync does the rename scan
! 10: during the regular file-sending scan (scanning each directory right before
! 11: the generator starts updating files from that dir). In this latter mode,
! 12: rsync might delay the updating of a file (if no alternate-basis match was
! 13: yet found) until the full scan of the receiving side is complete, at which
! 14: point any delayed files are processed.
! 15:
! 16: I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
! 17: takes advantage of rsync's pre-existing partial-dir logic. This uses less
! 18: memory than trying to keep track of the matches internally, and also allows
! 19: any deletions or file-updates to occur normally without interfering with
! 20: these alternate-basis discoveries.
! 21:
! 22: To use this patch, run these commands for a successful build:
! 23:
! 24: patch -p1 <patches/detect-renamed.diff
! 25: ./configure (optional if already run)
! 26: make
! 27:
! 28: TODO:
! 29:
! 30: The routine that makes missing directories for files that get renamed
! 31: down into a new sub-hierarchy doesn't properly handle the case where some
! 32: path elements might exist but not be a dir yet. We need to either change
! 33: our stash-ahead algorithm (to not require unknown path elements) or we
! 34: need to create a better path-making routine.
! 35:
! 36: We need to never return a match from fattr_find() that has a basis
! 37: file. This will ensure that we don't try to give a renamed file to
! 38: a file that can't use it, while missing out on giving it to a file
! 39: that could use it.
! 40:
! 41: based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
! 42: diff --git a/backup.c b/backup.c
! 43: --- a/backup.c
! 44: +++ b/backup.c
! 45: @@ -162,7 +162,7 @@ char *get_backup_name(const char *fname)
! 46: int ret;
! 47: if (backup_dir_len > 1)
! 48: backup_dir_buf[backup_dir_len-1] = '\0';
! 49: - ret = make_path(backup_dir_buf, 0);
! 50: + ret = make_path(backup_dir_buf, ACCESSPERMS, 0);
! 51: if (backup_dir_len > 1)
! 52: backup_dir_buf[backup_dir_len-1] = '/';
! 53: if (ret < 0)
! 54: diff --git a/compat.c b/compat.c
! 55: --- a/compat.c
! 56: +++ b/compat.c
! 57: @@ -39,6 +39,7 @@ extern int checksum_seed;
! 58: extern int basis_dir_cnt;
! 59: extern int prune_empty_dirs;
! 60: extern int protocol_version;
! 61: +extern int detect_renamed;
! 62: extern int protect_args;
! 63: extern int preserve_uid;
! 64: extern int preserve_gid;
! 65: @@ -159,6 +160,7 @@ void set_allow_inc_recurse(void)
! 66: allow_inc_recurse = 0;
! 67: else if (!am_sender
! 68: && (delete_before || delete_after
! 69: + || detect_renamed
! 70: || delay_updates || prune_empty_dirs))
! 71: allow_inc_recurse = 0;
! 72: else if (am_server && !local_server
! 73: diff --git a/delete.c b/delete.c
! 74: --- a/delete.c
! 75: +++ b/delete.c
! 76: @@ -25,6 +25,7 @@
! 77: extern int am_root;
! 78: extern int make_backups;
! 79: extern int max_delete;
! 80: +extern int detect_renamed;
! 81: extern char *backup_dir;
! 82: extern char *backup_suffix;
! 83: extern int backup_suffix_len;
! 84: @@ -44,6 +45,8 @@ static inline int is_backup_file(char *fn)
! 85: * its contents, otherwise just checks for content. Returns DR_SUCCESS or
! 86: * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The
! 87: * buffer is used for recursion, but returned unchanged.)
! 88: + *
! 89: + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
! 90: */
! 91: static enum delret delete_dir_contents(char *fname, uint16 flags)
! 92: {
! 93: @@ -63,7 +66,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
! 94: save_filters = push_local_filters(fname, dlen);
! 95:
! 96: non_perishable_cnt = 0;
! 97: + file_extra_cnt += SUM_EXTRA_CNT;
! 98: dirlist = get_dirlist(fname, dlen, 0);
! 99: + file_extra_cnt -= SUM_EXTRA_CNT;
! 100: ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
! 101:
! 102: if (!dirlist->used)
! 103: @@ -103,7 +108,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
! 104: if (S_ISDIR(fp->mode)) {
! 105: if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
! 106: ret = DR_NOT_EMPTY;
! 107: - }
! 108: + } else if (detect_renamed && S_ISREG(fp->mode))
! 109: + look_for_rename(fp, fname);
! 110: if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
! 111: ret = DR_NOT_EMPTY;
! 112: }
! 113: @@ -126,6 +132,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
! 114: *
! 115: * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
! 116: * a directory! (The buffer is used for recursion, but returned unchanged.)
! 117: + *
! 118: + * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
! 119: */
! 120: enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
! 121: {
! 122: @@ -153,6 +161,9 @@ enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
! 123: /* OK: try to delete the directory. */
! 124: }
! 125:
! 126: + if (flags & DEL_NO_DELETIONS)
! 127: + return DR_SUCCESS;
! 128: +
! 129: if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && stats.deleted_files >= max_delete) {
! 130: skipped_deletes++;
! 131: return DR_AT_LIMIT;
! 132: diff --git a/flist.c b/flist.c
! 133: --- a/flist.c
! 134: +++ b/flist.c
! 135: @@ -64,6 +64,7 @@ extern int non_perishable_cnt;
! 136: extern int prune_empty_dirs;
! 137: extern int copy_links;
! 138: extern int copy_unsafe_links;
! 139: +extern int detect_renamed;
! 140: extern int protocol_version;
! 141: extern int sanitize_paths;
! 142: extern int munge_symlinks;
! 143: @@ -130,6 +131,8 @@ static int64 tmp_dev = -1, tmp_ino;
! 144: #endif
! 145: static char tmp_sum[MAX_DIGEST_LEN];
! 146:
! 147: +struct file_list the_fattr_list;
! 148: +
! 149: static char empty_sum[MAX_DIGEST_LEN];
! 150: static int flist_count_offset; /* for --delete --progress */
! 151: static int show_filelist_progress;
! 152: @@ -277,6 +280,45 @@ static inline int is_excluded(const char *fname, int is_dir, int filter_level)
! 153: return name_is_excluded(fname, is_dir ? NAME_IS_DIR : NAME_IS_FILE, filter_level);
! 154: }
! 155:
! 156: +static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
! 157: +{
! 158: + struct file_struct *f1 = *file1;
! 159: + struct file_struct *f2 = *file2;
! 160: + int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
! 161: + int diff;
! 162: +
! 163: + if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
! 164: + if (!f2->basename || !S_ISREG(f2->mode) || !len2)
! 165: + return 0;
! 166: + return 1;
! 167: + }
! 168: + if (!f2->basename || !S_ISREG(f2->mode) || !len2)
! 169: + return -1;
! 170: +
! 171: + /* Don't use diff for values that are longer than an int. */
! 172: + if (len1 != len2)
! 173: + return len1 < len2 ? -1 : 1;
! 174: +
! 175: + if (always_checksum) {
! 176: + diff = u_memcmp(F_SUM(f1), F_SUM(f2), flist_csum_len);
! 177: + if (diff)
! 178: + return diff;
! 179: + } else if (f1->modtime != f2->modtime)
! 180: + return f1->modtime < f2->modtime ? -1 : 1;
! 181: +
! 182: + diff = u_strcmp(f1->basename, f2->basename);
! 183: + if (diff)
! 184: + return diff;
! 185: +
! 186: + if (f1->dirname == f2->dirname)
! 187: + return 0;
! 188: + if (!f1->dirname)
! 189: + return -1;
! 190: + if (!f2->dirname)
! 191: + return 1;
! 192: + return u_strcmp(f1->dirname, f2->dirname);
! 193: +}
! 194: +
! 195: static void send_directory(int f, struct file_list *flist,
! 196: char *fbuf, int len, int flags);
! 197:
! 198: @@ -2675,6 +2717,23 @@ struct file_list *recv_file_list(int f, int dir_ndx)
! 199: * for a non-relative transfer in recv_file_entry(). */
! 200: flist_sort_and_clean(flist, relative_paths);
! 201:
! 202: + if (detect_renamed) {
! 203: + int j = flist->used;
! 204: + the_fattr_list.used = j;
! 205: + the_fattr_list.files = new_array(struct file_struct *, j);
! 206: + memcpy(the_fattr_list.files, flist->files,
! 207: + j * sizeof (struct file_struct *));
! 208: + qsort(the_fattr_list.files, j,
! 209: + sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
! 210: + the_fattr_list.low = 0;
! 211: + while (j-- > 0) {
! 212: + struct file_struct *fp = the_fattr_list.files[j];
! 213: + if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
! 214: + break;
! 215: + }
! 216: + the_fattr_list.high = j;
! 217: + }
! 218: +
! 219: if (protocol_version < 30) {
! 220: /* Recv the io_error flag */
! 221: int err = read_int(f);
! 222: diff --git a/generator.c b/generator.c
! 223: --- a/generator.c
! 224: +++ b/generator.c
! 225: @@ -79,6 +79,7 @@ extern int always_checksum;
! 226: extern int flist_csum_len;
! 227: extern char *partial_dir;
! 228: extern int alt_dest_type;
! 229: +extern int detect_renamed;
! 230: extern int whole_file;
! 231: extern int list_only;
! 232: extern int read_batch;
! 233: @@ -97,11 +98,13 @@ extern char *tmpdir;
! 234: extern char *basis_dir[MAX_BASIS_DIRS+1];
! 235: extern struct file_list *cur_flist, *first_flist, *dir_flist;
! 236: extern filter_rule_list filter_list, daemon_filter_list;
! 237: +extern struct file_list the_fattr_list;
! 238:
! 239: int maybe_ATTRS_REPORT = 0;
! 240: int maybe_ATTRS_ACCURATE_TIME = 0;
! 241:
! 242: static dev_t dev_zero;
! 243: +static int unexplored_dirs = 1;
! 244: static int deldelay_size = 0, deldelay_cnt = 0;
! 245: static char *deldelay_buf = NULL;
! 246: static int deldelay_fd = -1;
! 247: @@ -269,14 +272,19 @@ static void do_delayed_deletions(char *delbuf)
! 248: * all the --delete-WHEN options. Note that the fbuf pointer must point to a
! 249: * MAXPATHLEN buffer with the name of the directory in it (the functions we
! 250: * call will append names onto the end, but the old dir value will be restored
! 251: - * on exit). */
! 252: -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
! 253: + * on exit).
! 254: + *
! 255: + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
! 256: + */
! 257: +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
! 258: + int del_flags)
! 259: {
! 260: static int already_warned = 0;
! 261: static struct hashtable *dev_tbl;
! 262: struct file_list *dirlist;
! 263: - char delbuf[MAXPATHLEN];
! 264: - int dlen, i;
! 265: + char *p, delbuf[MAXPATHLEN];
! 266: + unsigned remainder;
! 267: + int dlen, i, restore_dot = 0;
! 268:
! 269: if (!fbuf) {
! 270: change_local_filter_dir(NULL, 0, 0);
! 271: @@ -290,17 +298,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
! 272: maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
! 273:
! 274: if (io_error & IOERR_GENERAL && !ignore_errors) {
! 275: - if (already_warned)
! 276: + if (!already_warned) {
! 277: + rprintf(FINFO,
! 278: + "IO error encountered -- skipping file deletion\n");
! 279: + already_warned = 1;
! 280: + }
! 281: + if (!detect_renamed)
! 282: return;
! 283: - rprintf(FINFO,
! 284: - "IO error encountered -- skipping file deletion\n");
! 285: - already_warned = 1;
! 286: - return;
! 287: + del_flags |= DEL_NO_DELETIONS;
! 288: }
! 289:
! 290: dlen = strlen(fbuf);
! 291: change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
! 292:
! 293: + if (detect_renamed)
! 294: + unexplored_dirs--;
! 295: +
! 296: if (one_file_system) {
! 297: if (!dev_tbl)
! 298: dev_tbl = hashtable_create(16, HT_KEY64);
! 299: @@ -316,6 +329,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
! 300:
! 301: dirlist = get_dirlist(fbuf, dlen, 0);
! 302:
! 303: + p = fbuf + dlen;
! 304: + if (dlen == 1 && *fbuf == '.') {
! 305: + restore_dot = 1;
! 306: + p = fbuf;
! 307: + } else if (dlen != 1 || *fbuf != '/')
! 308: + *p++ = '/';
! 309: + remainder = MAXPATHLEN - (p - fbuf);
! 310: +
! 311: /* If an item in dirlist is not found in flist, delete it
! 312: * from the filesystem. */
! 313: for (i = dirlist->used; i--; ) {
! 314: @@ -328,6 +349,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
! 315: f_name(fp, NULL));
! 316: continue;
! 317: }
! 318: + if (detect_renamed && S_ISREG(fp->mode)) {
! 319: + strlcpy(p, fp->basename, remainder);
! 320: + look_for_rename(fp, fbuf);
! 321: + }
! 322: /* Here we want to match regardless of file type. Replacement
! 323: * of a file with one of another type is handled separately by
! 324: * a delete_item call with a DEL_MAKE_ROOM flag. */
! 325: @@ -336,14 +361,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
! 326: if (!(fp->mode & S_IWUSR) && !am_root && fp->flags & FLAG_OWNED_BY_US)
! 327: flags |= DEL_NO_UID_WRITE;
! 328: f_name(fp, delbuf);
! 329: - if (delete_during == 2) {
! 330: - if (!remember_delete(fp, delbuf, flags))
! 331: + if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) {
! 332: + if (!remember_delete(fp, delbuf, del_flags | flags))
! 333: break;
! 334: } else
! 335: - delete_item(delbuf, fp->mode, flags);
! 336: - }
! 337: + delete_item(delbuf, fp->mode, del_flags | flags);
! 338: + } else if (detect_renamed && S_ISDIR(fp->mode))
! 339: + unexplored_dirs++;
! 340: }
! 341:
! 342: + if (restore_dot)
! 343: + fbuf[0] = '.';
! 344: + fbuf[dlen] = '\0';
! 345: +
! 346: flist_free(dirlist);
! 347: }
! 348:
! 349: @@ -379,14 +409,125 @@ static void do_delete_pass(void)
! 350: || !S_ISDIR(st.st_mode))
! 351: continue;
! 352:
! 353: - delete_in_dir(fbuf, file, &st.st_dev);
! 354: + delete_in_dir(fbuf, file, &st.st_dev, 0);
! 355: }
! 356: - delete_in_dir(NULL, NULL, &dev_zero);
! 357: + delete_in_dir(NULL, NULL, &dev_zero, 0);
! 358:
! 359: if (INFO_GTE(FLIST, 2) && !am_server)
! 360: rprintf(FINFO, " \r");
! 361: }
! 362:
! 363: +/* Search for a regular file that matches either (1) the size & modified
! 364: + * time (plus the basename, if possible) or (2) the size & checksum. If
! 365: + * we find an exact match down to the dirname, return -1 because we found
! 366: + * an up-to-date file in the transfer, not a renamed file. */
! 367: +static int fattr_find(struct file_struct *f, char *fname)
! 368: +{
! 369: + int low = the_fattr_list.low, high = the_fattr_list.high;
! 370: + int mid, ok_match = -1, good_match = -1;
! 371: + struct file_struct *fmid;
! 372: + int diff;
! 373: +
! 374: + while (low <= high) {
! 375: + mid = (low + high) / 2;
! 376: + fmid = the_fattr_list.files[mid];
! 377: + if (F_LENGTH(fmid) != F_LENGTH(f)) {
! 378: + if (F_LENGTH(fmid) < F_LENGTH(f))
! 379: + low = mid + 1;
! 380: + else
! 381: + high = mid - 1;
! 382: + continue;
! 383: + }
! 384: + if (always_checksum) {
! 385: + /* We use the FLAG_FILE_SENT flag to indicate when we
! 386: + * have computed the checksum for an entry. */
! 387: + if (!(f->flags & FLAG_FILE_SENT)) {
! 388: + STRUCT_STAT st;
! 389: + if (fmid->modtime == f->modtime
! 390: + && f_name_cmp(fmid, f) == 0)
! 391: + return -1; /* assume we can't help */
! 392: + st.st_size = F_LENGTH(f);
! 393: + st.st_mtime = f->modtime;
! 394: + file_checksum(fname, &st, F_SUM(f));
! 395: + f->flags |= FLAG_FILE_SENT;
! 396: + }
! 397: + diff = u_memcmp(F_SUM(fmid), F_SUM(f), flist_csum_len);
! 398: + if (diff) {
! 399: + if (diff < 0)
! 400: + low = mid + 1;
! 401: + else
! 402: + high = mid - 1;
! 403: + continue;
! 404: + }
! 405: + } else {
! 406: + if (fmid->modtime != f->modtime) {
! 407: + if (fmid->modtime < f->modtime)
! 408: + low = mid + 1;
! 409: + else
! 410: + high = mid - 1;
! 411: + continue;
! 412: + }
! 413: + }
! 414: + ok_match = mid;
! 415: + diff = u_strcmp(fmid->basename, f->basename);
! 416: + if (diff == 0) {
! 417: + good_match = mid;
! 418: + if (fmid->dirname == f->dirname)
! 419: + return -1; /* file is up-to-date */
! 420: + if (!fmid->dirname) {
! 421: + low = mid + 1;
! 422: + continue;
! 423: + }
! 424: + if (!f->dirname) {
! 425: + high = mid - 1;
! 426: + continue;
! 427: + }
! 428: + diff = u_strcmp(fmid->dirname, f->dirname);
! 429: + if (diff == 0)
! 430: + return -1; /* file is up-to-date */
! 431: + }
! 432: + if (diff < 0)
! 433: + low = mid + 1;
! 434: + else
! 435: + high = mid - 1;
! 436: + }
! 437: +
! 438: + return good_match >= 0 ? good_match : ok_match;
! 439: +}
! 440: +
! 441: +void look_for_rename(struct file_struct *file, char *fname)
! 442: +{
! 443: + struct file_struct *fp;
! 444: + char *partialptr, *fn;
! 445: + STRUCT_STAT st;
! 446: + int ndx;
! 447: +
! 448: + if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
! 449: + return;
! 450: +
! 451: + fp = the_fattr_list.files[ndx];
! 452: + fn = f_name(fp, NULL);
! 453: + /* We don't provide an alternate-basis file if there is a basis file. */
! 454: + if (link_stat(fn, &st, 0) == 0)
! 455: + return;
! 456: +
! 457: + if (!dry_run) {
! 458: + if ((partialptr = partial_dir_fname(fn)) == NULL
! 459: + || !handle_partial_dir(partialptr, PDIR_CREATE))
! 460: + return;
! 461: + /* We only use the file if we can hard-link it into our tmp dir. */
! 462: + if (link(fname, partialptr) != 0) {
! 463: + if (errno != EEXIST)
! 464: + handle_partial_dir(partialptr, PDIR_DELETE);
! 465: + return;
! 466: + }
! 467: + }
! 468: +
! 469: + /* I think this falls into the -vv category with "%s is uptodate", etc. */
! 470: + if (INFO_GTE(MISC, 2))
! 471: + rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
! 472: +}
! 473: +
! 474: static inline int mtime_differs(STRUCT_STAT *stp, struct file_struct *file)
! 475: {
! 476: #ifdef ST_MTIME_NSEC
! 477: @@ -1187,6 +1328,7 @@ static void list_file_entry(struct file_struct *f)
! 478: }
! 479: }
! 480:
! 481: +static struct bitbag *delayed_bits = NULL;
! 482: static int phase = 0;
! 483: static int dflt_perms;
! 484:
! 485: @@ -1323,7 +1465,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
! 486: && do_stat(dn, &sx.st) < 0) {
! 487: if (dry_run)
! 488: goto parent_is_dry_missing;
! 489: - if (make_path(fname, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0) {
! 490: + if (make_path(fname, ACCESSPERMS, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0) {
! 491: rsyserr(FERROR_XFER, errno,
! 492: "recv_generator: mkdir %s failed",
! 493: full_fname(dn));
! 494: @@ -1459,7 +1601,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
! 495: }
! 496: if (real_ret != 0 && do_mkdir(fname,file->mode|added_perms) < 0 && errno != EEXIST) {
! 497: if (!relative_paths || errno != ENOENT
! 498: - || make_path(fname, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0
! 499: + || make_path(fname, ACCESSPERMS, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0
! 500: || (do_mkdir(fname, file->mode|added_perms) < 0 && errno != EEXIST)) {
! 501: rsyserr(FERROR_XFER, errno,
! 502: "recv_generator: mkdir %s failed",
! 503: @@ -1507,9 +1649,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
! 504: }
! 505: else if (delete_during && f_out != -1 && !phase
! 506: && !(file->flags & FLAG_MISSING_DIR)) {
! 507: - if (file->flags & FLAG_CONTENT_DIR)
! 508: - delete_in_dir(fname, file, &real_sx.st.st_dev);
! 509: - else
! 510: + if (file->flags & FLAG_CONTENT_DIR) {
! 511: + if (detect_renamed && real_ret != 0)
! 512: + unexplored_dirs++;
! 513: + delete_in_dir(fname, file, &real_sx.st.st_dev,
! 514: + delete_during < 0 ? DEL_NO_DELETIONS : 0);
! 515: + } else
! 516: change_local_filter_dir(fname, strlen(fname), F_DEPTH(file));
! 517: }
! 518: prior_dir_file = file;
! 519: @@ -1786,8 +1931,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
! 520: goto cleanup;
! 521: }
! 522: #endif
! 523: - if (stat_errno == ENOENT)
! 524: + if (stat_errno == ENOENT) {
! 525: + if (detect_renamed && unexplored_dirs > 0
! 526: + && F_LENGTH(file)) {
! 527: + bitbag_set_bit(delayed_bits, ndx);
! 528: + return;
! 529: + }
! 530: goto notify_others;
! 531: + }
! 532: rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
! 533: full_fname(fname));
! 534: goto cleanup;
! 535: @@ -2251,6 +2402,12 @@ void generate_files(int f_out, const char *local_name)
! 536: if (DEBUG_GTE(GENR, 1))
! 537: rprintf(FINFO, "generator starting pid=%d\n", (int)getpid());
! 538:
! 539: + if (detect_renamed) {
! 540: + delayed_bits = bitbag_create(cur_flist->used);
! 541: + if (!delete_before && !delete_during)
! 542: + delete_during = -1;
! 543: + }
! 544: +
! 545: if (delete_before && !solo_file && cur_flist->used > 0)
! 546: do_delete_pass();
! 547: if (delete_during == 2) {
! 548: @@ -2259,7 +2416,7 @@ void generate_files(int f_out, const char *local_name)
! 549: }
! 550: info_levels[INFO_FLIST] = info_levels[INFO_PROGRESS] = 0;
! 551:
! 552: - if (append_mode > 0 || whole_file < 0)
! 553: + if (append_mode > 0 || detect_renamed || whole_file < 0)
! 554: whole_file = 0;
! 555: if (DEBUG_GTE(FLIST, 1)) {
! 556: rprintf(FINFO, "delta-transmission %s\n",
! 557: @@ -2295,7 +2452,7 @@ void generate_files(int f_out, const char *local_name)
! 558: dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
! 559: } else
! 560: dirdev = MAKEDEV(0, 0);
! 561: - delete_in_dir(fbuf, fp, &dirdev);
! 562: + delete_in_dir(fbuf, fp, &dirdev, 0);
! 563: } else
! 564: change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp));
! 565: }
! 566: @@ -2342,7 +2499,21 @@ void generate_files(int f_out, const char *local_name)
! 567: } while ((cur_flist = cur_flist->next) != NULL);
! 568:
! 569: if (delete_during)
! 570: - delete_in_dir(NULL, NULL, &dev_zero);
! 571: + delete_in_dir(NULL, NULL, &dev_zero, 0);
! 572: + if (detect_renamed) {
! 573: + if (delete_during < 0)
! 574: + delete_during = 0;
! 575: + detect_renamed = 0;
! 576: +
! 577: + for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
! 578: + struct file_struct *file = cur_flist->files[i];
! 579: + if (local_name)
! 580: + strlcpy(fbuf, local_name, sizeof fbuf);
! 581: + else
! 582: + f_name(file, fbuf);
! 583: + recv_generator(fbuf, file, i, itemizing, code, f_out);
! 584: + }
! 585: + }
! 586: phase++;
! 587: if (DEBUG_GTE(GENR, 1))
! 588: rprintf(FINFO, "generate_files phase=%d\n", phase);
! 589: diff --git a/main.c b/main.c
! 590: --- a/main.c
! 591: +++ b/main.c
! 592: @@ -721,7 +721,7 @@ static char *get_local_name(struct file_list *flist, char *dest_path)
! 593: trailing_slash = cp && !cp[1];
! 594:
! 595: if (mkpath_dest_arg && statret < 0 && (cp || file_total > 1)) {
! 596: - int ret = make_path(dest_path, file_total > 1 && !trailing_slash ? 0 : MKP_DROP_NAME);
! 597: + int ret = make_path(dest_path, ACCESSPERMS, file_total > 1 && !trailing_slash ? 0 : MKP_DROP_NAME);
! 598: if (ret < 0)
! 599: goto mkdir_error;
! 600: if (INFO_GTE(NAME, 1)) {
! 601: diff --git a/options.c b/options.c
! 602: --- a/options.c
! 603: +++ b/options.c
! 604: @@ -84,6 +84,7 @@ int am_server = 0;
! 605: int am_sender = 0;
! 606: int am_starting_up = 1;
! 607: int relative_paths = -1;
! 608: +int detect_renamed = 0;
! 609: int implied_dirs = 1;
! 610: int missing_args = 0; /* 0 = FERROR_XFER, 1 = ignore, 2 = delete */
! 611: int numeric_ids = 0;
! 612: @@ -733,6 +734,7 @@ static struct poptOption long_options[] = {
! 613: {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
! 614: {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
! 615: {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
! 616: + {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 },
! 617: {"fuzzy", 'y', POPT_ARG_NONE, 0, 'y', 0, 0 },
! 618: {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
! 619: {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
! 620: @@ -2346,7 +2348,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
! 621: inplace = 1;
! 622: }
! 623:
! 624: - if (delay_updates && !partial_dir)
! 625: + if ((delay_updates || detect_renamed) && !partial_dir)
! 626: partial_dir = tmp_partialdir;
! 627:
! 628: if (inplace) {
! 629: @@ -2355,6 +2357,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
! 630: snprintf(err_buf, sizeof err_buf,
! 631: "--%s cannot be used with --%s\n",
! 632: append_mode ? "append" : "inplace",
! 633: + detect_renamed ? "detect-renamed" :
! 634: delay_updates ? "delay-updates" : "partial-dir");
! 635: return 0;
! 636: }
! 637: @@ -2760,6 +2763,8 @@ void server_options(char **args, int *argc_p)
! 638: args[ac++] = "--super";
! 639: if (size_only)
! 640: args[ac++] = "--size-only";
! 641: + if (detect_renamed)
! 642: + args[ac++] = "--detect-renamed";
! 643: if (do_stats)
! 644: args[ac++] = "--stats";
! 645: } else {
! 646: diff --git a/receiver.c b/receiver.c
! 647: --- a/receiver.c
! 648: +++ b/receiver.c
! 649: @@ -217,7 +217,7 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file)
! 650: * information should have been previously transferred, but that may
! 651: * not be the case with -R */
! 652: if (fd == -1 && relative_paths && errno == ENOENT
! 653: - && make_path(fnametmp, MKP_SKIP_SLASH | MKP_DROP_NAME) == 0) {
! 654: + && make_path(fnametmp, ACCESSPERMS, MKP_SKIP_SLASH | MKP_DROP_NAME) == 0) {
! 655: /* Get back to name with XXXXXX in it. */
! 656: get_tmpname(fnametmp, fname, False);
! 657: fd = do_mkstemp(fnametmp, (file->mode|added_perms) & INITACCESSPERMS);
! 658: diff --git a/rsync.1.md b/rsync.1.md
! 659: --- a/rsync.1.md
! 660: +++ b/rsync.1.md
! 661: @@ -421,6 +421,7 @@ detailed description below for a complete description.
! 662: --modify-window=NUM, -@ set the accuracy for mod-time comparisons
! 663: --temp-dir=DIR, -T create temporary files in directory DIR
! 664: --fuzzy, -y find similar file for basis if no dest file
! 665: +--detect-renamed try to find renamed files to speed the xfer
! 666: --compare-dest=DIR also compare destination files relative to DIR
! 667: --copy-dest=DIR ... and include copies of unchanged files
! 668: --link-dest=DIR hardlink to files in DIR when unchanged
! 669: @@ -2247,6 +2248,22 @@ your home directory (remove the '=' for that).
! 670: fuzzy-match files, so either use `--delete-after` or specify some filename
! 671: exclusions if you need to prevent this.
! 672:
! 673: +0. `--detect-renamed`
! 674: +
! 675: + With this option, for each new source file (call it `src/S`), rsync looks
! 676: + for a file `dest/D` anywhere in the destination that passes the quick check
! 677: + with `src/S`. If such a `dest/D` is found, rsync uses it as an alternate
! 678: + basis for transferring `S`. The idea is that if `src/S` was renamed from
! 679: + `src/D` (as opposed to `src/S` passing the quick check with `dest/D` by
! 680: + coincidence), the delta-transfer algorithm will find that all the data
! 681: + matches between `src/S` and `dest/D`, and the transfer will be really fast.
! 682: +
! 683: + By default, alternate-basis files are hard-linked into a directory named
! 684: + ".~tmp~" in each file's destination directory, but if you've specified the
! 685: + `--partial-dir` option, that directory will be used instead. These
! 686: + potential alternate-basis files will be removed as the transfer progresses.
! 687: + This option conflicts with `--inplace` and `--append`.
! 688: +
! 689: 0. `--compare-dest=DIR`
! 690:
! 691: This option instructs rsync to use _DIR_ on the destination machine as an
! 692: diff --git a/rsync.h b/rsync.h
! 693: --- a/rsync.h
! 694: +++ b/rsync.h
! 695: @@ -272,7 +272,7 @@ enum msgcode {
! 696: #define NDX_DEL_STATS -3
! 697: #define NDX_FLIST_OFFSET -101
! 698:
! 699: -/* For calling delete_item() and delete_dir_contents(). */
! 700: +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
! 701: #define DEL_NO_UID_WRITE (1<<0) /* file/dir has our uid w/o write perm */
! 702: #define DEL_RECURSE (1<<1) /* if dir, delete all contents */
! 703: #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */
! 704: @@ -282,6 +282,7 @@ enum msgcode {
! 705: #define DEL_FOR_DEVICE (1<<6) /* making room for a replacement device */
! 706: #define DEL_FOR_SPECIAL (1<<7) /* making room for a replacement special */
! 707: #define DEL_FOR_BACKUP (1<<8) /* the delete is for a backup operation */
! 708: +#define DEL_NO_DELETIONS (1<<9) /* just check for renames w/o deleting */
! 709:
! 710: #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
! 711:
! 712: diff --git a/util.c b/util.c
! 713: --- a/util.c
! 714: +++ b/util.c
! 715: @@ -182,7 +182,7 @@ int set_times(const char *fname, STRUCT_STAT *stp)
! 716: /* Create any necessary directories in fname. Any missing directories are
! 717: * created with default permissions. Returns < 0 on error, or the number
! 718: * of directories created. */
! 719: -int make_path(char *fname, int flags)
! 720: +int make_path(char *fname, mode_t mode, int flags)
! 721: {
! 722: char *end, *p;
! 723: int ret = 0;
! 724: @@ -213,7 +213,7 @@ int make_path(char *fname, int flags)
! 725: else
! 726: errno = ENOTDIR;
! 727: }
! 728: - } else if (do_mkdir(fname, ACCESSPERMS) == 0) {
! 729: + } else if (do_mkdir(fname, mode) == 0) {
! 730: ret++;
! 731: break;
! 732: }
! 733: @@ -252,7 +252,7 @@ int make_path(char *fname, int flags)
! 734: p += strlen(p);
! 735: if (ret < 0) /* Skip mkdir on error, but keep restoring the path. */
! 736: continue;
! 737: - if (do_mkdir(fname, ACCESSPERMS) < 0)
! 738: + if (do_mkdir(fname, mode) < 0)
! 739: ret = -ret - 1;
! 740: else
! 741: ret++;
! 742: @@ -1162,6 +1162,32 @@ char *normalize_path(char *path, BOOL force_newbuf, unsigned int *len_ptr)
! 743: return path;
! 744: }
! 745:
! 746: +/* We need to supply our own strcmp function for file list comparisons
! 747: + * to ensure that signed/unsigned usage is consistent between machines. */
! 748: +int u_strcmp(const char *p1, const char *p2)
! 749: +{
! 750: + for ( ; *p1; p1++, p2++) {
! 751: + if (*p1 != *p2)
! 752: + break;
! 753: + }
! 754: +
! 755: + return (int)*(uchar*)p1 - (int)*(uchar*)p2;
! 756: +}
! 757: +
! 758: +/* A memcmp() that compares bytes as unsigned values; returns <0, 0, or >0. */
! 759: +int u_memcmp(const void *p1, const void *p2, size_t len)
! 760: +{
! 761: + const uchar *u1 = p1;
! 762: + const uchar *u2 = p2;
! 763: +
! 764: + for ( ; len--; u1++, u2++) { /* step both cursors so every byte is checked */
! 765: + if (*u1 != *u2)
! 766: + return (int)*u1 - (int)*u2;
! 767: + }
! 768: +
! 769: + return 0;
! 770: +}
! 771: +
! 772: /**
! 773: * Return a quoted string with the full pathname of the indicated filename.
! 774: * The string " (in MODNAME)" may also be appended. The returned pointer
! 775: @@ -1255,7 +1281,7 @@ int handle_partial_dir(const char *fname, int create)
! 776: }
! 777: statret = -1;
! 778: }
! 779: - if (statret < 0 && do_mkdir(dir, 0700) < 0) {
! 780: + if (statret < 0 && make_path(dir, 0700, 0) < 0) {
! 781: *fn = '/';
! 782: return 0;
! 783: }
! 784: diff -Nurp a/rsync.1 b/rsync.1
! 785: --- a/rsync.1
! 786: +++ b/rsync.1
! 787: @@ -497,6 +497,7 @@ detailed description below for a complet
! 788: --modify-window=NUM, -@ set the accuracy for mod-time comparisons
! 789: --temp-dir=DIR, -T create temporary files in directory DIR
! 790: --fuzzy, -y find similar file for basis if no dest file
! 791: +--detect-renamed try to find renamed files to speed the xfer
! 792: --compare-dest=DIR also compare destination files relative to DIR
! 793: --copy-dest=DIR ... and include copies of unchanged files
! 794: --link-dest=DIR hardlink to files in DIR when unchanged
! 795: @@ -2290,6 +2291,20 @@ alternate destination directories that a
! 796: Note that the use of the \fB\-\-delete\fP option might get rid of any potential
! 797: fuzzy-match files, so either use \fB\-\-delete-after\fP or specify some filename
! 798: exclusions if you need to prevent this.
! 799: +.IP "\fB\-\-detect-renamed\fP"
! 800: +With this option, for each new source file (call it \fBsrc/S\fP), rsync looks
! 801: +for a file \fBdest/D\fP anywhere in the destination that passes the quick check
! 802: +with \fBsrc/S\fP. If such a \fBdest/D\fP is found, rsync uses it as an alternate
! 803: +basis for transferring \fBS\fP. The idea is that if \fBsrc/S\fP was renamed from
! 804: +\fBsrc/D\fP (as opposed to \fBsrc/S\fP passing the quick check with \fBdest/D\fP by
! 805: +coincidence), the delta-transfer algorithm will find that all the data
! 806: +matches between \fBsrc/S\fP and \fBdest/D\fP, and the transfer will be really fast.
! 807: +.IP
! 808: +By default, alternate-basis files are hard-linked into a directory named
! 809: +".~tmp~" in each file's destination directory, but if you've specified the
! 810: +\fB\-\-partial-dir\fP option, that directory will be used instead. These
! 811: +potential alternate-basis files will be removed as the transfer progresses.
! 812: +This option conflicts with \fB\-\-inplace\fP and \fB\-\-append\fP.
! 813: .IP "\fB\-\-compare-dest=DIR\fP"
! 814: This option instructs rsync to use \fIDIR\fP on the destination machine as an
! 815: additional hierarchy to compare destination files against doing transfers
! 816: diff -Nurp a/rsync.1.html b/rsync.1.html
! 817: --- a/rsync.1.html
! 818: +++ b/rsync.1.html
! 819: @@ -412,6 +412,7 @@ detailed description below for a complet
! 820: --modify-window=NUM, -@ set the accuracy for mod-time comparisons
! 821: --temp-dir=DIR, -T create temporary files in directory DIR
! 822: --fuzzy, -y find similar file for basis if no dest file
! 823: +--detect-renamed try to find renamed files to speed the xfer
! 824: --compare-dest=DIR also compare destination files relative to DIR
! 825: --copy-dest=DIR ... and include copies of unchanged files
! 826: --link-dest=DIR hardlink to files in DIR when unchanged
! 827: @@ -2135,6 +2136,21 @@ fuzzy-match files, so either use <code>-
! 828: exclusions if you need to prevent this.</p>
! 829: </dd>
! 830:
! 831: +<dt><code>--detect-renamed</code></dt><dd>
! 832: +<p>With this option, for each new source file (call it <code>src/S</code>), rsync looks
! 833: +for a file <code>dest/D</code> anywhere in the destination that passes the quick check
! 834: +with <code>src/S</code>. If such a <code>dest/D</code> is found, rsync uses it as an alternate
! 835: +basis for transferring <code>S</code>. The idea is that if <code>src/S</code> was renamed from
! 836: +<code>src/D</code> (as opposed to <code>src/S</code> passing the quick check with <code>dest/D</code> by
! 837: +coincidence), the delta-transfer algorithm will find that all the data
! 838: +matches between <code>src/S</code> and <code>dest/D</code>, and the transfer will be really fast.</p>
! 839: +<p>By default, alternate-basis files are hard-linked into a directory named
! 840: +".~tmp~" in each file's destination directory, but if you've specified the
! 841: +<code>--partial-dir</code> option, that directory will be used instead. These
! 842: +potential alternate-basis files will be removed as the transfer progresses.
! 843: +This option conflicts with <code>--inplace</code> and <code>--append</code>.</p>
! 844: +</dd>
! 845: +
! 846: <dt><code>--compare-dest=DIR</code></dt><dd>
! 847: <p>This option instructs rsync to use <u>DIR</u> on the destination machine as an
! 848: additional hierarchy to compare destination files against doing transfers
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>