This patch adds the --detect-renamed option which makes rsync notice files that either (1) match in size & modify-time (plus the basename, if possible) or (2) match in size & checksum (when --checksum was also specified) and use each match as an alternate basis file to speed up the transfer. The algorithm attempts to scan the receiving-side's files in an efficient manner. If --delete[-before] is enabled, we'll take advantage of the pre-transfer delete pass to prepare any alternate-basis-file matches we might find. If --delete-before is not enabled, rsync does the rename scan during the regular file-sending scan (scanning each directory right before the generator starts updating files from that dir). In this latter mode, rsync might delay the updating of a file (if no alternate-basis match was yet found) until the full scan of the receiving side is complete, at which point any delayed files are processed. I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that takes advantage of rsync's pre-existing partial-dir logic. This uses less memory than trying to keep track of the matches internally, and also allows any deletions or file-updates to occur normally without interfering with these alternate-basis discoveries. 
To use this patch, run these commands for a successful build: patch -p1 1) backup_dir_buf[backup_dir_len-1] = '\0'; - ret = make_path(backup_dir_buf, 0); + ret = make_path(backup_dir_buf, ACCESSPERMS, 0); if (backup_dir_len > 1) backup_dir_buf[backup_dir_len-1] = '/'; if (ret < 0) diff --git a/compat.c b/compat.c --- a/compat.c +++ b/compat.c @@ -39,6 +39,7 @@ extern int checksum_seed; extern int basis_dir_cnt; extern int prune_empty_dirs; extern int protocol_version; +extern int detect_renamed; extern int protect_args; extern int preserve_uid; extern int preserve_gid; @@ -159,6 +160,7 @@ void set_allow_inc_recurse(void) allow_inc_recurse = 0; else if (!am_sender && (delete_before || delete_after + || detect_renamed || delay_updates || prune_empty_dirs)) allow_inc_recurse = 0; else if (am_server && !local_server diff --git a/delete.c b/delete.c --- a/delete.c +++ b/delete.c @@ -25,6 +25,7 @@ extern int am_root; extern int make_backups; extern int max_delete; +extern int detect_renamed; extern char *backup_dir; extern char *backup_suffix; extern int backup_suffix_len; @@ -44,6 +45,8 @@ static inline int is_backup_file(char *fn) * its contents, otherwise just checks for content. Returns DR_SUCCESS or * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The * buffer is used for recursion, but returned unchanged.) + * + * Note: --detect-rename may use this routine with DEL_NO_DELETIONS set! */ static enum delret delete_dir_contents(char *fname, uint16 flags) { @@ -63,7 +66,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags) save_filters = push_local_filters(fname, dlen); non_perishable_cnt = 0; + file_extra_cnt += SUM_EXTRA_CNT; dirlist = get_dirlist(fname, dlen, 0); + file_extra_cnt -= SUM_EXTRA_CNT; ret = non_perishable_cnt ? 
DR_NOT_EMPTY : DR_SUCCESS; if (!dirlist->used) @@ -103,7 +108,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags) if (S_ISDIR(fp->mode)) { if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS) ret = DR_NOT_EMPTY; - } + } else if (detect_renamed && S_ISREG(fp->mode)) + look_for_rename(fp, fname); if (delete_item(fname, fp->mode, flags) != DR_SUCCESS) ret = DR_NOT_EMPTY; } @@ -126,6 +132,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags) * * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's * a directory! (The buffer is used for recursion, but returned unchanged.) + * + * Also note: --detect-rename may use this routine with DEL_NO_DELETIONS set! */ enum delret delete_item(char *fbuf, uint16 mode, uint16 flags) { @@ -153,6 +161,9 @@ enum delret delete_item(char *fbuf, uint16 mode, uint16 flags) /* OK: try to delete the directory. */ } + if (flags & DEL_NO_DELETIONS) + return DR_SUCCESS; + if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && stats.deleted_files >= max_delete) { skipped_deletes++; return DR_AT_LIMIT; diff --git a/flist.c b/flist.c --- a/flist.c +++ b/flist.c @@ -64,6 +64,7 @@ extern int non_perishable_cnt; extern int prune_empty_dirs; extern int copy_links; extern int copy_unsafe_links; +extern int detect_renamed; extern int protocol_version; extern int sanitize_paths; extern int munge_symlinks; @@ -130,6 +131,8 @@ static int64 tmp_dev = -1, tmp_ino; #endif static char tmp_sum[MAX_DIGEST_LEN]; +struct file_list the_fattr_list; + static char empty_sum[MAX_DIGEST_LEN]; static int flist_count_offset; /* for --delete --progress */ static int show_filelist_progress; @@ -277,6 +280,45 @@ static inline int is_excluded(const char *fname, int is_dir, int filter_level) return name_is_excluded(fname, is_dir ? 
NAME_IS_DIR : NAME_IS_FILE, filter_level); } +static int fattr_compare(struct file_struct **file1, struct file_struct **file2) +{ + struct file_struct *f1 = *file1; + struct file_struct *f2 = *file2; + int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2); + int diff; + + if (!f1->basename || !S_ISREG(f1->mode) || !len1) { + if (!f2->basename || !S_ISREG(f2->mode) || !len2) + return 0; + return 1; + } + if (!f2->basename || !S_ISREG(f2->mode) || !len2) + return -1; + + /* Don't use diff for values that are longer than an int. */ + if (len1 != len2) + return len1 < len2 ? -1 : 1; + + if (always_checksum) { + diff = u_memcmp(F_SUM(f1), F_SUM(f2), flist_csum_len); + if (diff) + return diff; + } else if (f1->modtime != f2->modtime) + return f1->modtime < f2->modtime ? -1 : 1; + + diff = u_strcmp(f1->basename, f2->basename); + if (diff) + return diff; + + if (f1->dirname == f2->dirname) + return 0; + if (!f1->dirname) + return -1; + if (!f2->dirname) + return 1; + return u_strcmp(f1->dirname, f2->dirname); +} + static void send_directory(int f, struct file_list *flist, char *fbuf, int len, int flags); @@ -2675,6 +2717,23 @@ struct file_list *recv_file_list(int f, int dir_ndx) * for a non-relative transfer in recv_file_entry(). 
*/ flist_sort_and_clean(flist, relative_paths); + if (detect_renamed) { + int j = flist->used; + the_fattr_list.used = j; + the_fattr_list.files = new_array(struct file_struct *, j); + memcpy(the_fattr_list.files, flist->files, + j * sizeof (struct file_struct *)); + qsort(the_fattr_list.files, j, + sizeof the_fattr_list.files[0], (int (*)())fattr_compare); + the_fattr_list.low = 0; + while (j-- > 0) { + struct file_struct *fp = the_fattr_list.files[j]; + if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp)) + break; + } + the_fattr_list.high = j; + } + if (protocol_version < 30) { /* Recv the io_error flag */ int err = read_int(f); diff --git a/generator.c b/generator.c --- a/generator.c +++ b/generator.c @@ -79,6 +79,7 @@ extern int always_checksum; extern int flist_csum_len; extern char *partial_dir; extern int alt_dest_type; +extern int detect_renamed; extern int whole_file; extern int list_only; extern int read_batch; @@ -97,11 +98,13 @@ extern char *tmpdir; extern char *basis_dir[MAX_BASIS_DIRS+1]; extern struct file_list *cur_flist, *first_flist, *dir_flist; extern filter_rule_list filter_list, daemon_filter_list; +extern struct file_list the_fattr_list; int maybe_ATTRS_REPORT = 0; int maybe_ATTRS_ACCURATE_TIME = 0; static dev_t dev_zero; +static int unexplored_dirs = 1; static int deldelay_size = 0, deldelay_cnt = 0; static char *deldelay_buf = NULL; static int deldelay_fd = -1; @@ -269,14 +272,19 @@ static void do_delayed_deletions(char *delbuf) * all the --delete-WHEN options. Note that the fbuf pointer must point to a * MAXPATHLEN buffer with the name of the directory in it (the functions we * call will append names onto the end, but the old dir value will be restored - * on exit). */ -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) + * on exit). + * + * Note: --detect-rename may use this routine with DEL_NO_DELETIONS set! 
+ */ +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev, + int del_flags) { static int already_warned = 0; static struct hashtable *dev_tbl; struct file_list *dirlist; - char delbuf[MAXPATHLEN]; - int dlen, i; + char *p, delbuf[MAXPATHLEN]; + unsigned remainder; + int dlen, i, restore_dot = 0; if (!fbuf) { change_local_filter_dir(NULL, 0, 0); @@ -290,17 +298,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH); if (io_error & IOERR_GENERAL && !ignore_errors) { - if (already_warned) + if (!already_warned) { + rprintf(FINFO, + "IO error encountered -- skipping file deletion\n"); + already_warned = 1; + } + if (!detect_renamed) return; - rprintf(FINFO, - "IO error encountered -- skipping file deletion\n"); - already_warned = 1; - return; + del_flags |= DEL_NO_DELETIONS; } dlen = strlen(fbuf); change_local_filter_dir(fbuf, dlen, F_DEPTH(file)); + if (detect_renamed) + unexplored_dirs--; + if (one_file_system) { if (!dev_tbl) dev_tbl = hashtable_create(16, HT_KEY64); @@ -316,6 +329,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) dirlist = get_dirlist(fbuf, dlen, 0); + p = fbuf + dlen; + if (dlen == 1 && *fbuf == '.') { + restore_dot = 1; + p = fbuf; + } else if (dlen != 1 || *fbuf != '/') + *p++ = '/'; + remainder = MAXPATHLEN - (p - fbuf); + /* If an item in dirlist is not found in flist, delete it * from the filesystem. */ for (i = dirlist->used; i--; ) { @@ -328,6 +349,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) f_name(fp, NULL)); continue; } + if (detect_renamed && S_ISREG(fp->mode)) { + strlcpy(p, fp->basename, remainder); + look_for_rename(fp, fbuf); + } /* Here we want to match regardless of file type. Replacement * of a file with one of another type is handled separately by * a delete_item call with a DEL_MAKE_ROOM flag. 
*/ @@ -336,14 +361,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) if (!(fp->mode & S_IWUSR) && !am_root && fp->flags & FLAG_OWNED_BY_US) flags |= DEL_NO_UID_WRITE; f_name(fp, delbuf); - if (delete_during == 2) { - if (!remember_delete(fp, delbuf, flags)) + if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) { + if (!remember_delete(fp, delbuf, del_flags | flags)) break; } else - delete_item(delbuf, fp->mode, flags); - } + delete_item(delbuf, fp->mode, del_flags | flags); + } else if (detect_renamed && S_ISDIR(fp->mode)) + unexplored_dirs++; } + if (restore_dot) + fbuf[0] = '.'; + fbuf[dlen] = '\0'; + flist_free(dirlist); } @@ -379,14 +409,125 @@ static void do_delete_pass(void) || !S_ISDIR(st.st_mode)) continue; - delete_in_dir(fbuf, file, &st.st_dev); + delete_in_dir(fbuf, file, &st.st_dev, 0); } - delete_in_dir(NULL, NULL, &dev_zero); + delete_in_dir(NULL, NULL, &dev_zero, 0); if (INFO_GTE(FLIST, 2) && !am_server) rprintf(FINFO, " \r"); } +/* Search for a regular file that matches either (1) the size & modified + * time (plus the basename, if possible) or (2) the size & checksum. If + * we find an exact match down to the dirname, return -1 because we found + * an up-to-date file in the transfer, not a renamed file. */ +static int fattr_find(struct file_struct *f, char *fname) +{ + int low = the_fattr_list.low, high = the_fattr_list.high; + int mid, ok_match = -1, good_match = -1; + struct file_struct *fmid; + int diff; + + while (low <= high) { + mid = (low + high) / 2; + fmid = the_fattr_list.files[mid]; + if (F_LENGTH(fmid) != F_LENGTH(f)) { + if (F_LENGTH(fmid) < F_LENGTH(f)) + low = mid + 1; + else + high = mid - 1; + continue; + } + if (always_checksum) { + /* We use the FLAG_FILE_SENT flag to indicate when we + * have computed the checksum for an entry. 
*/ + if (!(f->flags & FLAG_FILE_SENT)) { + STRUCT_STAT st; + if (fmid->modtime == f->modtime + && f_name_cmp(fmid, f) == 0) + return -1; /* assume we can't help */ + st.st_size = F_LENGTH(f); + st.st_mtime = f->modtime; + file_checksum(fname, &st, F_SUM(f)); + f->flags |= FLAG_FILE_SENT; + } + diff = u_memcmp(F_SUM(fmid), F_SUM(f), flist_csum_len); + if (diff) { + if (diff < 0) + low = mid + 1; + else + high = mid - 1; + continue; + } + } else { + if (fmid->modtime != f->modtime) { + if (fmid->modtime < f->modtime) + low = mid + 1; + else + high = mid - 1; + continue; + } + } + ok_match = mid; + diff = u_strcmp(fmid->basename, f->basename); + if (diff == 0) { + good_match = mid; + if (fmid->dirname == f->dirname) + return -1; /* file is up-to-date */ + if (!fmid->dirname) { + low = mid + 1; + continue; + } + if (!f->dirname) { + high = mid - 1; + continue; + } + diff = u_strcmp(fmid->dirname, f->dirname); + if (diff == 0) + return -1; /* file is up-to-date */ + } + if (diff < 0) + low = mid + 1; + else + high = mid - 1; + } + + return good_match >= 0 ? good_match : ok_match; +} + +void look_for_rename(struct file_struct *file, char *fname) +{ + struct file_struct *fp; + char *partialptr, *fn; + STRUCT_STAT st; + int ndx; + + if (!partial_dir || (ndx = fattr_find(file, fname)) < 0) + return; + + fp = the_fattr_list.files[ndx]; + fn = f_name(fp, NULL); + /* We don't provide an alternate-basis file if there is a basis file. */ + if (link_stat(fn, &st, 0) == 0) + return; + + if (!dry_run) { + if ((partialptr = partial_dir_fname(fn)) == NULL + || !handle_partial_dir(partialptr, PDIR_CREATE)) + return; + /* We only use the file if we can hard-link it into our tmp dir. */ + if (link(fname, partialptr) != 0) { + if (errno != EEXIST) + handle_partial_dir(partialptr, PDIR_DELETE); + return; + } + } + + /* I think this falls into the -vv category with "%s is uptodate", etc. 
*/ + if (INFO_GTE(MISC, 2)) + rprintf(FINFO, "found renamed: %s => %s\n", fname, fn); +} + static inline int mtime_differs(STRUCT_STAT *stp, struct file_struct *file) { #ifdef ST_MTIME_NSEC @@ -1187,6 +1328,7 @@ static void list_file_entry(struct file_struct *f) } } +static struct bitbag *delayed_bits = NULL; static int phase = 0; static int dflt_perms; @@ -1323,7 +1465,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, && do_stat(dn, &sx.st) < 0) { if (dry_run) goto parent_is_dry_missing; - if (make_path(fname, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0) { + if (make_path(fname, ACCESSPERMS, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0) { rsyserr(FERROR_XFER, errno, "recv_generator: mkdir %s failed", full_fname(dn)); @@ -1459,7 +1601,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, } if (real_ret != 0 && do_mkdir(fname,file->mode|added_perms) < 0 && errno != EEXIST) { if (!relative_paths || errno != ENOENT - || make_path(fname, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0 + || make_path(fname, ACCESSPERMS, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0 || (do_mkdir(fname, file->mode|added_perms) < 0 && errno != EEXIST)) { rsyserr(FERROR_XFER, errno, "recv_generator: mkdir %s failed", @@ -1507,9 +1649,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, } else if (delete_during && f_out != -1 && !phase && !(file->flags & FLAG_MISSING_DIR)) { - if (file->flags & FLAG_CONTENT_DIR) - delete_in_dir(fname, file, &real_sx.st.st_dev); - else + if (file->flags & FLAG_CONTENT_DIR) { + if (detect_renamed && real_ret != 0) + unexplored_dirs++; + delete_in_dir(fname, file, &real_sx.st.st_dev, + delete_during < 0 ? 
DEL_NO_DELETIONS : 0); + } else change_local_filter_dir(fname, strlen(fname), F_DEPTH(file)); } prior_dir_file = file; @@ -1786,8 +1931,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, goto cleanup; } #endif - if (stat_errno == ENOENT) + if (stat_errno == ENOENT) { + if (detect_renamed && unexplored_dirs > 0 + && F_LENGTH(file)) { + bitbag_set_bit(delayed_bits, ndx); + return; + } goto notify_others; + } rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s", full_fname(fname)); goto cleanup; @@ -2251,6 +2402,12 @@ void generate_files(int f_out, const char *local_name) if (DEBUG_GTE(GENR, 1)) rprintf(FINFO, "generator starting pid=%d\n", (int)getpid()); + if (detect_renamed) { + delayed_bits = bitbag_create(cur_flist->used); + if (!delete_before && !delete_during) + delete_during = -1; + } + if (delete_before && !solo_file && cur_flist->used > 0) do_delete_pass(); if (delete_during == 2) { @@ -2259,7 +2416,7 @@ void generate_files(int f_out, const char *local_name) } info_levels[INFO_FLIST] = info_levels[INFO_PROGRESS] = 0; - if (append_mode > 0 || whole_file < 0) + if (append_mode > 0 || detect_renamed || whole_file < 0) whole_file = 0; if (DEBUG_GTE(FLIST, 1)) { rprintf(FINFO, "delta-transmission %s\n", @@ -2295,7 +2452,7 @@ void generate_files(int f_out, const char *local_name) dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp)); } else dirdev = MAKEDEV(0, 0); - delete_in_dir(fbuf, fp, &dirdev); + delete_in_dir(fbuf, fp, &dirdev, 0); } else change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp)); } @@ -2342,7 +2499,21 @@ void generate_files(int f_out, const char *local_name) } while ((cur_flist = cur_flist->next) != NULL); if (delete_during) - delete_in_dir(NULL, NULL, &dev_zero); + delete_in_dir(NULL, NULL, &dev_zero, 0); + if (detect_renamed) { + if (delete_during < 0) + delete_during = 0; + detect_renamed = 0; + + for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) { + struct file_struct *file = 
cur_flist->files[i]; + if (local_name) + strlcpy(fbuf, local_name, sizeof fbuf); + else + f_name(file, fbuf); + recv_generator(fbuf, file, i, itemizing, code, f_out); + } + } phase++; if (DEBUG_GTE(GENR, 1)) rprintf(FINFO, "generate_files phase=%d\n", phase); diff --git a/main.c b/main.c --- a/main.c +++ b/main.c @@ -721,7 +721,7 @@ static char *get_local_name(struct file_list *flist, char *dest_path) trailing_slash = cp && !cp[1]; if (mkpath_dest_arg && statret < 0 && (cp || file_total > 1)) { - int ret = make_path(dest_path, file_total > 1 && !trailing_slash ? 0 : MKP_DROP_NAME); + int ret = make_path(dest_path, ACCESSPERMS, file_total > 1 && !trailing_slash ? 0 : MKP_DROP_NAME); if (ret < 0) goto mkdir_error; if (INFO_GTE(NAME, 1)) { diff --git a/options.c b/options.c --- a/options.c +++ b/options.c @@ -84,6 +84,7 @@ int am_server = 0; int am_sender = 0; int am_starting_up = 1; int relative_paths = -1; +int detect_renamed = 0; int implied_dirs = 1; int missing_args = 0; /* 0 = FERROR_XFER, 1 = ignore, 2 = delete */ int numeric_ids = 0; @@ -733,6 +734,7 @@ static struct poptOption long_options[] = { {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, + {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 }, {"fuzzy", 'y', POPT_ARG_NONE, 0, 'y', 0, 0 }, {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 }, {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 }, @@ -2346,7 +2348,7 @@ int parse_arguments(int *argc_p, const char ***argv_p) inplace = 1; } - if (delay_updates && !partial_dir) + if ((delay_updates || detect_renamed) && !partial_dir) partial_dir = tmp_partialdir; if (inplace) { @@ -2355,6 +2357,7 @@ int parse_arguments(int *argc_p, const char ***argv_p) snprintf(err_buf, sizeof err_buf, "--%s cannot be used with --%s\n", append_mode ? "append" : "inplace", + detect_renamed ? "detect-renamed" : delay_updates ? 
"delay-updates" : "partial-dir"); return 0; } @@ -2760,6 +2763,8 @@ void server_options(char **args, int *argc_p) args[ac++] = "--super"; if (size_only) args[ac++] = "--size-only"; + if (detect_renamed) + args[ac++] = "--detect-renamed"; if (do_stats) args[ac++] = "--stats"; } else { diff --git a/receiver.c b/receiver.c --- a/receiver.c +++ b/receiver.c @@ -217,7 +217,7 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file) * information should have been previously transferred, but that may * not be the case with -R */ if (fd == -1 && relative_paths && errno == ENOENT - && make_path(fnametmp, MKP_SKIP_SLASH | MKP_DROP_NAME) == 0) { + && make_path(fnametmp, ACCESSPERMS, MKP_SKIP_SLASH | MKP_DROP_NAME) == 0) { /* Get back to name with XXXXXX in it. */ get_tmpname(fnametmp, fname, False); fd = do_mkstemp(fnametmp, (file->mode|added_perms) & INITACCESSPERMS); diff --git a/rsync.1.md b/rsync.1.md --- a/rsync.1.md +++ b/rsync.1.md @@ -421,6 +421,7 @@ detailed description below for a complete description. --modify-window=NUM, -@ set the accuracy for mod-time comparisons --temp-dir=DIR, -T create temporary files in directory DIR --fuzzy, -y find similar file for basis if no dest file +--detect-renamed try to find renamed files to speed the xfer --compare-dest=DIR also compare destination files relative to DIR --copy-dest=DIR ... and include copies of unchanged files --link-dest=DIR hardlink to files in DIR when unchanged @@ -2247,6 +2248,22 @@ your home directory (remove the '=' for that). fuzzy-match files, so either use `--delete-after` or specify some filename exclusions if you need to prevent this. +0. `--detect-renamed` + + With this option, for each new source file (call it `src/S`), rsync looks + for a file `dest/D` anywhere in the destination that passes the quick check + with `src/S`. If such a `dest/D` is found, rsync uses it as an alternate + basis for transferring `S`. 
The idea is that if `src/S` was renamed from + `src/D` (as opposed to `src/S` passing the quick check with `dest/D` by + coincidence), the delta-transfer algorithm will find that all the data + matches between `src/S` and `dest/D`, and the transfer will be really fast. + + By default, alternate-basis files are hard-linked into a directory named + ".~tmp~" in each file's destination directory, but if you've specified the + `--partial-dir` option, that directory will be used instead. These + potential alternate-basis files will be removed as the transfer progresses. + This option conflicts with `--inplace` and `--append`. + 0. `--compare-dest=DIR` This option instructs rsync to use _DIR_ on the destination machine as an diff --git a/rsync.h b/rsync.h --- a/rsync.h +++ b/rsync.h @@ -272,7 +272,7 @@ enum msgcode { #define NDX_DEL_STATS -3 #define NDX_FLIST_OFFSET -101 -/* For calling delete_item() and delete_dir_contents(). */ +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */ #define DEL_NO_UID_WRITE (1<<0) /* file/dir has our uid w/o write perm */ #define DEL_RECURSE (1<<1) /* if dir, delete all contents */ #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */ @@ -282,6 +282,7 @@ enum msgcode { #define DEL_FOR_DEVICE (1<<6) /* making room for a replacement device */ #define DEL_FOR_SPECIAL (1<<7) /* making room for a replacement special */ #define DEL_FOR_BACKUP (1<<8) /* the delete is for a backup operation */ +#define DEL_NO_DELETIONS (1<<9) /* just check for renames w/o deleting */ #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL) diff --git a/util.c b/util.c --- a/util.c +++ b/util.c @@ -182,7 +182,7 @@ int set_times(const char *fname, STRUCT_STAT *stp) /* Create any necessary directories in fname. Any missing directories are * created with default permissions. Returns < 0 on error, or the number * of directories created. 
*/ -int make_path(char *fname, int flags) +int make_path(char *fname, mode_t mode, int flags) { char *end, *p; int ret = 0; @@ -213,7 +213,7 @@ int make_path(char *fname, int flags) else errno = ENOTDIR; } - } else if (do_mkdir(fname, ACCESSPERMS) == 0) { + } else if (do_mkdir(fname, mode) == 0) { ret++; break; } @@ -252,7 +252,7 @@ int make_path(char *fname, int flags) p += strlen(p); if (ret < 0) /* Skip mkdir on error, but keep restoring the path. */ continue; - if (do_mkdir(fname, ACCESSPERMS) < 0) + if (do_mkdir(fname, mode) < 0) ret = -ret - 1; else ret++; @@ -1162,6 +1162,32 @@ char *normalize_path(char *path, BOOL force_newbuf, unsigned int *len_ptr) return path; } +/* We need to supply our own strcmp function for file list comparisons + * to ensure that signed/unsigned usage is consistent between machines. */ +int u_strcmp(const char *p1, const char *p2) +{ + for ( ; *p1; p1++, p2++) { + if (*p1 != *p2) + break; + } + + return (int)*(uchar*)p1 - (int)*(uchar*)p2; +} + +/* We need a memcmp function that compares unsigned-byte values. */ +int u_memcmp(const void *p1, const void *p2, size_t len) +{ + const uchar *u1 = p1; + const uchar *u2 = p2; + + for ( ; len--; u1++, u2++) { + if (*u1 != *u2) + return (int)*u1 - (int)*u2; + } + + return 0; +} + /** * Return a quoted string with the full pathname of the indicated filename. * The string " (in MODNAME)" may also be appended. 
The returned pointer @@ -1255,7 +1281,7 @@ int handle_partial_dir(const char *fname, int create) } statret = -1; } - if (statret < 0 && do_mkdir(dir, 0700) < 0) { + if (statret < 0 && make_path(dir, 0700, 0) < 0) { *fn = '/'; return 0; } diff -Nurp a/rsync.1 b/rsync.1 --- a/rsync.1 +++ b/rsync.1 @@ -497,6 +497,7 @@ detailed description below for a complet --modify-window=NUM, -@ set the accuracy for mod-time comparisons --temp-dir=DIR, -T create temporary files in directory DIR --fuzzy, -y find similar file for basis if no dest file +--detect-renamed try to find renamed files to speed the xfer --compare-dest=DIR also compare destination files relative to DIR --copy-dest=DIR ... and include copies of unchanged files --link-dest=DIR hardlink to files in DIR when unchanged @@ -2290,6 +2291,20 @@ alternate destination directories that a Note that the use of the \fB\-\-delete\fP option might get rid of any potential fuzzy-match files, so either use \fB\-\-delete-after\fP or specify some filename exclusions if you need to prevent this. +.IP "\fB\-\-detect-renamed\fP" +With this option, for each new source file (call it \fBsrc/S\fP), rsync looks +for a file \fBdest/D\fP anywhere in the destination that passes the quick check +with \fBsrc/S\fP. If such a \fBdest/D\fP is found, rsync uses it as an alternate +basis for transferring \fBS\fP. The idea is that if \fBsrc/S\fP was renamed from +\fBsrc/D\fP (as opposed to \fBsrc/S\fP passing the quick check with \fBdest/D\fP by +coincidence), the delta-transfer algorithm will find that all the data +matches between \fBsrc/S\fP and \fBdest/D\fP, and the transfer will be really fast. +.IP +By default, alternate-basis files are hard-linked into a directory named +".~tmp~" in each file's destination directory, but if you've specified the +\fB\-\-partial-dir\fP option, that directory will be used instead. These +potential alternate-basis files will be removed as the transfer progresses. 
+This option conflicts with \fB\-\-inplace\fP and \fB\-\-append\fP. .IP "\fB\-\-compare-dest=DIR\fP" This option instructs rsync to use \fIDIR\fP on the destination machine as an additional hierarchy to compare destination files against doing transfers diff -Nurp a/rsync.1.html b/rsync.1.html --- a/rsync.1.html +++ b/rsync.1.html @@ -412,6 +412,7 @@ detailed description below for a complet --modify-window=NUM, -@ set the accuracy for mod-time comparisons --temp-dir=DIR, -T create temporary files in directory DIR --fuzzy, -y find similar file for basis if no dest file +--detect-renamed try to find renamed files to speed the xfer --compare-dest=DIR also compare destination files relative to DIR --copy-dest=DIR ... and include copies of unchanged files --link-dest=DIR hardlink to files in DIR when unchanged @@ -2135,6 +2136,21 @@ fuzzy-match files, so either use - exclusions if you need to prevent this.

+
--detect-renamed
+

With this option, for each new source file (call it src/S), rsync looks +for a file dest/D anywhere in the destination that passes the quick check +with src/S. If such a dest/D is found, rsync uses it as an alternate +basis for transferring S. The idea is that if src/S was renamed from +src/D (as opposed to src/S passing the quick check with dest/D by +coincidence), the delta-transfer algorithm will find that all the data +matches between src/S and dest/D, and the transfer will be really fast.

+

By default, alternate-basis files are hard-linked into a directory named +".~tmp~" in each file's destination directory, but if you've specified the +--partial-dir option, that directory will be used instead. These +potential alternate-basis files will be removed as the transfer progresses. +This option conflicts with --inplace and --append.

+
+
--compare-dest=DIR

This option instructs rsync to use DIR on the destination machine as an additional hierarchy to compare destination files against doing transfers