File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / rsync / patches / detect-renamed.diff
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 00:32:36 2021 UTC (3 years, 3 months ago) by misho
Branches: rsync, MAIN
CVS tags: v3_2_3, HEAD
rsync 3.2.3

    1: This patch adds the --detect-renamed option which makes rsync notice files
    2: that either (1) match in size & modify-time (plus the basename, if possible)
    3: or (2) match in size & checksum (when --checksum was also specified) and use
    4: each match as an alternate basis file to speed up the transfer.
    5: 
    6: The algorithm attempts to scan the receiving-side's files in an efficient
    7: manner.  If --delete[-before] is enabled, we'll take advantage of the
    8: pre-transfer delete pass to prepare any alternate-basis-file matches we
    9: might find.  If --delete-before is not enabled, rsync does the rename scan
   10: during the regular file-sending scan (scanning each directory right before
   11: the generator starts updating files from that dir).  In this latter mode,
   12: rsync might delay the updating of a file (if no alternate-basis match was
   13: yet found) until the full scan of the receiving side is complete, at which
   14: point any delayed files are processed.
   15: 
   16: I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
   17: takes advantage of rsync's pre-existing partial-dir logic.  This uses less
   18: memory than trying to keep track of the matches internally, and also allows
   19: any deletions or file-updates to occur normally without interfering with
   20: these alternate-basis discoveries.
   21: 
   22: To use this patch, run these commands for a successful build:
   23: 
   24:     patch -p1 <patches/detect-renamed.diff
   25:     ./configure                                 (optional if already run)
   26:     make
   27: 
   28: TODO:
   29: 
   30:   The routine that makes missing directories for files that get renamed
   31:   down into a new sub-hierarchy doesn't properly handle the case where some
   32:   path elements might exist but not be a dir yet.  We need to either change
   33:   our stash-ahead algorithm (to not require unknown path elements) or we
   34:   need to create a better path-making routine.
   35: 
   36:   We need to never return a match from fattr_find() that has a basis
   37:   file.  This will ensure that we don't try to give a renamed file to
   38:   a file that can't use it, while missing out on giving it to a file
   39:   that could use it.
   40: 
   41: based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
   42: diff --git a/backup.c b/backup.c
   43: --- a/backup.c
   44: +++ b/backup.c
   45: @@ -162,7 +162,7 @@ char *get_backup_name(const char *fname)
   46:  			int ret;
   47:  			if (backup_dir_len > 1)
   48:  				backup_dir_buf[backup_dir_len-1] = '\0';
   49: -			ret = make_path(backup_dir_buf, 0);
   50: +			ret = make_path(backup_dir_buf, ACCESSPERMS, 0);
   51:  			if (backup_dir_len > 1)
   52:  				backup_dir_buf[backup_dir_len-1] = '/';
   53:  			if (ret < 0)
   54: diff --git a/compat.c b/compat.c
   55: --- a/compat.c
   56: +++ b/compat.c
   57: @@ -39,6 +39,7 @@ extern int checksum_seed;
   58:  extern int basis_dir_cnt;
   59:  extern int prune_empty_dirs;
   60:  extern int protocol_version;
   61: +extern int detect_renamed;
   62:  extern int protect_args;
   63:  extern int preserve_uid;
   64:  extern int preserve_gid;
   65: @@ -159,6 +160,7 @@ void set_allow_inc_recurse(void)
   66:  		allow_inc_recurse = 0;
   67:  	else if (!am_sender
   68:  	 && (delete_before || delete_after
   69: +	  || detect_renamed
   70:  	  || delay_updates || prune_empty_dirs))
   71:  		allow_inc_recurse = 0;
   72:  	else if (am_server && !local_server
   73: diff --git a/delete.c b/delete.c
   74: --- a/delete.c
   75: +++ b/delete.c
   76: @@ -25,6 +25,7 @@
   77:  extern int am_root;
   78:  extern int make_backups;
   79:  extern int max_delete;
   80: +extern int detect_renamed;
   81:  extern char *backup_dir;
   82:  extern char *backup_suffix;
   83:  extern int backup_suffix_len;
   84: @@ -44,6 +45,8 @@ static inline int is_backup_file(char *fn)
   85:   * its contents, otherwise just checks for content.  Returns DR_SUCCESS or
   86:   * DR_NOT_EMPTY.  Note that fname must point to a MAXPATHLEN buffer!  (The
   87:   * buffer is used for recursion, but returned unchanged.)
   88: + *
   89: + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
   90:   */
   91:  static enum delret delete_dir_contents(char *fname, uint16 flags)
   92:  {
   93: @@ -63,7 +66,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
   94:  	save_filters = push_local_filters(fname, dlen);
   95:  
   96:  	non_perishable_cnt = 0;
   97: +	file_extra_cnt += SUM_EXTRA_CNT;
   98:  	dirlist = get_dirlist(fname, dlen, 0);
   99: +	file_extra_cnt -= SUM_EXTRA_CNT;
  100:  	ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
  101:  
  102:  	if (!dirlist->used)
  103: @@ -103,7 +108,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
  104:  		if (S_ISDIR(fp->mode)) {
  105:  			if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
  106:  				ret = DR_NOT_EMPTY;
  107: -		}
  108: +		} else if (detect_renamed && S_ISREG(fp->mode))
  109: +			look_for_rename(fp, fname);
  110:  		if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
  111:  			ret = DR_NOT_EMPTY;
  112:  	}
  113: @@ -126,6 +132,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
  114:   *
  115:   * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
  116:   * a directory! (The buffer is used for recursion, but returned unchanged.)
  117: + *
  118: + * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
  119:   */
  120:  enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
  121:  {
  122: @@ -153,6 +161,9 @@ enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
  123:  		/* OK: try to delete the directory. */
  124:  	}
  125:  
  126: +	if (flags & DEL_NO_DELETIONS)
  127: +		return DR_SUCCESS;
  128: +
  129:  	if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && stats.deleted_files >= max_delete) {
  130:  		skipped_deletes++;
  131:  		return DR_AT_LIMIT;
  132: diff --git a/flist.c b/flist.c
  133: --- a/flist.c
  134: +++ b/flist.c
  135: @@ -64,6 +64,7 @@ extern int non_perishable_cnt;
  136:  extern int prune_empty_dirs;
  137:  extern int copy_links;
  138:  extern int copy_unsafe_links;
  139: +extern int detect_renamed;
  140:  extern int protocol_version;
  141:  extern int sanitize_paths;
  142:  extern int munge_symlinks;
  143: @@ -130,6 +131,8 @@ static int64 tmp_dev = -1, tmp_ino;
  144:  #endif
  145:  static char tmp_sum[MAX_DIGEST_LEN];
  146:  
  147: +struct file_list the_fattr_list;
  148: +
  149:  static char empty_sum[MAX_DIGEST_LEN];
  150:  static int flist_count_offset; /* for --delete --progress */
  151:  static int show_filelist_progress;
  152: @@ -277,6 +280,45 @@ static inline int is_excluded(const char *fname, int is_dir, int filter_level)
  153:  	return name_is_excluded(fname, is_dir ? NAME_IS_DIR : NAME_IS_FILE, filter_level);
  154:  }
  155:  
  156: +static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
  157: +{
  158: +	struct file_struct *f1 = *file1;
  159: +	struct file_struct *f2 = *file2;
  160: +	int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
  161: +	int diff;
  162: +
  163: +	if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
  164: +		if (!f2->basename || !S_ISREG(f2->mode) || !len2)
  165: +			return 0;
  166: +		return 1;
  167: +	}
  168: +	if (!f2->basename || !S_ISREG(f2->mode) || !len2)
  169: +		return -1;
  170: +
  171: +	/* Don't use diff for values that are longer than an int. */
  172: +	if (len1 != len2)
  173: +		return len1 < len2 ? -1 : 1;
  174: +
  175: +	if (always_checksum) {
  176: +		diff = u_memcmp(F_SUM(f1), F_SUM(f2), flist_csum_len);
  177: +		if (diff)
  178: +			return diff;
  179: +	} else if (f1->modtime != f2->modtime)
  180: +		return f1->modtime < f2->modtime ? -1 : 1;
  181: +
  182: +	diff = u_strcmp(f1->basename, f2->basename);
  183: +	if (diff)
  184: +		return diff;
  185: +
  186: +	if (f1->dirname == f2->dirname)
  187: +		return 0;
  188: +	if (!f1->dirname)
  189: +		return -1;
  190: +	if (!f2->dirname)
  191: +		return 1;
  192: +	return u_strcmp(f1->dirname, f2->dirname);
  193: +}
  194: +
  195:  static void send_directory(int f, struct file_list *flist,
  196:  			   char *fbuf, int len, int flags);
  197:  
  198: @@ -2675,6 +2717,23 @@ struct file_list *recv_file_list(int f, int dir_ndx)
  199:  	 * for a non-relative transfer in recv_file_entry(). */
  200:  	flist_sort_and_clean(flist, relative_paths);
  201:  
  202: +	if (detect_renamed) {
  203: +		int j = flist->used;
  204: +		the_fattr_list.used = j;
  205: +		the_fattr_list.files = new_array(struct file_struct *, j);
  206: +		memcpy(the_fattr_list.files, flist->files,
  207: +		       j * sizeof (struct file_struct *));
  208: +		qsort(the_fattr_list.files, j,
  209: +		      sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
  210: +		the_fattr_list.low = 0;
  211: +		while (j-- > 0) {
  212: +			struct file_struct *fp = the_fattr_list.files[j];
  213: +			if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
  214: +				break;
  215: +		}
  216: +		the_fattr_list.high = j;
  217: +	}
  218: +
  219:  	if (protocol_version < 30) {
  220:  		/* Recv the io_error flag */
  221:  		int err = read_int(f);
  222: diff --git a/generator.c b/generator.c
  223: --- a/generator.c
  224: +++ b/generator.c
  225: @@ -79,6 +79,7 @@ extern int always_checksum;
  226:  extern int flist_csum_len;
  227:  extern char *partial_dir;
  228:  extern int alt_dest_type;
  229: +extern int detect_renamed;
  230:  extern int whole_file;
  231:  extern int list_only;
  232:  extern int read_batch;
  233: @@ -97,11 +98,13 @@ extern char *tmpdir;
  234:  extern char *basis_dir[MAX_BASIS_DIRS+1];
  235:  extern struct file_list *cur_flist, *first_flist, *dir_flist;
  236:  extern filter_rule_list filter_list, daemon_filter_list;
  237: +extern struct file_list the_fattr_list;
  238:  
  239:  int maybe_ATTRS_REPORT = 0;
  240:  int maybe_ATTRS_ACCURATE_TIME = 0;
  241:  
  242:  static dev_t dev_zero;
  243: +static int unexplored_dirs = 1;
  244:  static int deldelay_size = 0, deldelay_cnt = 0;
  245:  static char *deldelay_buf = NULL;
  246:  static int deldelay_fd = -1;
  247: @@ -269,14 +272,19 @@ static void do_delayed_deletions(char *delbuf)
  248:   * all the --delete-WHEN options.  Note that the fbuf pointer must point to a
  249:   * MAXPATHLEN buffer with the name of the directory in it (the functions we
  250:   * call will append names onto the end, but the old dir value will be restored
  251: - * on exit). */
  252: -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
  253: + * on exit).
  254: + *
  255: + * Note:  --detect-renamed may use this routine with DEL_NO_DELETIONS set!
  256: + */
  257: +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
  258: +			  int del_flags)
  259:  {
  260:  	static int already_warned = 0;
  261:  	static struct hashtable *dev_tbl;
  262:  	struct file_list *dirlist;
  263: -	char delbuf[MAXPATHLEN];
  264: -	int dlen, i;
  265: +	char *p, delbuf[MAXPATHLEN];
  266: +	unsigned remainder;
  267: +	int dlen, i, restore_dot = 0;
  268:  
  269:  	if (!fbuf) {
  270:  		change_local_filter_dir(NULL, 0, 0);
  271: @@ -290,17 +298,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
  272:  		maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
  273:  
  274:  	if (io_error & IOERR_GENERAL && !ignore_errors) {
  275: -		if (already_warned)
  276: +		if (!already_warned) {
  277: +			rprintf(FINFO,
  278: +			    "IO error encountered -- skipping file deletion\n");
  279: +			already_warned = 1;
  280: +		}
  281: +		if (!detect_renamed)
  282:  			return;
  283: -		rprintf(FINFO,
  284: -			"IO error encountered -- skipping file deletion\n");
  285: -		already_warned = 1;
  286: -		return;
  287: +		del_flags |= DEL_NO_DELETIONS;
  288:  	}
  289:  
  290:  	dlen = strlen(fbuf);
  291:  	change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
  292:  
  293: +	if (detect_renamed)
  294: +		unexplored_dirs--;
  295: +
  296:  	if (one_file_system) {
  297:  		if (!dev_tbl)
  298:  			dev_tbl = hashtable_create(16, HT_KEY64);
  299: @@ -316,6 +329,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
  300:  
  301:  	dirlist = get_dirlist(fbuf, dlen, 0);
  302:  
  303: +	p = fbuf + dlen;
  304: +	if (dlen == 1 && *fbuf == '.') {
  305: +		restore_dot = 1;
  306: +		p = fbuf;
  307: +	} else if (dlen != 1 || *fbuf != '/')
  308: +		*p++ = '/';
  309: +	remainder = MAXPATHLEN - (p - fbuf);
  310: +
  311:  	/* If an item in dirlist is not found in flist, delete it
  312:  	 * from the filesystem. */
  313:  	for (i = dirlist->used; i--; ) {
  314: @@ -328,6 +349,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
  315:  					f_name(fp, NULL));
  316:  			continue;
  317:  		}
  318: +		if (detect_renamed && S_ISREG(fp->mode)) {
  319: +			strlcpy(p, fp->basename, remainder);
  320: +			look_for_rename(fp, fbuf);
  321: +		}
  322:  		/* Here we want to match regardless of file type.  Replacement
  323:  		 * of a file with one of another type is handled separately by
  324:  		 * a delete_item call with a DEL_MAKE_ROOM flag. */
  325: @@ -336,14 +361,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
  326:  			if (!(fp->mode & S_IWUSR) && !am_root && fp->flags & FLAG_OWNED_BY_US)
  327:  				flags |= DEL_NO_UID_WRITE;
  328:  			f_name(fp, delbuf);
  329: -			if (delete_during == 2) {
  330: -				if (!remember_delete(fp, delbuf, flags))
  331: +			if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) {
  332: +				if (!remember_delete(fp, delbuf, del_flags | flags))
  333:  					break;
  334:  			} else
  335: -				delete_item(delbuf, fp->mode, flags);
  336: -		}
  337: +				delete_item(delbuf, fp->mode, del_flags | flags);
  338: +		} else if (detect_renamed && S_ISDIR(fp->mode))
  339: +			unexplored_dirs++;
  340:  	}
  341:  
  342: +	if (restore_dot)
  343: +		fbuf[0] = '.';
  344: +	fbuf[dlen] = '\0';
  345: +
  346:  	flist_free(dirlist);
  347:  }
  348:  
  349: @@ -379,14 +409,125 @@ static void do_delete_pass(void)
  350:  		 || !S_ISDIR(st.st_mode))
  351:  			continue;
  352:  
  353: -		delete_in_dir(fbuf, file, &st.st_dev);
  354: +		delete_in_dir(fbuf, file, &st.st_dev, 0);
  355:  	}
  356: -	delete_in_dir(NULL, NULL, &dev_zero);
  357: +	delete_in_dir(NULL, NULL, &dev_zero, 0);
  358:  
  359:  	if (INFO_GTE(FLIST, 2) && !am_server)
  360:  		rprintf(FINFO, "                    \r");
  361:  }
  362:  
  363: +/* Search for a regular file that matches either (1) the size & modified
  364: + * time (plus the basename, if possible) or (2) the size & checksum.  If
  365: + * we find an exact match down to the dirname, return -1 because we found
  366: + * an up-to-date file in the transfer, not a renamed file. */
  367: +static int fattr_find(struct file_struct *f, char *fname)
  368: +{
  369: +	int low = the_fattr_list.low, high = the_fattr_list.high;
  370: +	int mid, ok_match = -1, good_match = -1;
  371: +	struct file_struct *fmid;
  372: +	int diff;
  373: +
  374: +	while (low <= high) {
  375: +		mid = (low + high) / 2;
  376: +		fmid = the_fattr_list.files[mid];
  377: +		if (F_LENGTH(fmid) != F_LENGTH(f)) {
  378: +			if (F_LENGTH(fmid) < F_LENGTH(f))
  379: +				low = mid + 1;
  380: +			else
  381: +				high = mid - 1;
  382: +			continue;
  383: +		}
  384: +		if (always_checksum) {
  385: +			/* We use the FLAG_FILE_SENT flag to indicate when we
  386: +			 * have computed the checksum for an entry. */
  387: +			if (!(f->flags & FLAG_FILE_SENT)) {
  388: +				STRUCT_STAT st;
  389: +				if (fmid->modtime == f->modtime
  390: +				 && f_name_cmp(fmid, f) == 0)
  391: +					return -1; /* assume we can't help */
  392: +				st.st_size = F_LENGTH(f);
  393: +				st.st_mtime = f->modtime;
  394: +				file_checksum(fname, &st, F_SUM(f));
  395: +				f->flags |= FLAG_FILE_SENT;
  396: +			}
  397: +			diff = u_memcmp(F_SUM(fmid), F_SUM(f), flist_csum_len);
  398: +			if (diff) {
  399: +				if (diff < 0)
  400: +					low = mid + 1;
  401: +				else
  402: +					high = mid - 1;
  403: +				continue;
  404: +			}
  405: +		} else {
  406: +			if (fmid->modtime != f->modtime) {
  407: +				if (fmid->modtime < f->modtime)
  408: +					low = mid + 1;
  409: +				else
  410: +					high = mid - 1;
  411: +				continue;
  412: +			}
  413: +		}
  414: +		ok_match = mid;
  415: +		diff = u_strcmp(fmid->basename, f->basename);
  416: +		if (diff == 0) {
  417: +			good_match = mid;
  418: +			if (fmid->dirname == f->dirname)
  419: +				return -1; /* file is up-to-date */
  420: +			if (!fmid->dirname) {
  421: +				low = mid + 1;
  422: +				continue;
  423: +			}
  424: +			if (!f->dirname) {
  425: +				high = mid - 1;
  426: +				continue;
  427: +			}
  428: +			diff = u_strcmp(fmid->dirname, f->dirname);
  429: +			if (diff == 0)
  430: +				return -1; /* file is up-to-date */
  431: +		}
  432: +		if (diff < 0)
  433: +			low = mid + 1;
  434: +		else
  435: +			high = mid - 1;
  436: +	}
  437: +
  438: +	return good_match >= 0 ? good_match : ok_match;
  439: +}
  440: +
  441: +void look_for_rename(struct file_struct *file, char *fname)
  442: +{
  443: +	struct file_struct *fp;
  444: +	char *partialptr, *fn;
  445: +	STRUCT_STAT st;
  446: +	int ndx;
  447: +
  448: +	if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
  449: +		return;
  450: +
  451: +	fp = the_fattr_list.files[ndx];
  452: +	fn = f_name(fp, NULL);
  453: +	/* We don't provide an alternate-basis file if there is a basis file. */
  454: +	if (link_stat(fn, &st, 0) == 0)
  455: +		return;
  456: +
  457: +	if (!dry_run) {
  458: +		if ((partialptr = partial_dir_fname(fn)) == NULL
  459: +		 || !handle_partial_dir(partialptr, PDIR_CREATE))
  460: +			return;
  461: +		/* We only use the file if we can hard-link it into our tmp dir. */
  462: +		if (link(fname, partialptr) != 0) {
  463: +			if (errno != EEXIST)
  464: +				handle_partial_dir(partialptr, PDIR_DELETE);
  465: +			return;
  466: +		}
  467: +	}
  468: +
  469: +	/* I think this falls into the -vv category with "%s is uptodate", etc. */
  470: +	if (INFO_GTE(MISC, 2))
  471: +		rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
  472: +}
  473: +
  474:  static inline int mtime_differs(STRUCT_STAT *stp, struct file_struct *file)
  475:  {
  476:  #ifdef ST_MTIME_NSEC
  477: @@ -1187,6 +1328,7 @@ static void list_file_entry(struct file_struct *f)
  478:  	}
  479:  }
  480:  
  481: +static struct bitbag *delayed_bits = NULL;
  482:  static int phase = 0;
  483:  static int dflt_perms;
  484:  
  485: @@ -1323,7 +1465,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
  486:  			 && do_stat(dn, &sx.st) < 0) {
  487:  				if (dry_run)
  488:  					goto parent_is_dry_missing;
  489: -				if (make_path(fname, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0) {
  490: +				if (make_path(fname, ACCESSPERMS, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0) {
  491:  					rsyserr(FERROR_XFER, errno,
  492:  						"recv_generator: mkdir %s failed",
  493:  						full_fname(dn));
  494: @@ -1459,7 +1601,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
  495:  		}
  496:  		if (real_ret != 0 && do_mkdir(fname,file->mode|added_perms) < 0 && errno != EEXIST) {
  497:  			if (!relative_paths || errno != ENOENT
  498: -			 || make_path(fname, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0
  499: +			 || make_path(fname, ACCESSPERMS, MKP_DROP_NAME | MKP_SKIP_SLASH) < 0
  500:  			 || (do_mkdir(fname, file->mode|added_perms) < 0 && errno != EEXIST)) {
  501:  				rsyserr(FERROR_XFER, errno,
  502:  					"recv_generator: mkdir %s failed",
  503: @@ -1507,9 +1649,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
  504:  		}
  505:  		else if (delete_during && f_out != -1 && !phase
  506:  		    && !(file->flags & FLAG_MISSING_DIR)) {
  507: -			if (file->flags & FLAG_CONTENT_DIR)
  508: -				delete_in_dir(fname, file, &real_sx.st.st_dev);
  509: -			else
  510: +			if (file->flags & FLAG_CONTENT_DIR) {
  511: +				if (detect_renamed && real_ret != 0)
  512: +					unexplored_dirs++;
  513: +				delete_in_dir(fname, file, &real_sx.st.st_dev,
  514: +					      delete_during < 0 ? DEL_NO_DELETIONS : 0);
  515: +			} else
  516:  				change_local_filter_dir(fname, strlen(fname), F_DEPTH(file));
  517:  		}
  518:  		prior_dir_file = file;
  519: @@ -1786,8 +1931,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
  520:  			goto cleanup;
  521:  		}
  522:  #endif
  523: -		if (stat_errno == ENOENT)
  524: +		if (stat_errno == ENOENT) {
  525: +			if (detect_renamed && unexplored_dirs > 0
  526: +			 && F_LENGTH(file)) {
  527: +				bitbag_set_bit(delayed_bits, ndx);
  528: +				return;
  529: +			}
  530:  			goto notify_others;
  531: +		}
  532:  		rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
  533:  			full_fname(fname));
  534:  		goto cleanup;
  535: @@ -2251,6 +2402,12 @@ void generate_files(int f_out, const char *local_name)
  536:  	if (DEBUG_GTE(GENR, 1))
  537:  		rprintf(FINFO, "generator starting pid=%d\n", (int)getpid());
  538:  
  539: +	if (detect_renamed) {
  540: +		delayed_bits = bitbag_create(cur_flist->used);
  541: +		if (!delete_before && !delete_during)
  542: +			delete_during = -1;
  543: +	}
  544: +
  545:  	if (delete_before && !solo_file && cur_flist->used > 0)
  546:  		do_delete_pass();
  547:  	if (delete_during == 2) {
  548: @@ -2259,7 +2416,7 @@ void generate_files(int f_out, const char *local_name)
  549:  	}
  550:  	info_levels[INFO_FLIST] = info_levels[INFO_PROGRESS] = 0;
  551:  
  552: -	if (append_mode > 0 || whole_file < 0)
  553: +	if (append_mode > 0 || detect_renamed || whole_file < 0)
  554:  		whole_file = 0;
  555:  	if (DEBUG_GTE(FLIST, 1)) {
  556:  		rprintf(FINFO, "delta-transmission %s\n",
  557: @@ -2295,7 +2452,7 @@ void generate_files(int f_out, const char *local_name)
  558:  						dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
  559:  					} else
  560:  						dirdev = MAKEDEV(0, 0);
  561: -					delete_in_dir(fbuf, fp, &dirdev);
  562: +					delete_in_dir(fbuf, fp, &dirdev, 0);
  563:  				} else
  564:  					change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp));
  565:  			}
  566: @@ -2342,7 +2499,21 @@ void generate_files(int f_out, const char *local_name)
  567:  	} while ((cur_flist = cur_flist->next) != NULL);
  568:  
  569:  	if (delete_during)
  570: -		delete_in_dir(NULL, NULL, &dev_zero);
  571: +		delete_in_dir(NULL, NULL, &dev_zero, 0);
  572: +	if (detect_renamed) {
  573: +		if (delete_during < 0)
  574: +			delete_during = 0;
  575: +		detect_renamed = 0;
  576: +
  577: +		for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
  578: +			struct file_struct *file = cur_flist->files[i];
  579: +			if (local_name)
  580: +				strlcpy(fbuf, local_name, sizeof fbuf);
  581: +			else
  582: +				f_name(file, fbuf);
  583: +			recv_generator(fbuf, file, i, itemizing, code, f_out);
  584: +		}
  585: +	}
  586:  	phase++;
  587:  	if (DEBUG_GTE(GENR, 1))
  588:  		rprintf(FINFO, "generate_files phase=%d\n", phase);
  589: diff --git a/main.c b/main.c
  590: --- a/main.c
  591: +++ b/main.c
  592: @@ -721,7 +721,7 @@ static char *get_local_name(struct file_list *flist, char *dest_path)
  593:  	trailing_slash = cp && !cp[1];
  594:  
  595:  	if (mkpath_dest_arg && statret < 0 && (cp || file_total > 1)) {
  596: -		int ret = make_path(dest_path, file_total > 1 && !trailing_slash ? 0 : MKP_DROP_NAME);
  597: +		int ret = make_path(dest_path, ACCESSPERMS, file_total > 1 && !trailing_slash ? 0 : MKP_DROP_NAME);
  598:  		if (ret < 0)
  599:  			goto mkdir_error;
  600:  		if (INFO_GTE(NAME, 1)) {
  601: diff --git a/options.c b/options.c
  602: --- a/options.c
  603: +++ b/options.c
  604: @@ -84,6 +84,7 @@ int am_server = 0;
  605:  int am_sender = 0;
  606:  int am_starting_up = 1;
  607:  int relative_paths = -1;
  608: +int detect_renamed = 0;
  609:  int implied_dirs = 1;
  610:  int missing_args = 0; /* 0 = FERROR_XFER, 1 = ignore, 2 = delete */
  611:  int numeric_ids = 0;
  612: @@ -733,6 +734,7 @@ static struct poptOption long_options[] = {
  613:    {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
  614:    {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
  615:    {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
  616: +  {"detect-renamed",   0,  POPT_ARG_NONE,   &detect_renamed, 0, 0, 0 },
  617:    {"fuzzy",           'y', POPT_ARG_NONE,   0, 'y', 0, 0 },
  618:    {"no-fuzzy",         0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
  619:    {"no-y",             0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
  620: @@ -2346,7 +2348,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
  621:  		inplace = 1;
  622:  	}
  623:  
  624: -	if (delay_updates && !partial_dir)
  625: +	if ((delay_updates || detect_renamed) && !partial_dir)
  626:  		partial_dir = tmp_partialdir;
  627:  
  628:  	if (inplace) {
  629: @@ -2355,6 +2357,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
  630:  			snprintf(err_buf, sizeof err_buf,
  631:  				 "--%s cannot be used with --%s\n",
  632:  				 append_mode ? "append" : "inplace",
  633: +				 detect_renamed ? "detect-renamed" :
  634:  				 delay_updates ? "delay-updates" : "partial-dir");
  635:  			return 0;
  636:  		}
  637: @@ -2760,6 +2763,8 @@ void server_options(char **args, int *argc_p)
  638:  			args[ac++] = "--super";
  639:  		if (size_only)
  640:  			args[ac++] = "--size-only";
  641: +		if (detect_renamed)
  642: +			args[ac++] = "--detect-renamed";
  643:  		if (do_stats)
  644:  			args[ac++] = "--stats";
  645:  	} else {
  646: diff --git a/receiver.c b/receiver.c
  647: --- a/receiver.c
  648: +++ b/receiver.c
  649: @@ -217,7 +217,7 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file)
  650:  	 * information should have been previously transferred, but that may
  651:  	 * not be the case with -R */
  652:  	if (fd == -1 && relative_paths && errno == ENOENT
  653: -	 && make_path(fnametmp, MKP_SKIP_SLASH | MKP_DROP_NAME) == 0) {
  654: +	 && make_path(fnametmp, ACCESSPERMS, MKP_SKIP_SLASH | MKP_DROP_NAME) == 0) {
  655:  		/* Get back to name with XXXXXX in it. */
  656:  		get_tmpname(fnametmp, fname, False);
  657:  		fd = do_mkstemp(fnametmp, (file->mode|added_perms) & INITACCESSPERMS);
  658: diff --git a/rsync.1.md b/rsync.1.md
  659: --- a/rsync.1.md
  660: +++ b/rsync.1.md
  661: @@ -421,6 +421,7 @@ detailed description below for a complete description.
  662:  --modify-window=NUM, -@  set the accuracy for mod-time comparisons
  663:  --temp-dir=DIR, -T       create temporary files in directory DIR
  664:  --fuzzy, -y              find similar file for basis if no dest file
  665: +--detect-renamed         try to find renamed files to speed the xfer
  666:  --compare-dest=DIR       also compare destination files relative to DIR
  667:  --copy-dest=DIR          ... and include copies of unchanged files
  668:  --link-dest=DIR          hardlink to files in DIR when unchanged
  669: @@ -2247,6 +2248,22 @@ your home directory (remove the '=' for that).
  670:      fuzzy-match files, so either use `--delete-after` or specify some filename
  671:      exclusions if you need to prevent this.
  672:  
  673: +0.  `--detect-renamed`
  674: +
  675: +    With this option, for each new source file (call it `src/S`), rsync looks
  676: +    for a file `dest/D` anywhere in the destination that passes the quick check
  677: +    with `src/S`.  If such a `dest/D` is found, rsync uses it as an alternate
  678: +    basis for transferring `S`.  The idea is that if `src/S` was renamed from
  679: +    `src/D` (as opposed to `src/S` passing the quick check with `dest/D` by
  680: +    coincidence), the delta-transfer algorithm will find that all the data
  681: +    matches between `src/S` and `dest/D`, and the transfer will be really fast.
  682: +
  683: +    By default, alternate-basis files are hard-linked into a directory named
  684: +    ".~tmp~" in each file's destination directory, but if you've specified the
  685: +    `--partial-dir` option, that directory will be used instead.  These
  686: +    potential alternate-basis files will be removed as the transfer progresses.
  687: +    This option conflicts with `--inplace` and `--append`.
  688: +
  689:  0.  `--compare-dest=DIR`
  690:  
  691:      This option instructs rsync to use _DIR_ on the destination machine as an
  692: diff --git a/rsync.h b/rsync.h
  693: --- a/rsync.h
  694: +++ b/rsync.h
  695: @@ -272,7 +272,7 @@ enum msgcode {
  696:  #define NDX_DEL_STATS -3
  697:  #define NDX_FLIST_OFFSET -101
  698:  
  699: -/* For calling delete_item() and delete_dir_contents(). */
  700: +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
  701:  #define DEL_NO_UID_WRITE 	(1<<0) /* file/dir has our uid w/o write perm */
  702:  #define DEL_RECURSE		(1<<1) /* if dir, delete all contents */
  703:  #define DEL_DIR_IS_EMPTY	(1<<2) /* internal delete_FUNCTIONS use only */
  704: @@ -282,6 +282,7 @@ enum msgcode {
  705:  #define DEL_FOR_DEVICE		(1<<6) /* making room for a replacement device */
  706:  #define DEL_FOR_SPECIAL 	(1<<7) /* making room for a replacement special */
  707:  #define DEL_FOR_BACKUP	 	(1<<8) /* the delete is for a backup operation */
  708: +#define DEL_NO_DELETIONS	(1<<9) /* just check for renames w/o deleting */
  709:  
  710:  #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
  711:  
  712: diff --git a/util.c b/util.c
  713: --- a/util.c
  714: +++ b/util.c
  715: @@ -182,7 +182,7 @@ int set_times(const char *fname, STRUCT_STAT *stp)
  716:  /* Create any necessary directories in fname.  Any missing directories are
  717:   * created with default permissions.  Returns < 0 on error, or the number
  718:   * of directories created. */
  719: -int make_path(char *fname, int flags)
  720: +int make_path(char *fname, mode_t mode, int flags)
  721:  {
  722:  	char *end, *p;
  723:  	int ret = 0;
  724: @@ -213,7 +213,7 @@ int make_path(char *fname, int flags)
  725:  				else
  726:  					errno = ENOTDIR;
  727:  			}
  728: -		} else if (do_mkdir(fname, ACCESSPERMS) == 0) {
  729: +		} else if (do_mkdir(fname, mode) == 0) {
  730:  			ret++;
  731:  			break;
  732:  		}
  733: @@ -252,7 +252,7 @@ int make_path(char *fname, int flags)
  734:  		p += strlen(p);
  735:  		if (ret < 0) /* Skip mkdir on error, but keep restoring the path. */
  736:  			continue;
  737: -		if (do_mkdir(fname, ACCESSPERMS) < 0)
  738: +		if (do_mkdir(fname, mode) < 0)
  739:  			ret = -ret - 1;
  740:  		else
  741:  			ret++;
  742: @@ -1162,6 +1162,32 @@ char *normalize_path(char *path, BOOL force_newbuf, unsigned int *len_ptr)
  743:  	return path;
  744:  }
  745:  
  746: +/* Our own strcmp for file list comparisons: the first differing bytes are
  747: + * compared as uchar so the result's sign is consistent between machines. */
  748: +int u_strcmp(const char *p1, const char *p2)
  749: +{
  750: +        for ( ; *p1; p1++, p2++) {
  751: +		if (*p1 != *p2)
  752: +			break;
  753: +	}
  754: +
  755: +	return (int)*(uchar*)p1 - (int)*(uchar*)p2;
  756: +}
  757: +
  758: +/* A memcmp that compares unsigned-byte values; returns <0, 0, or >0. */
  759: +int u_memcmp(const void *p1, const void *p2, size_t len)
  760: +{
  761: +	const uchar *u1 = p1;
  762: +	const uchar *u2 = p2;
  763: +
  764: +	for ( ; len--; u1++, u2++) { /* step both cursors on every byte */
  765: +		if (*u1 != *u2)
  766: +			return (int)*u1 - (int)*u2;
  767: +	}
  768: +
  769: +	return 0;
  770: +}
  771: +
  772:  /**
  773:   * Return a quoted string with the full pathname of the indicated filename.
  774:   * The string " (in MODNAME)" may also be appended.  The returned pointer
  775: @@ -1255,7 +1281,7 @@ int handle_partial_dir(const char *fname, int create)
  776:  			}
  777:  			statret = -1;
  778:  		}
  779: -		if (statret < 0 && do_mkdir(dir, 0700) < 0) {
  780: +		if (statret < 0 && make_path(dir, 0700, 0) < 0) {
  781:  			*fn = '/';
  782:  			return 0;
  783:  		}
  784: diff -Nurp a/rsync.1 b/rsync.1
  785: --- a/rsync.1
  786: +++ b/rsync.1
  787: @@ -497,6 +497,7 @@ detailed description below for a complet
  788:  --modify-window=NUM, -@  set the accuracy for mod-time comparisons
  789:  --temp-dir=DIR, -T       create temporary files in directory DIR
  790:  --fuzzy, -y              find similar file for basis if no dest file
  791: +--detect-renamed         try to find renamed files to speed the xfer
  792:  --compare-dest=DIR       also compare destination files relative to DIR
  793:  --copy-dest=DIR          ... and include copies of unchanged files
  794:  --link-dest=DIR          hardlink to files in DIR when unchanged
  795: @@ -2290,6 +2291,20 @@ alternate destination directories that a
  796:  Note that the use of the \fB\-\-delete\fP option might get rid of any potential
  797:  fuzzy-match files, so either use \fB\-\-delete-after\fP or specify some filename
  798:  exclusions if you need to prevent this.
  799: +.IP "\fB\-\-detect-renamed\fP"
  800: +With this option, for each new source file (call it \fBsrc/S\fP), rsync looks
  801: +for a file \fBdest/D\fP anywhere in the destination that passes the quick check
  802: +with \fBsrc/S\fP.  If such a \fBdest/D\fP is found, rsync uses it as an alternate
  803: +basis for transferring \fBS\fP.  The idea is that if \fBsrc/S\fP was renamed from
  804: +\fBsrc/D\fP (as opposed to \fBsrc/S\fP passing the quick check with \fBdest/D\fP by
  805: +coincidence), the delta-transfer algorithm will find that all the data
  806: +matches between \fBsrc/S\fP and \fBdest/D\fP, and the transfer will be really fast.
  807: +.IP
  808: +By default, alternate-basis files are hard-linked into a directory named
  809: +".~tmp~" in each file's destination directory, but if you've specified the
  810: +\fB\-\-partial-dir\fP option, that directory will be used instead.  These
  811: +potential alternate-basis files will be removed as the transfer progresses.
  812: +This option conflicts with \fB\-\-inplace\fP and \fB\-\-append\fP.
  813:  .IP "\fB\-\-compare-dest=DIR\fP"
  814:  This option instructs rsync to use \fIDIR\fP on the destination machine as an
  815:  additional hierarchy to compare destination files against doing transfers
  816: diff -Nurp a/rsync.1.html b/rsync.1.html
  817: --- a/rsync.1.html
  818: +++ b/rsync.1.html
  819: @@ -412,6 +412,7 @@ detailed description below for a complet
  820:  --modify-window=NUM, -@  set the accuracy for mod-time comparisons
  821:  --temp-dir=DIR, -T       create temporary files in directory DIR
  822:  --fuzzy, -y              find similar file for basis if no dest file
  823: +--detect-renamed         try to find renamed files to speed the xfer
  824:  --compare-dest=DIR       also compare destination files relative to DIR
  825:  --copy-dest=DIR          ... and include copies of unchanged files
  826:  --link-dest=DIR          hardlink to files in DIR when unchanged
  827: @@ -2135,6 +2136,21 @@ fuzzy-match files, so either use <code>-
  828:  exclusions if you need to prevent this.</p>
  829:  </dd>
  830:  
  831: +<dt><code>--detect-renamed</code></dt><dd>
  832: +<p>With this option, for each new source file (call it <code>src/S</code>), rsync looks
  833: +for a file <code>dest/D</code> anywhere in the destination that passes the quick check
  834: +with <code>src/S</code>.  If such a <code>dest/D</code> is found, rsync uses it as an alternate
  835: +basis for transferring <code>S</code>.  The idea is that if <code>src/S</code> was renamed from
  836: +<code>src/D</code> (as opposed to <code>src/S</code> passing the quick check with <code>dest/D</code> by
  837: +coincidence), the delta-transfer algorithm will find that all the data
  838: +matches between <code>src/S</code> and <code>dest/D</code>, and the transfer will be really fast.</p>
  839: +<p>By default, alternate-basis files are hard-linked into a directory named
  840: +&quot;.~tmp~&quot; in each file's destination directory, but if you've specified the
  841: +<code>--partial-dir</code> option, that directory will be used instead.  These
  842: +potential alternate-basis files will be removed as the transfer progresses.
  843: +This option conflicts with <code>--inplace</code> and <code>--append</code>.</p>
  844: +</dd>
  845: +
  846:  <dt><code>--compare-dest=DIR</code></dt><dd>
  847:  <p>This option instructs rsync to use <u>DIR</u> on the destination machine as an
  848:  additional hierarchy to compare destination files against doing transfers

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>