Optimize the --checksum option using externally created .rsyncsums files.
This adds a new option, --sumfiles=MODE, that allows you to use a cache of
checksums when performing a --checksum transfer. These checksum files
(.rsyncsums) must be created by some other process -- see the perl script,
rsyncsums, in the support dir for one way.
This option can be particularly helpful to a public mirror that wants to
pre-compute their .rsyncsums files, set the "checksum files = strict" option
in their daemon config file, and thus make it quite efficient for a client
rsync to make use of the --checksum option on their server.
To use this patch, run these commands for a successful build:
patch -p1 <patches/checksum-reading.diff
./configure (optional if already run)
make
based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
diff --git a/clientserver.c b/clientserver.c
--- a/clientserver.c
+++ b/clientserver.c
@@ -44,6 +44,8 @@ extern int numeric_ids;
extern int filesfrom_fd;
extern int remote_protocol;
extern int protocol_version;
+extern int always_checksum;
+extern int checksum_files;
extern int io_timeout;
extern int no_detach;
extern int write_batch;
@@ -1033,6 +1035,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char
} else if (am_root < 0) /* Treat --fake-super from client as --super. */
am_root = 2;
+ checksum_files = always_checksum ? lp_checksum_files(i)
+ : CSF_IGNORE_FILES;
+
if (filesfrom_fd == 0)
filesfrom_fd = f_in;
diff --git a/daemon-parm.txt b/daemon-parm.txt
--- a/daemon-parm.txt
+++ b/daemon-parm.txt
@@ -49,6 +49,7 @@ INTEGER max_connections 0
INTEGER max_verbosity 1
INTEGER timeout 0
+ENUM checksum_files CSF_IGNORE_FILES
ENUM syslog_facility LOG_DAEMON
BOOL fake_super False
diff --git a/flist.c b/flist.c
--- a/flist.c
+++ b/flist.c
@@ -22,6 +22,7 @@
#include "rsync.h"
#include "ifuncs.h"
+#include "itypes.h"
#include "rounding.h"
#include "inums.h"
#include "io.h"
@@ -33,6 +34,7 @@ extern int am_sender;
extern int am_generator;
extern int inc_recurse;
extern int always_checksum;
+extern int basis_dir_cnt;
extern int checksum_type;
extern int module_id;
extern int ignore_errors;
@@ -62,6 +64,7 @@ extern int implied_dirs;
extern int ignore_perishable;
extern int non_perishable_cnt;
extern int prune_empty_dirs;
+extern int checksum_files;
extern int copy_links;
extern int copy_unsafe_links;
extern int protocol_version;
@@ -73,6 +76,7 @@ extern int sender_symlink_iconv;
extern int output_needs_newline;
extern int sender_keeps_checksum;
extern int unsort_ndx;
+extern char *basis_dir[];
extern uid_t our_uid;
extern struct stats stats;
extern char *filesfrom_host;
@@ -90,6 +94,20 @@ extern int filesfrom_convert;
extern iconv_t ic_send, ic_recv;
#endif
+#ifdef HAVE_UTIMENSAT
+#ifdef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
+#define ST_MTIME_NSEC st_mtim.tv_nsec
+#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC)
+#define ST_MTIME_NSEC st_mtimensec
+#endif
+#endif
+
+#define RSYNCSUMS_FILE ".rsyncsums"
+#define RSYNCSUMS_LEN (sizeof RSYNCSUMS_FILE-1)
+
+#define CLEAN_STRIP_ROOT (1<<0)
+#define CLEAN_KEEP_LAST (1<<1)
+
#define PTR_SIZE (sizeof (struct file_struct *))
int io_error;
@@ -134,8 +152,12 @@ static char empty_sum[MAX_DIGEST_LEN];
static int flist_count_offset; /* for --delete --progress */
static int show_filelist_progress;
+static struct csum_cache {
+ struct file_list *flist;
+} *csum_cache = NULL;
+
static struct file_list *flist_new(int flags, const char *msg);
-static void flist_sort_and_clean(struct file_list *flist, int strip_root);
+static void flist_sort_and_clean(struct file_list *flist, int flags);
static void output_flist(struct file_list *flist);
void init_flist(void)
@@ -324,6 +346,235 @@ static void flist_done_allocating(struct file_list *flist)
flist->pool_boundary = ptr;
}
+void reset_checksum_cache()
+{
+ int slot, slots = am_sender ? 1 : basis_dir_cnt + 1;
+
+ if (!csum_cache)
+ csum_cache = new_array0(struct csum_cache, slots);
+
+ for (slot = 0; slot < slots; slot++) {
+ struct file_list *flist = csum_cache[slot].flist;
+
+ if (flist) {
+ /* Reset the pool memory and empty the file-list array. */
+ pool_free_old(flist->file_pool,
+ pool_boundary(flist->file_pool, 0));
+ flist->used = 0;
+ } else
+ flist = csum_cache[slot].flist = flist_new(FLIST_TEMP, "reset_checksum_cache");
+
+ flist->low = 0;
+ flist->high = -1;
+ flist->next = NULL;
+ }
+}
+
+/* The basename_len count is the length of the basename + 1 for the '\0'. */
+static int add_checksum(struct file_list *flist, const char *dirname,
+ const char *basename, int basename_len, OFF_T file_length,
+ time_t mtime, uint32 ctime, uint32 inode,
+ const char *sum)
+{
+ struct file_struct *file;
+ int alloc_len, extra_len;
+ char *bp;
+
+ if (basename_len == RSYNCSUMS_LEN+1 && *basename == '.'
+ && strcmp(basename, RSYNCSUMS_FILE) == 0)
+ return 0;
+
+ /* "2" is for a 32-bit ctime num and an 32-bit inode num. */
+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
+ * EXTRA_LEN;
+#if EXTRA_ROUNDING > 0
+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
+#endif
+ alloc_len = FILE_STRUCT_LEN + extra_len + basename_len;
+ bp = pool_alloc(flist->file_pool, alloc_len, "add_checksum");
+
+ memset(bp, 0, extra_len + FILE_STRUCT_LEN);
+ bp += extra_len;
+ file = (struct file_struct *)bp;
+ bp += FILE_STRUCT_LEN;
+
+ memcpy(bp, basename, basename_len);
+
+ file->mode = S_IFREG;
+ file->modtime = mtime;
+ file->len32 = (uint32)file_length;
+ if (file_length > 0xFFFFFFFFu) {
+ file->flags |= FLAG_LENGTH64;
+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
+ }
+ file->dirname = dirname;
+ F_CTIME(file) = ctime;
+ F_INODE(file) = inode;
+ bp = F_SUM(file);
+ memcpy(bp, sum, flist_csum_len);
+
+ flist_expand(flist, 1);
+ flist->files[flist->used++] = file;
+
+ flist->sorted = flist->files;
+
+ return 1;
+}
+
+/* The "dirname" arg's data must remain unchanged during the lifespan of
+ * the created csum_cache[].flist object because we use it directly. */
+static void read_checksums(int slot, struct file_list *flist, const char *dirname)
+{
+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
+ FILE *fp;
+ char *cp;
+ int len, i;
+ time_t mtime;
+ OFF_T file_length;
+ uint32 ctime, inode;
+ int dlen = dirname ? strlcpy(fbuf, dirname, sizeof fbuf) : 0;
+
+ if (dlen >= (int)(sizeof fbuf - 1 - RSYNCSUMS_LEN))
+ return;
+ if (dlen)
+ fbuf[dlen++] = '/';
+ else
+ dirname = NULL;
+ strlcpy(fbuf+dlen, RSYNCSUMS_FILE, sizeof fbuf - dlen);
+ if (slot) {
+ pathjoin(line, sizeof line, basis_dir[slot-1], fbuf);
+ cp = line;
+ } else
+ cp = fbuf;
+ if (!(fp = fopen(cp, "r")))
+ return;
+
+ while (fgets(line, sizeof line, fp)) {
+ cp = line;
+ if (checksum_type == 5) {
+ char *alt_sum = cp;
+ if (*cp == '=')
+ while (*++cp == '=') {}
+ else
+ while (isHexDigit(cp)) cp++;
+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
+ break;
+ while (*++cp == ' ') {}
+ }
+
+ if (*cp == '=') {
+ continue;
+ } else {
+ for (i = 0; i < flist_csum_len*2; i++, cp++) {
+ int x;
+ if (isHexDigit(cp)) {
+ if (isDigit(cp))
+ x = *cp - '0';
+ else
+ x = (*cp & 0xF) + 9;
+ } else {
+ cp = "";
+ break;
+ }
+ if (i & 1)
+ sum[i/2] |= x;
+ else
+ sum[i/2] = x << 4;
+ }
+ }
+ if (*cp != ' ')
+ break;
+ while (*++cp == ' ') {}
+
+ if (checksum_type != 5) {
+ char *alt_sum = cp;
+ if (*cp == '=')
+ while (*++cp == '=') {}
+ else
+ while (isHexDigit(cp)) cp++;
+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
+ break;
+ while (*++cp == ' ') {}
+ }
+
+ file_length = 0;
+ while (isDigit(cp))
+ file_length = file_length * 10 + *cp++ - '0';
+ if (*cp != ' ')
+ break;
+ while (*++cp == ' ') {}
+
+ mtime = 0;
+ while (isDigit(cp))
+ mtime = mtime * 10 + *cp++ - '0';
+ if (*cp != ' ')
+ break;
+ while (*++cp == ' ') {}
+
+ ctime = 0;
+ while (isDigit(cp))
+ ctime = ctime * 10 + *cp++ - '0';
+ if (*cp != ' ')
+ break;
+ while (*++cp == ' ') {}
+
+ inode = 0;
+ while (isDigit(cp))
+ inode = inode * 10 + *cp++ - '0';
+ if (*cp != ' ')
+ break;
+ while (*++cp == ' ') {}
+
+ len = strlen(cp);
+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
+ len--;
+ if (!len)
+ break;
+ cp[len++] = '\0'; /* len now counts the null */
+ if (strchr(cp, '/'))
+ break;
+ if (len > MAXPATHLEN)
+ continue;
+
+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
+
+ add_checksum(flist, dirname, cp, len, file_length,
+ mtime, ctime, inode,
+ sum);
+ }
+ fclose(fp);
+
+ flist_sort_and_clean(flist, CLEAN_KEEP_LAST);
+}
+
+void get_cached_checksum(int slot, const char *fname, struct file_struct *file,
+ STRUCT_STAT *stp, char *sum_buf)
+{
+ struct file_list *flist = csum_cache[slot].flist;
+ int j;
+
+ if (!flist->next) {
+ flist->next = cur_flist; /* next points from checksum flist to file flist */
+ read_checksums(slot, flist, file->dirname);
+ }
+
+ if ((j = flist_find(flist, file)) >= 0) {
+ struct file_struct *fp = flist->sorted[j];
+
+ if (F_LENGTH(fp) == stp->st_size
+ && fp->modtime == stp->st_mtime
+ && (checksum_files & CSF_LAX
+ || (F_CTIME(fp) == (uint32)stp->st_ctime
+ && F_INODE(fp) == (uint32)stp->st_ino))) {
+ memcpy(sum_buf, F_SUM(fp), MAX_DIGEST_LEN);
+ return;
+ }
+ }
+
+ file_checksum(fname, stp, sum_buf);
+}
+
/* Call this with EITHER (1) "file, NULL, 0" to chdir() to the file's
* F_PATHNAME(), or (2) "NULL, dir, dirlen" to chdir() to the supplied dir,
* with dir == NULL taken to be the starting directory, and dirlen < 0
@@ -1201,7 +1452,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
STRUCT_STAT *stp, int flags, int filter_level)
{
static char *lastdir;
- static int lastdir_len = -1;
+ static int lastdir_len = -2;
struct file_struct *file;
char thisname[MAXPATHLEN];
char linkname[MAXPATHLEN];
@@ -1347,9 +1598,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
memcpy(lastdir, thisname, len);
lastdir[len] = '\0';
lastdir_len = len;
+ if (checksum_files && am_sender && flist)
+ reset_checksum_cache();
}
- } else
+ } else {
basename = thisname;
+ if (checksum_files && am_sender && flist && lastdir_len == -2) {
+ lastdir_len = -1;
+ reset_checksum_cache();
+ }
+ }
basename_len = strlen(basename) + 1; /* count the '\0' */
#ifdef SUPPORT_LINKS
@@ -1367,11 +1625,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
extra_len += EXTRA_LEN;
#endif
- if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
- file_checksum(thisname, &st, tmp_sum);
- if (sender_keeps_checksum)
- extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
- }
+ if (sender_keeps_checksum && S_ISREG(st.st_mode))
+ extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
#if EXTRA_ROUNDING > 0
if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
@@ -1460,8 +1715,14 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
return NULL;
}
- if (sender_keeps_checksum && S_ISREG(st.st_mode))
- memcpy(F_SUM(file), tmp_sum, flist_csum_len);
+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
+ if (flist && checksum_files)
+ get_cached_checksum(0, thisname, file, &st, tmp_sum);
+ else
+ file_checksum(thisname, &st, tmp_sum);
+ if (sender_keeps_checksum)
+ memcpy(F_SUM(file), tmp_sum, flist_csum_len);
+ }
if (unsort_ndx)
F_NDX(file) = stats.num_dirs;
@@ -2673,7 +2934,7 @@ struct file_list *recv_file_list(int f, int dir_ndx)
/* The --relative option sends paths with a leading slash, so we need
* to specify the strip_root option here. We rejected leading slashes
* for a non-relative transfer in recv_file_entry(). */
- flist_sort_and_clean(flist, relative_paths);
+ flist_sort_and_clean(flist, relative_paths ? CLEAN_STRIP_ROOT : 0);
if (protocol_version < 30) {
/* Recv the io_error flag */
@@ -2918,7 +3179,7 @@ void flist_free(struct file_list *flist)
/* This routine ensures we don't have any duplicate names in our file list.
* duplicate names can cause corruption because of the pipelining. */
-static void flist_sort_and_clean(struct file_list *flist, int strip_root)
+static void flist_sort_and_clean(struct file_list *flist, int flags)
{
char fbuf[MAXPATHLEN];
int i, prev_i;
@@ -2969,7 +3230,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
/* If one is a dir and the other is not, we want to
* keep the dir because it might have contents in the
* list. Otherwise keep the first one. */
- if (S_ISDIR(file->mode)) {
+ if (S_ISDIR(file->mode) || flags & CLEAN_KEEP_LAST) {
struct file_struct *fp = flist->sorted[j];
if (!S_ISDIR(fp->mode))
keep = i, drop = j;
@@ -2985,8 +3246,8 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
} else
keep = j, drop = i;
- if (!am_sender) {
- if (DEBUG_GTE(DUP, 1)) {
+ if (!am_sender || flags & CLEAN_KEEP_LAST) {
+ if (DEBUG_GTE(DUP, 1) && !(flags & CLEAN_KEEP_LAST)) {
rprintf(FINFO,
"removing duplicate name %s from file list (%d)\n",
f_name(file, fbuf), drop + flist->ndx_start);
@@ -3008,7 +3269,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
}
flist->high = prev_i;
- if (strip_root) {
+ if (flags & CLEAN_STRIP_ROOT) {
/* We need to strip off the leading slashes for relative
* paths, but this must be done _after_ the sorting phase. */
for (i = flist->low; i <= flist->high; i++) {
diff --git a/generator.c b/generator.c
--- a/generator.c
+++ b/generator.c
@@ -52,6 +52,7 @@ extern int delete_after;
extern int missing_args;
extern int msgdone_cnt;
extern int ignore_errors;
+extern int checksum_files;
extern int remove_source_files;
extern int delay_updates;
extern int update_only;
@@ -601,7 +602,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
/* Perform our quick-check heuristic for determining if a file is unchanged. */
-int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
+int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st, int slot)
{
if (st->st_size != F_LENGTH(file))
return 0;
@@ -610,7 +611,10 @@ int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
of the file time to determine whether to sync */
if (always_checksum > 0 && S_ISREG(st->st_mode)) {
char sum[MAX_DIGEST_LEN];
- file_checksum(fn, st, sum);
+ if (checksum_files && slot >= 0)
+ get_cached_checksum(slot, fn, file, st, sum);
+ else
+ file_checksum(fn, st, sum);
return memcmp(sum, F_SUM(file), flist_csum_len) == 0;
}
@@ -907,7 +911,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx,
best_match = j;
match_level = 1;
}
- if (!unchanged_file(cmpbuf, file, &sxp->st))
+ if (!unchanged_file(cmpbuf, file, &sxp->st, j+1))
continue;
if (match_level == 1) {
best_match = j;
@@ -1220,7 +1224,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
* --ignore-non-existing, daemon exclude, or mkdir failure. */
static struct file_struct *skip_dir = NULL;
static struct file_list *fuzzy_dirlist[MAX_BASIS_DIRS+1];
- static int need_fuzzy_dirlist = 0;
+ static int need_new_dirscan = 0;
struct file_struct *fuzzy_file = NULL;
int fd = -1, f_copy = -1;
stat_x sx, real_sx;
@@ -1337,8 +1341,9 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
fuzzy_dirlist[i] = NULL;
}
}
- need_fuzzy_dirlist = 1;
- }
+ need_new_dirscan = 1;
+ } else if (checksum_files)
+ need_new_dirscan = 1;
#ifdef SUPPORT_ACLS
if (!preserve_perms)
dflt_perms = default_perms_for_dir(dn);
@@ -1346,6 +1351,24 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
}
parent_dirname = dn;
+ if (need_new_dirscan && S_ISREG(file->mode)) {
+ int i;
+ strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
+ for (i = 0; i < fuzzy_basis; i++) {
+ if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
+ continue;
+ fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES | GDL_PERHAPS_DIR);
+ if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
+ flist_free(fuzzy_dirlist[i]);
+ fuzzy_dirlist[i] = NULL;
+ }
+ }
+ if (checksum_files) {
+ reset_checksum_cache();
+ }
+ need_new_dirscan = 0;
+ }
+
statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir);
stat_errno = errno;
}
@@ -1749,22 +1772,6 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
partialptr = NULL;
if (statret != 0 && fuzzy_basis) {
- if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
- const char *dn = file->dirname ? file->dirname : ".";
- int i;
- strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
- for (i = 0; i < fuzzy_basis; i++) {
- if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
- continue;
- fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES | GDL_PERHAPS_DIR);
- if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
- flist_free(fuzzy_dirlist[i]);
- fuzzy_dirlist[i] = NULL;
- }
- }
- need_fuzzy_dirlist = 0;
- }
-
/* Sets fnamecmp_type to FNAMECMP_FUZZY or above. */
fuzzy_file = find_fuzzy(file, fuzzy_dirlist, &fnamecmp_type);
if (fuzzy_file) {
@@ -1797,7 +1804,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
;
else if (fnamecmp_type >= FNAMECMP_FUZZY)
;
- else if (unchanged_file(fnamecmp, file, &sx.st)) {
+ else if (unchanged_file(fnamecmp, file, &sx.st, fnamecmp_type == FNAMECMP_FNAME ? 0 : -1)) {
if (partialptr) {
do_unlink(partialptr);
handle_partial_dir(partialptr, PDIR_DELETE);
diff --git a/hlink.c b/hlink.c
--- a/hlink.c
+++ b/hlink.c
@@ -406,7 +406,7 @@ int hard_link_check(struct file_struct *file, int ndx, char *fname,
}
break;
}
- if (!unchanged_file(cmpbuf, file, &alt_sx.st))
+ if (!unchanged_file(cmpbuf, file, &alt_sx.st, j+1))
continue;
statret = 1;
if (unchanged_attrs(cmpbuf, file, &alt_sx))
diff --git a/loadparm.c b/loadparm.c
--- a/loadparm.c
+++ b/loadparm.c
@@ -162,6 +162,13 @@ static struct enum_list enum_syslog_facility[] = {
{ -1, NULL }
};
+static struct enum_list enum_checksum_files[] = {
+ { CSF_IGNORE_FILES, "none" },
+ { CSF_LAX_MODE, "lax" },
+ { CSF_STRICT_MODE, "strict" },
+ { -1, NULL }
+};
+
/* Expand %VAR% references. Any unknown vars or unrecognized
* syntax leaves the raw chars unchanged. */
static char *expand_vars(const char *str)
diff --git a/options.c b/options.c
--- a/options.c
+++ b/options.c
@@ -117,6 +117,7 @@ size_t bwlimit_writemax = 0;
int ignore_existing = 0;
int ignore_non_existing = 0;
int need_messages_from_generator = 0;
+int checksum_files = CSF_IGNORE_FILES;
int max_delete = INT_MIN;
OFF_T max_size = -1;
OFF_T min_size = -1;
@@ -573,7 +574,7 @@ enum {OPT_SERVER = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_BLOCK_SIZE,
- OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR,
+ OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR, OPT_SUMFILES,
OPT_OLD_COMPRESS, OPT_NEW_COMPRESS, OPT_NO_COMPRESS,
OPT_STOP_AFTER, OPT_STOP_AT,
OPT_REFUSED_BASE = 9000};
@@ -729,6 +730,7 @@ static struct poptOption long_options[] = {
{"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
{"checksum-choice", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 },
{"cc", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 },
+ {"sumfiles", 0, POPT_ARG_STRING, 0, OPT_SUMFILES, 0, 0 },
{"block-size", 'B', POPT_ARG_STRING, 0, OPT_BLOCK_SIZE, 0, 0 },
{"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
{"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
@@ -1722,6 +1724,23 @@ int parse_arguments(int *argc_p, const char ***argv_p)
}
break;
+ case OPT_SUMFILES:
+ arg = poptGetOptArg(pc);
+ checksum_files = 0;
+ if (strcmp(arg, "lax") == 0)
+ checksum_files |= CSF_LAX_MODE;
+ else if (strcmp(arg, "strict") == 0)
+ checksum_files |= CSF_STRICT_MODE;
+ else if (strcmp(arg, "none") == 0)
+ checksum_files = CSF_IGNORE_FILES;
+ else {
+ snprintf(err_buf, sizeof err_buf,
+ "Invalid argument passed to --sumfiles (%s)\n",
+ arg);
+ return 0;
+ }
+ break;
+
case OPT_INFO:
arg = poptGetOptArg(pc);
parse_output_words(info_words, info_levels, arg, USER_PRIORITY);
@@ -2052,6 +2071,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
}
#endif
+ if (!always_checksum)
+ checksum_files = CSF_IGNORE_FILES;
+
if (write_batch && read_batch) {
snprintf(err_buf, sizeof err_buf,
"--write-batch and --read-batch can not be used together\n");
diff --git a/rsync.1.md b/rsync.1.md
--- a/rsync.1.md
+++ b/rsync.1.md
@@ -338,6 +338,7 @@ detailed description below for a complete description.
--quiet, -q suppress non-error messages
--no-motd suppress daemon-mode MOTD
--checksum, -c skip based on checksum, not mod-time & size
+--sumfiles=MODE use .rsyncsums to speedup --checksum mode
--archive, -a archive mode; equals -rlptgoD (no -H,-A,-X)
--no-OPTION turn off an implied OPTION (e.g. --no-D)
--recursive, -r recurse into directories
@@ -698,6 +699,8 @@ your home directory (remove the '=' for that).
file that has the same size as the corresponding sender's file: files with
either a changed size or a changed checksum are selected for transfer.
+ See also the `--sumfiles` option for a way to use cached checksum data.
+
Note that rsync always verifies that each _transferred_ file was correctly
reconstructed on the receiving side by checking a whole-file checksum that
is generated as the file is transferred, but that automatic
@@ -708,6 +711,38 @@ your home directory (remove the '=' for that).
can be overridden using either the `--checksum-choice` (`--cc`) option or an
environment variable that is discussed in that option's section.
+0. `--sumfiles=MODE`
+
+ This option tells rsync to make use of any cached checksum information it
+ finds in per-directory .rsyncsums files when the current transfer is using
+ the `--checksum` option. If the checksum data is up-to-date, it is used
+ instead of recomputing it, saving both disk I/O and CPU time. If the
+ checksum data is missing or outdated, the checksum is computed just as it
+ would be if `--sumfiles` was not specified.
+
+ The MODE value is either "lax", for relaxed checking (which compares size
+ and mtime), "strict" (which also compares ctime and inode), or "none" to
+ ignore any .rsyncsums files ("none" is the default). Rsync does not create
+ or update these files, but there is a perl script in the support directory
+ named "rsyncsums" that can be used for that.
+
+ This option has no effect unless `--checksum`, `-c` was also specified. It
+ also only affects the current side of the transfer, so if you want the
+ remote side to parse its own .rsyncsums files, specify the option via
+ `--remote-option` (`-M`) (e.g. "`-M--sumfiles=lax`").
+
+ To avoid transferring the system's checksum files, you can use an exclude
+ (e.g. `--exclude=.rsyncsums`). To make this easier to type, you can use a
+ popt alias. For instance, adding the following line in your ~/.popt file
+ defines a `--cs` option that enables lax checksum files and excludes the
+ checksum files:
+
+ > rsync alias --cs -c --sumfiles=lax -M--sumfiles=lax -f-_.rsyncsums
+
+ An rsync daemon does not allow the client to control this setting, so see
+ the "checksum files" daemon parameter for information on how to make a
+ daemon use cached checksum data.
+
0. `--archive`, `-a`
This is equivalent to `-rlptgoD`. It is a quick way of saying you want
diff --git a/rsync.h b/rsync.h
--- a/rsync.h
+++ b/rsync.h
@@ -882,6 +882,10 @@ extern int xattrs_ndx;
#define F_SUM(f) ((char*)OPT_EXTRA(f, START_BUMP(f) + HLINK_BUMP(f) \
+ SUM_EXTRA_CNT - 1))
+/* These are only valid on an entry derived from a checksum file. */
+#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->unum
+#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->unum
+
/* Some utility defines: */
#define F_IS_ACTIVE(f) (f)->basename[0]
#define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
@@ -1094,6 +1098,13 @@ typedef struct {
#define RELNAMECACHE_LEN (offsetof(relnamecache, fname))
#endif
+#define CSF_ENABLE (1<<1)
+#define CSF_LAX (1<<2)
+
+#define CSF_IGNORE_FILES 0
+#define CSF_LAX_MODE (CSF_ENABLE|CSF_LAX)
+#define CSF_STRICT_MODE (CSF_ENABLE)
+
#include "byteorder.h"
#include "lib/mdigest.h"
#include "lib/wildmatch.h"
diff --git a/rsyncd.conf.5.md b/rsyncd.conf.5.md
--- a/rsyncd.conf.5.md
+++ b/rsyncd.conf.5.md
@@ -419,6 +419,19 @@ the values of parameters. See the GLOBAL PARAMETERS section for more details.
the max connections limit is not exceeded for the modules sharing the lock
file. The default is `/var/run/rsyncd.lock`.
+0. `checksum files`
+
+ This parameter tells rsync to make use of any cached checksum information
+ it finds in per-directory .rsyncsums files when the current transfer is
+ using the `--checksum` option. The value can be set to either "lax",
+ "strict", or "none". See the client's `--sumfiles` option for what these
+ choices do.
+
+ Note also that the client's command-line option, `--sumfiles`, has no
+ effect on a daemon. A daemon will only access checksum files if this
+ config option tells it to. See also the `exclude` directive for a way to
+ hide the .rsyncsums files from the user.
+
0. `read only`
This parameter determines whether clients will be able to upload files or
diff --git a/support/rsyncsums b/support/rsyncsums
new file mode 100755
--- /dev/null
+++ b/support/rsyncsums
@@ -0,0 +1,201 @@
+#!/usr/bin/perl -w
+use strict;
+
+use Getopt::Long;
+use Cwd qw(abs_path cwd);
+use Digest::MD4;
+use Digest::MD5;
+
+our $SUMS_FILE = '.rsyncsums';
+
+&Getopt::Long::Configure('bundling');
+&usage if !&GetOptions(
+ 'recurse|r' => \( my $recurse_opt ),
+ 'mode|m=s' => \( my $cmp_mode = 'strict' ),
+ 'check|c' => \( my $check_opt ),
+ 'verbose|v+' => \( my $verbosity = 0 ),
+ 'help|h' => \( my $help_opt ),
+);
+&usage if $help_opt || $cmp_mode !~ /^(lax|strict)$/;
+
+my $ignore_ctime_and_inode = $cmp_mode eq 'lax' ? 0 : 1;
+
+my $start_dir = cwd();
+
+my @dirs = @ARGV;
+@dirs = '.' unless @dirs;
+foreach (@dirs) {
+ $_ = abs_path($_);
+}
+
+$| = 1;
+
+my $exit_code = 0;
+
+my $md4 = Digest::MD4->new;
+my $md5 = Digest::MD5->new;
+
+while (@dirs) {
+ my $dir = shift @dirs;
+
+ if (!chdir($dir)) {
+ warn "Unable to chdir to $dir: $!\n";
+ next;
+ }
+ if (!opendir(DP, '.')) {
+ warn "Unable to opendir $dir: $!\n";
+ next;
+ }
+
+ my $reldir = $dir;
+ $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
+ if ($verbosity) {
+ print "$reldir ... ";
+ print "\n" if $check_opt;
+ }
+
+ my %cache;
+ my $f_cnt = 0;
+ if (open(FP, '<', $SUMS_FILE)) {
+ while (<FP>) {
+ chomp;
+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
+ $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
+ $f_cnt++;
+ }
+ close FP;
+ }
+
+ my @subdirs;
+ my $d_cnt = 0;
+ my $update_cnt = 0;
+ while (defined(my $fn = readdir(DP))) {
+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
+ if (-d _) {
+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
+ next;
+ }
+ next unless -f _;
+
+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
+ $ctime &= 0xFFFFFFFF;
+ $inode &= 0xFFFFFFFF;
+ my $ref = $cache{$fn};
+ $d_cnt++;
+
+ if (!$check_opt) {
+ if (defined $ref) {
+ $$ref[0] = 1;
+ if ($$ref[3] == $size
+ && $$ref[4] == $mtime
+ && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
+ && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
+ next;
+ }
+ }
+ if (!$update_cnt++) {
+ print "UPDATING\n" if $verbosity;
+ }
+ }
+
+ if (!open(IN, $fn)) {
+ print STDERR "Unable to read $fn: $!\n";
+ if (defined $ref) {
+ delete $cache{$fn};
+ $f_cnt--;
+ }
+ next;
+ }
+
+ my($sum4, $sum5);
+ while (1) {
+ while (sysread(IN, $_, 64*1024)) {
+ $md4->add($_);
+ $md5->add($_);
+ }
+ $sum4 = $md4->hexdigest;
+ $sum5 = $md5->hexdigest;
+ print " $sum4 $sum5" if $verbosity > 2;
+ print " $fn" if $verbosity > 1;
+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
+ $ctime2 &= 0xFFFFFFFF;
+ $inode2 &= 0xFFFFFFFF;
+ last if $size == $size2 && $mtime == $mtime2
+ && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
+ $size = $size2;
+ $mtime = $mtime2;
+ $ctime = $ctime2;
+ $inode = $inode2;
+ sysseek(IN, 0, 0);
+ print " REREADING\n" if $verbosity > 1;
+ }
+
+ close IN;
+
+ if ($check_opt) {
+ my $dif;
+ if (!defined $ref) {
+ $dif = 'MISSING';
+ } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
+ $dif = 'FAILED';
+ } else {
+ print " OK\n" if $verbosity > 1;
+ next;
+ }
+ if ($verbosity < 2) {
+ print $verbosity ? ' ' : "$reldir/";
+ print $fn;
+ }
+ print " $dif\n";
+ $exit_code = 1;
+ } else {
+ print "\n" if $verbosity > 1;
+ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime, $inode ];
+ }
+ }
+
+ closedir DP;
+
+ unshift(@dirs, sort @subdirs) if $recurse_opt;
+
+ if ($check_opt) {
+ ;
+ } elsif ($d_cnt == 0) {
+ if ($f_cnt) {
+ print "(removed $SUMS_FILE) " if $verbosity;
+ unlink($SUMS_FILE);
+ }
+ print "empty\n" if $verbosity;
+ } elsif ($update_cnt || $d_cnt != $f_cnt) {
+ print "UPDATING\n" if $verbosity && !$update_cnt;
+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
+
+ foreach my $fn (sort keys %cache) {
+ my $ref = $cache{$fn};
+ my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
+ next unless $found;
+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
+ }
+ close FP;
+ } else {
+ print "ok\n" if $verbosity;
+ }
+}
+
+exit $exit_code;
+
+sub usage
+{
+ die <<EOT;
+Usage: rsyncsums [OPTIONS] [DIRS]
+
+Options:
+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
+ -m, --mode=MODE Compare entries in either "lax" or "strict" mode. Using
+ "lax" compares size and mtime, while "strict" additionally
+ compares ctime and inode. Default: strict.
+ -c, --check Check if the checksums are right (doesn't update).
+ -v, --verbose Mention what we're doing. Repeat for more info.
+ -h, --help Display this help message.
+EOT
+}
diff -Nurp a/rsync.1 b/rsync.1
--- a/rsync.1
+++ b/rsync.1
@@ -414,6 +414,7 @@ detailed description below for a complet
--quiet, -q suppress non-error messages
--no-motd suppress daemon-mode MOTD
--checksum, -c skip based on checksum, not mod-time & size
+--sumfiles=MODE use .rsyncsums to speedup --checksum mode
--archive, -a archive mode; equals -rlptgoD (no -H,-A,-X)
--no-OPTION turn off an implied OPTION (e.g. --no-D)
--recursive, -r recurse into directories
@@ -766,6 +767,8 @@ its checksums when it is scanning for ch
file that has the same size as the corresponding sender's file: files with
either a changed size or a changed checksum are selected for transfer.
.IP
+See also the \fB\-\-sumfiles\fP option for a way to use cached checksum data.
+.IP
Note that rsync always verifies that each \fItransferred\fP file was correctly
reconstructed on the receiving side by checking a whole-file checksum that
is generated as the file is transferred, but that automatic
@@ -775,6 +778,40 @@ before-the-transfer "Does this file need
The checksum used is auto-negotiated between the client and the server, but
can be overridden using either the \fB\-\-checksum-choice\fP (\fB\-\-cc\fP) option or an
environment variable that is discussed in that option's section.
+.IP "\fB\-\-sumfiles=MODE\fP"
+This option tells rsync to make use of any cached checksum information it
+finds in per-directory .rsyncsums files when the current transfer is using
+the \fB\-\-checksum\fP option. If the checksum data is up-to-date, it is used
+instead of recomputing it, saving both disk I/O and CPU time. If the
+checksum data is missing or outdated, the checksum is computed just as it
+would be if \fB\-\-sumfiles\fP was not specified.
+.IP
+The MODE value is either "lax", for relaxed checking (which compares size
+and mtime), "strict" (which also compares ctime and inode), or "none" to
+ignore any .rsyncsums files ("none" is the default). Rsync does not create
+or update these files, but there is a perl script in the support directory
+named "rsyncsums" that can be used for that.
+.IP
+This option has no effect unless \fB\-\-checksum\fP, \fB\-c\fP was also specified. It
+also only affects the current side of the transfer, so if you want the
+remote side to parse its own .rsyncsums files, specify the option via
+\fB\-\-remote-option\fP (\fB\-M\fP) (e.g. "\fB\-M\-\-sumfiles=lax\fP").
+.IP
+To avoid transferring the system's checksum files, you can use an exclude
+(e.g. \fB\-\-exclude=.rsyncsums\fP). To make this easier to type, you can use a
+popt alias. For instance, adding the following line in your ~/.popt file
+defines a \fB\-\-cs\fP option that enables lax checksum files and excludes the
+checksum files:
+.RS 4
+.IP
+.nf
+rsync alias --cs -c --sumfiles=lax -M--sumfiles=lax -f-_.rsyncsums
+.fi
+.RE
+.IP
+An rsync daemon does not allow the client to control this setting, so see
+the "checksum files" daemon parameter for information on how to make a
+daemon use cached checksum data.
.IP "\fB\-\-archive\fP, \fB\-a\fP"
This is equivalent to \fB\-rlptgoD\fP. It is a quick way of saying you want
recursion and want to preserve almost everything (with \fB\-H\fP being a notable
diff -Nurp a/rsync.1.html b/rsync.1.html
--- a/rsync.1.html
+++ b/rsync.1.html
@@ -329,6 +329,7 @@ detailed description below for a complet
--quiet, -q suppress non-error messages
--no-motd suppress daemon-mode MOTD
--checksum, -c skip based on checksum, not mod-time & size
+--sumfiles=MODE use .rsyncsums to speedup --checksum mode
--archive, -a archive mode; equals -rlptgoD (no -H,-A,-X)
--no-OPTION turn off an implied OPTION (e.g. --no-D)
--recursive, -r recurse into directories
@@ -681,6 +682,7 @@ scan that builds the list of the availab
its checksums when it is scanning for changed files, and will checksum any
file that has the same size as the corresponding sender's file: files with
either a changed size or a changed checksum are selected for transfer.</p>
+<p>See also the <code>--sumfiles</code> option for a way to use cached checksum data.</p>
<p>Note that rsync always verifies that each <u>transferred</u> file was correctly
reconstructed on the receiving side by checking a whole-file checksum that
is generated as the file is transferred, but that automatic
@@ -691,6 +693,36 @@ can be overridden using either the <code
environment variable that is discussed in that option's section.</p>
</dd>
+<dt><code>--sumfiles=MODE</code></dt><dd>
+<p>This option tells rsync to make use of any cached checksum information it
+finds in per-directory .rsyncsums files when the current transfer is using
+the <code>--checksum</code> option. If the checksum data is up-to-date, it is used
+instead of recomputing it, saving both disk I/O and CPU time. If the
+checksum data is missing or outdated, the checksum is computed just as it
+would be if <code>--sumfiles</code> was not specified.</p>
+<p>The MODE value is either "lax", for relaxed checking (which compares size
+and mtime), "strict" (which also compares ctime and inode), or "none" to
+ignore any .rsyncsums files ("none" is the default). Rsync does not create
+or update these files, but there is a perl script in the support directory
+named "rsyncsums" that can be used for that.</p>
+<p>This option has no effect unless <code>--checksum</code>, <code>-c</code> was also specified. It
+also only affects the current side of the transfer, so if you want the
+remote side to parse its own .rsyncsums files, specify the option via
+<code>--remote-option</code> (<code>-M</code>) (e.g. "<code>-M--sumfiles=lax</code>").</p>
+<p>To avoid transferring the system's checksum files, you can use an exclude
+(e.g. <code>--exclude=.rsyncsums</code>). To make this easier to type, you can use a
+popt alias. For instance, adding the following line in your ~/.popt file
+defines a <code>--cs</code> option that enables lax checksum files and excludes the
+checksum files:</p>
+<blockquote>
+<pre><code>rsync alias --cs -c --sumfiles=lax -M--sumfiles=lax -f-_.rsyncsums
+</code></pre>
+</blockquote>
+<p>An rsync daemon does not allow the client to control this setting, so see
+the "checksum files" daemon parameter for information on how to make a
+daemon use cached checksum data.</p>
+</dd>
+
<dt><code>--archive</code>, <code>-a</code></dt><dd>
<p>This is equivalent to <code>-rlptgoD</code>. It is a quick way of saying you want
recursion and want to preserve almost everything (with <code>-H</code> being a notable
diff -Nurp a/rsyncd.conf.5 b/rsyncd.conf.5
--- a/rsyncd.conf.5
+++ b/rsyncd.conf.5
@@ -394,6 +394,17 @@ This parameter specifies the file to use
parameter. The rsync daemon uses record locking on this file to ensure that
the max connections limit is not exceeded for the modules sharing the lock
file. The default is \fB/var/run/rsyncd.lock\fP.
+.IP "\fBchecksum\ files\fP"
+This parameter tells rsync to make use of any cached checksum information
+it finds in per-directory .rsyncsums files when the current transfer is
+using the \fB\-\-checksum\fP option. The value can be set to either "lax",
+"strict", or "none". See the client's \fB\-\-sumfiles\fP option for what these
+choices do.
+.IP
+Note also that the client's command-line option, \fB\-\-sumfiles\fP, has no
+effect on a daemon. A daemon will only access checksum files if this
+config option tells it to. See also the \fBexclude\fP directive for a way to
+hide the .rsyncsums files from the user.
.IP "\fBread\ only\fP"
This parameter determines whether clients will be able to upload files or
not. If "read only" is true then any attempted uploads will fail. If
diff -Nurp a/rsyncd.conf.5.html b/rsyncd.conf.5.html
--- a/rsyncd.conf.5.html
+++ b/rsyncd.conf.5.html
@@ -405,6 +405,18 @@ the max connections limit is not exceede
file. The default is <code>/var/run/rsyncd.lock</code>.</p>
</dd>
+<dt><code>checksum files</code></dt><dd>
+<p>This parameter tells rsync to make use of any cached checksum information
+it finds in per-directory .rsyncsums files when the current transfer is
+using the <code>--checksum</code> option. The value can be set to either "lax",
+"strict", or "none". See the client's <code>--sumfiles</code> option for what these
+choices do.</p>
+<p>Note also that the client's command-line option, <code>--sumfiles</code>, has no
+effect on a daemon. A daemon will only access checksum files if this
+config option tells it to. See also the <code>exclude</code> directive for a way to
+hide the .rsyncsums files from the user.</p>
+</dd>
+
<dt><code>read only</code></dt><dd>
<p>This parameter determines whether clients will be able to upload files or
not. If "read only" is true then any attempted uploads will fail. If
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>