1 Optimize the --checksum option using externally created .rsyncsums files.
3 This adds a new option, --sumfiles=MODE, that allows you to use a cache of
4 checksums when performing a --checksum transfer. These checksum files
5 (.rsyncsums) must be created by some other process -- see the perl script,
6 rsyncsums, in the support dir for one way.
8 This option can be particularly helpful to a public mirror that wants to
9 pre-compute its .rsyncsums files, set the "checksum files = strict" option
10 in its daemon config file, and thus make it quite efficient for a client
11 rsync to make use of the --checksum option on that server.
13 To use this patch, run these commands for a successful build:
15 patch -p1 <patches/checksum-reading.diff
16 ./configure (optional if already run)
19 based-on: 9a06b2edb0ea1a226bcc642682c07bacd2ea47d3
20 diff --git a/clientserver.c b/clientserver.c
23 @@ -44,6 +44,8 @@ extern int numeric_ids;
24 extern int filesfrom_fd;
25 extern int remote_protocol;
26 extern int protocol_version;
27 +extern int always_checksum;
28 +extern int checksum_files;
29 extern int io_timeout;
31 extern int write_batch;
32 @@ -1106,6 +1108,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char
33 } else if (am_root < 0) /* Treat --fake-super from client as --super. */
36 + checksum_files = always_checksum ? lp_checksum_files(i)
39 if (filesfrom_fd == 0)
42 diff --git a/daemon-parm.txt b/daemon-parm.txt
45 @@ -49,6 +49,7 @@ INTEGER max_connections 0
46 INTEGER max_verbosity 1
49 +ENUM checksum_files CSF_IGNORE_FILES
50 ENUM syslog_facility LOG_DAEMON
53 diff --git a/flist.c b/flist.c
64 @@ -33,6 +34,7 @@ extern int am_sender;
65 extern int am_generator;
66 extern int inc_recurse;
67 extern int always_checksum;
68 +extern int basis_dir_cnt;
70 extern int ignore_errors;
71 extern int numeric_ids;
72 @@ -62,6 +64,7 @@ extern int implied_dirs;
73 extern int ignore_perishable;
74 extern int non_perishable_cnt;
75 extern int prune_empty_dirs;
76 +extern int checksum_files;
77 extern int copy_links;
78 extern int copy_unsafe_links;
79 extern int protocol_version;
80 @@ -74,6 +77,7 @@ extern int output_needs_newline;
81 extern int sender_keeps_checksum;
82 extern int trust_sender_filter;
83 extern int unsort_ndx;
84 +extern char *basis_dir[];
86 extern struct stats stats;
87 extern char *filesfrom_host;
88 @@ -92,6 +96,20 @@ extern int filesfrom_convert;
89 extern iconv_t ic_send, ic_recv;
92 +#ifdef HAVE_UTIMENSAT
93 +#ifdef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
94 +#define ST_MTIME_NSEC st_mtim.tv_nsec
95 +#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC)
96 +#define ST_MTIME_NSEC st_mtimensec
100 +#define RSYNCSUMS_FILE ".rsyncsums"
101 +#define RSYNCSUMS_LEN (sizeof RSYNCSUMS_FILE-1)
103 +#define CLEAN_STRIP_ROOT (1<<0)
104 +#define CLEAN_KEEP_LAST (1<<1)
106 #define PTR_SIZE (sizeof (struct file_struct *))
109 @@ -136,8 +154,12 @@ static char empty_sum[MAX_DIGEST_LEN];
110 static int flist_count_offset; /* for --delete --progress */
111 static int show_filelist_progress;
113 +static struct csum_cache {
114 + struct file_list *flist;
115 +} *csum_cache = NULL;
117 static struct file_list *flist_new(int flags, const char *msg);
118 -static void flist_sort_and_clean(struct file_list *flist, int strip_root);
119 +static void flist_sort_and_clean(struct file_list *flist, int flags);
120 static void output_flist(struct file_list *flist);
122 void init_flist(void)
123 @@ -329,6 +351,235 @@ static void flist_done_allocating(struct file_list *flist)
124 flist->pool_boundary = ptr;
127 +void reset_checksum_cache()
129 + int slot, slots = am_sender ? 1 : basis_dir_cnt + 1;
132 + csum_cache = new_array0(struct csum_cache, slots);
134 + for (slot = 0; slot < slots; slot++) {
135 + struct file_list *flist = csum_cache[slot].flist;
138 + /* Reset the pool memory and empty the file-list array. */
139 + pool_free_old(flist->file_pool,
140 + pool_boundary(flist->file_pool, 0));
143 + flist = csum_cache[slot].flist = flist_new(FLIST_TEMP, "reset_checksum_cache");
147 + flist->next = NULL;
151 +/* The basename_len count is the length of the basename + 1 for the '\0'. */
152 +static int add_checksum(struct file_list *flist, const char *dirname,
153 + const char *basename, int basename_len, OFF_T file_length,
154 + time_t mtime, uint32 ctime, uint32 inode,
157 + struct file_struct *file;
158 + int alloc_len, extra_len;
161 + if (basename_len == RSYNCSUMS_LEN+1 && *basename == '.'
162 + && strcmp(basename, RSYNCSUMS_FILE) == 0)
165 + /* "2" is for a 32-bit ctime num and a 32-bit inode num. */
166 + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
168 +#if EXTRA_ROUNDING > 0
169 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
170 + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
172 + alloc_len = FILE_STRUCT_LEN + extra_len + basename_len;
173 + bp = pool_alloc(flist->file_pool, alloc_len, "add_checksum");
175 + memset(bp, 0, extra_len + FILE_STRUCT_LEN);
177 + file = (struct file_struct *)bp;
178 + bp += FILE_STRUCT_LEN;
180 + memcpy(bp, basename, basename_len);
182 + file->mode = S_IFREG;
183 + file->modtime = mtime;
184 + file->len32 = (uint32)file_length;
185 + if (file_length > 0xFFFFFFFFu) {
186 + file->flags |= FLAG_LENGTH64;
187 + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
189 + file->dirname = dirname;
190 + F_CTIME(file) = ctime;
191 + F_INODE(file) = inode;
193 + memcpy(bp, sum, flist_csum_len);
195 + flist_expand(flist, 1);
196 + flist->files[flist->used++] = file;
198 + flist->sorted = flist->files;
203 +/* The "dirname" arg's data must remain unchanged during the lifespan of
204 + * the created csum_cache[].flist object because we use it directly. */
205 +static void read_checksums(int slot, struct file_list *flist, const char *dirname)
207 + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
213 + uint32 ctime, inode;
214 + int dlen = dirname ? strlcpy(fbuf, dirname, sizeof fbuf) : 0;
216 + if (dlen >= (int)(sizeof fbuf - 1 - RSYNCSUMS_LEN))
219 + fbuf[dlen++] = '/';
222 + strlcpy(fbuf+dlen, RSYNCSUMS_FILE, sizeof fbuf - dlen);
224 + pathjoin(line, sizeof line, basis_dir[slot-1], fbuf);
228 + if (!(fp = fopen(cp, "r")))
231 + while (fgets(line, sizeof line, fp)) {
233 + if (file_sum_nni->num == CSUM_MD5) {
234 + char *alt_sum = cp;
236 + while (*++cp == '=') {}
238 + while (isHexDigit(cp)) cp++;
239 + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
241 + while (*++cp == ' ') {}
247 + for (i = 0; i < flist_csum_len*2; i++, cp++) {
249 + if (isHexDigit(cp)) {
253 + x = (*cp & 0xF) + 9;
266 + while (*++cp == ' ') {}
268 + if (file_sum_nni->num < CSUM_MD5) {
269 + char *alt_sum = cp;
271 + while (*++cp == '=') {}
273 + while (isHexDigit(cp)) cp++;
274 + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
276 + while (*++cp == ' ') {}
280 + while (isDigit(cp))
281 + file_length = file_length * 10 + *cp++ - '0';
284 + while (*++cp == ' ') {}
287 + while (isDigit(cp))
288 + mtime = mtime * 10 + *cp++ - '0';
291 + while (*++cp == ' ') {}
294 + while (isDigit(cp))
295 + ctime = ctime * 10 + *cp++ - '0';
298 + while (*++cp == ' ') {}
301 + while (isDigit(cp))
302 + inode = inode * 10 + *cp++ - '0';
305 + while (*++cp == ' ') {}
308 + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
312 + cp[len++] = '\0'; /* len now counts the null */
313 + if (strchr(cp, '/'))
315 + if (len > MAXPATHLEN)
318 + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
320 + add_checksum(flist, dirname, cp, len, file_length,
321 + mtime, ctime, inode,
326 + flist_sort_and_clean(flist, CLEAN_KEEP_LAST);
329 +void get_cached_checksum(int slot, const char *fname, struct file_struct *file,
330 + STRUCT_STAT *stp, char *sum_buf)
332 + struct file_list *flist = csum_cache[slot].flist;
335 + if (!flist->next) {
336 + flist->next = cur_flist; /* next points from checksum flist to file flist */
337 + read_checksums(slot, flist, file->dirname);
340 + if ((j = flist_find(flist, file)) >= 0) {
341 + struct file_struct *fp = flist->sorted[j];
343 + if (F_LENGTH(fp) == stp->st_size
344 + && fp->modtime == stp->st_mtime
345 + && (checksum_files & CSF_LAX
346 + || (F_CTIME(fp) == (uint32)stp->st_ctime
347 + && F_INODE(fp) == (uint32)stp->st_ino))) {
348 + memcpy(sum_buf, F_SUM(fp), MAX_DIGEST_LEN);
353 + file_checksum(fname, stp, sum_buf);
356 /* Call this with EITHER (1) "file, NULL, 0" to chdir() to the file's
357 * F_PATHNAME(), or (2) "NULL, dir, dirlen" to chdir() to the supplied dir,
358 * with dir == NULL taken to be the starting directory, and dirlen < 0
359 @@ -1231,7 +1482,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
360 STRUCT_STAT *stp, int flags, int filter_level)
362 static char *lastdir;
363 - static int lastdir_len = -1;
364 + static int lastdir_len = -2;
365 struct file_struct *file;
366 char thisname[MAXPATHLEN];
367 char linkname[MAXPATHLEN];
368 @@ -1377,9 +1628,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
369 memcpy(lastdir, thisname, len);
372 + if (checksum_files && am_sender && flist)
373 + reset_checksum_cache();
378 + if (checksum_files && am_sender && flist && lastdir_len == -2) {
380 + reset_checksum_cache();
383 basename_len = strlen(basename) + 1; /* count the '\0' */
386 @@ -1409,11 +1667,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
387 extra_len += EXTRA_LEN;
390 - if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
391 - file_checksum(thisname, &st, tmp_sum);
392 - if (sender_keeps_checksum)
393 - extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
395 + if (sender_keeps_checksum && S_ISREG(st.st_mode))
396 + extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
398 #if EXTRA_ROUNDING > 0
399 if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
400 @@ -1502,8 +1757,14 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
404 - if (sender_keeps_checksum && S_ISREG(st.st_mode))
405 - memcpy(F_SUM(file), tmp_sum, flist_csum_len);
406 + if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
407 + if (flist && checksum_files)
408 + get_cached_checksum(0, thisname, file, &st, tmp_sum);
410 + file_checksum(thisname, &st, tmp_sum);
411 + if (sender_keeps_checksum)
412 + memcpy(F_SUM(file), tmp_sum, flist_csum_len);
416 F_NDX(file) = stats.num_dirs;
417 @@ -2720,7 +2981,7 @@ struct file_list *recv_file_list(int f, int dir_ndx)
418 /* The --relative option sends paths with a leading slash, so we need
419 * to specify the strip_root option here. We rejected leading slashes
420 * for a non-relative transfer in recv_file_entry(). */
421 - flist_sort_and_clean(flist, relative_paths);
422 + flist_sort_and_clean(flist, relative_paths ? CLEAN_STRIP_ROOT : 0);
424 if (protocol_version < 30) {
425 /* Recv the io_error flag */
426 @@ -2965,7 +3226,7 @@ void flist_free(struct file_list *flist)
428 /* This routine ensures we don't have any duplicate names in our file list.
429 * duplicate names can cause corruption because of the pipelining. */
430 -static void flist_sort_and_clean(struct file_list *flist, int strip_root)
431 +static void flist_sort_and_clean(struct file_list *flist, int flags)
433 char fbuf[MAXPATHLEN];
435 @@ -3016,7 +3277,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
436 /* If one is a dir and the other is not, we want to
437 * keep the dir because it might have contents in the
438 * list. Otherwise keep the first one. */
439 - if (S_ISDIR(file->mode)) {
440 + if (S_ISDIR(file->mode) || flags & CLEAN_KEEP_LAST) {
441 struct file_struct *fp = flist->sorted[j];
442 if (!S_ISDIR(fp->mode))
444 @@ -3032,8 +3293,8 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
449 - if (DEBUG_GTE(DUP, 1)) {
450 + if (!am_sender || flags & CLEAN_KEEP_LAST) {
451 + if (DEBUG_GTE(DUP, 1) && !(flags & CLEAN_KEEP_LAST)) {
453 "removing duplicate name %s from file list (%d)\n",
454 f_name(file, fbuf), drop + flist->ndx_start);
455 @@ -3055,7 +3316,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
457 flist->high = prev_i;
460 + if (flags & CLEAN_STRIP_ROOT) {
461 /* We need to strip off the leading slashes for relative
462 * paths, but this must be done _after_ the sorting phase. */
463 for (i = flist->low; i <= flist->high; i++) {
464 diff --git a/generator.c b/generator.c
467 @@ -54,6 +54,7 @@ extern int delete_after;
468 extern int missing_args;
469 extern int msgdone_cnt;
470 extern int ignore_errors;
471 +extern int checksum_files;
472 extern int remove_source_files;
473 extern int delay_updates;
474 extern int update_only;
475 @@ -614,7 +615,7 @@ static enum filetype get_file_type(mode_t mode)
478 /* Perform our quick-check heuristic for determining if a file is unchanged. */
479 -int quick_check_ok(enum filetype ftype, const char *fn, struct file_struct *file, STRUCT_STAT *st)
480 +int quick_check_ok(enum filetype ftype, const char *fn, struct file_struct *file, STRUCT_STAT *st, int slot)
484 @@ -625,7 +626,10 @@ int quick_check_ok(enum filetype ftype, const char *fn, struct file_struct *file
485 * of the file mtime to determine whether to sync. */
486 if (always_checksum > 0) {
487 char sum[MAX_DIGEST_LEN];
488 - file_checksum(fn, st, sum);
489 + if (checksum_files && slot >= 0)
490 + get_cached_checksum(slot, fn, file, st, sum);
492 + file_checksum(fn, st, sum);
493 return memcmp(sum, F_SUM(file), flist_csum_len) == 0;
496 @@ -956,7 +960,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx,
500 - if (!quick_check_ok(FT_REG, cmpbuf, file, &sxp->st))
501 + if (!quick_check_ok(FT_REG, cmpbuf, file, &sxp->st, j+1))
503 if (match_level == 1) {
505 @@ -1079,7 +1083,7 @@ static int try_dests_non(struct file_struct *file, char *fname, int ndx,
509 - if (!quick_check_ok(ftype, cmpbuf, file, &sxp->st))
510 + if (!quick_check_ok(ftype, cmpbuf, file, &sxp->st, j+1))
512 if (match_level < 2) {
514 @@ -1215,7 +1219,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
515 * --ignore-non-existing, daemon exclude, or mkdir failure. */
516 static struct file_struct *skip_dir = NULL;
517 static struct file_list *fuzzy_dirlist[MAX_BASIS_DIRS+1];
518 - static int need_fuzzy_dirlist = 0;
519 + static int need_new_dirscan = 0;
520 struct file_struct *fuzzy_file = NULL;
521 int fd = -1, f_copy = -1;
523 @@ -1332,8 +1336,9 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
524 fuzzy_dirlist[i] = NULL;
527 - need_fuzzy_dirlist = 1;
529 + need_new_dirscan = 1;
530 + } else if (checksum_files)
531 + need_new_dirscan = 1;
534 dflt_perms = default_perms_for_dir(dn);
535 @@ -1341,6 +1346,24 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
539 + if (need_new_dirscan && ftype == FT_REG) {
541 + strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
542 + for (i = 0; i < fuzzy_basis; i++) {
543 + if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
545 + fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES | GDL_PERHAPS_DIR);
546 + if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
547 + flist_free(fuzzy_dirlist[i]);
548 + fuzzy_dirlist[i] = NULL;
551 + if (checksum_files) {
552 + reset_checksum_cache();
554 + need_new_dirscan = 0;
557 statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir);
560 @@ -1387,7 +1410,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
561 if (INFO_GTE(SKIP, 2)) {
563 suf = " (type change)";
564 - else if (!quick_check_ok(ftype, fname, file, &sx.st))
565 + else if (!quick_check_ok(ftype, fname, file, &sx.st, 0))
566 suf = always_checksum ? " (sum change)" : " (file change)";
567 else if (!unchanged_attrs(fname, file, &sx))
568 suf = " (attr change)";
569 @@ -1558,7 +1581,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
573 - if (stype == FT_SYMLINK && quick_check_ok(stype, fname, file, &sx.st)) {
574 + if (stype == FT_SYMLINK && quick_check_ok(stype, fname, file, &sx.st, 0)) {
575 /* The link is pointing to the right place. */
576 set_file_attrs(fname, file, &sx, NULL, maybe_ATTRS_REPORT);
578 @@ -1627,7 +1650,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
582 - else if (quick_check_ok(ftype, fname, file, &sx.st)) {
583 + else if (quick_check_ok(ftype, fname, file, &sx.st, 0)) {
584 /* The device or special file is identical. */
585 set_file_attrs(fname, file, &sx, NULL, maybe_ATTRS_REPORT);
587 @@ -1752,22 +1775,6 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
590 if (statret != 0 && fuzzy_basis) {
591 - if (need_fuzzy_dirlist) {
592 - const char *dn = file->dirname ? file->dirname : ".";
594 - strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
595 - for (i = 0; i < fuzzy_basis; i++) {
596 - if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
598 - fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES | GDL_PERHAPS_DIR);
599 - if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
600 - flist_free(fuzzy_dirlist[i]);
601 - fuzzy_dirlist[i] = NULL;
604 - need_fuzzy_dirlist = 0;
607 /* Sets fnamecmp_type to FNAMECMP_FUZZY or above. */
608 fuzzy_file = find_fuzzy(file, fuzzy_dirlist, &fnamecmp_type);
610 @@ -1806,7 +1813,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
612 else if (fnamecmp_type >= FNAMECMP_FUZZY)
614 - else if (quick_check_ok(FT_REG, fnamecmp, file, &sx.st)) {
615 + else if (quick_check_ok(FT_REG, fnamecmp, file, &sx.st, fnamecmp_type == FNAMECMP_FNAME ? 0 : -1)) {
617 do_unlink(partialptr);
618 handle_partial_dir(partialptr, PDIR_DELETE);
619 diff --git a/hlink.c b/hlink.c
622 @@ -406,7 +406,7 @@ int hard_link_check(struct file_struct *file, int ndx, char *fname,
626 - if (!quick_check_ok(FT_REG, cmpbuf, file, &alt_sx.st))
627 + if (!quick_check_ok(FT_REG, cmpbuf, file, &alt_sx.st, j+1))
630 if (unchanged_attrs(cmpbuf, file, &alt_sx))
631 diff --git a/loadparm.c b/loadparm.c
634 @@ -162,6 +162,13 @@ static struct enum_list enum_syslog_facility[] = {
638 +static struct enum_list enum_checksum_files[] = {
639 + { CSF_IGNORE_FILES, "none" },
640 + { CSF_LAX_MODE, "lax" },
641 + { CSF_STRICT_MODE, "strict" },
645 /* Expand %VAR% references. Any unknown vars or unrecognized
646 * syntax leaves the raw chars unchanged. */
647 static char *expand_vars(const char *str)
648 diff --git a/options.c b/options.c
651 @@ -126,6 +126,7 @@ size_t bwlimit_writemax = 0;
652 int ignore_existing = 0;
653 int ignore_non_existing = 0;
654 int need_messages_from_generator = 0;
655 +int checksum_files = CSF_IGNORE_FILES;
656 int max_delete = INT_MIN;
659 @@ -582,7 +583,7 @@ enum {OPT_SERVER = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
660 OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
661 OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
662 OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_BLOCK_SIZE,
663 - OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR,
664 + OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR, OPT_SUMFILES,
665 OPT_OLD_COMPRESS, OPT_NEW_COMPRESS, OPT_NO_COMPRESS, OPT_OLD_ARGS,
666 OPT_STOP_AFTER, OPT_STOP_AT,
667 OPT_REFUSED_BASE = 9000};
668 @@ -739,6 +740,7 @@ static struct poptOption long_options[] = {
669 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
670 {"checksum-choice", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 },
671 {"cc", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 },
672 + {"sumfiles", 0, POPT_ARG_STRING, 0, OPT_SUMFILES, 0, 0 },
673 {"block-size", 'B', POPT_ARG_STRING, 0, OPT_BLOCK_SIZE, 0, 0 },
674 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
675 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
676 @@ -1751,6 +1753,23 @@ int parse_arguments(int *argc_p, const char ***argv_p)
681 + arg = poptGetOptArg(pc);
682 + checksum_files = 0;
683 + if (strcmp(arg, "lax") == 0)
684 + checksum_files |= CSF_LAX_MODE;
685 + else if (strcmp(arg, "strict") == 0)
686 + checksum_files |= CSF_STRICT_MODE;
687 + else if (strcmp(arg, "none") == 0)
688 + checksum_files = CSF_IGNORE_FILES;
690 + snprintf(err_buf, sizeof err_buf,
691 + "Invalid argument passed to --sumfiles (%s)\n",
698 arg = poptGetOptArg(pc);
699 parse_output_words(info_words, info_levels, arg, USER_PRIORITY);
700 @@ -2116,6 +2135,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
704 + if (!always_checksum)
705 + checksum_files = CSF_IGNORE_FILES;
707 if (write_batch && read_batch) {
708 snprintf(err_buf, sizeof err_buf,
709 "--write-batch and --read-batch can not be used together\n");
710 diff --git a/rsync.1.md b/rsync.1.md
713 @@ -422,6 +422,7 @@ has its own detailed description later in this manpage.
714 --quiet, -q suppress non-error messages
715 --no-motd suppress daemon-mode MOTD
716 --checksum, -c skip based on checksum, not mod-time & size
717 +--sumfiles=MODE use .rsyncsums to speed up --checksum mode
718 --archive, -a archive mode is -rlptgoD (no -A,-X,-U,-N,-H)
719 --no-OPTION turn off an implied OPTION (e.g. --no-D)
720 --recursive, -r recurse into directories
721 @@ -814,6 +815,8 @@ expand it.
722 file that has the same size as the corresponding sender's file: files with
723 either a changed size or a changed checksum are selected for transfer.
725 + See also the [`--sumfiles`](#opt) option for a way to use cached checksum data.
727 Note that rsync always verifies that each _transferred_ file was correctly
728 reconstructed on the receiving side by checking a whole-file checksum that
729 is generated as the file is transferred, but that automatic
730 @@ -825,6 +828,38 @@ expand it.
731 option or an environment variable that is discussed in that option's
734 +0. `--sumfiles=MODE`
736 + This option tells rsync to make use of any cached checksum information it
737 + finds in per-directory .rsyncsums files when the current transfer is using
738 + the [`--checksum`](#opt) option. If the checksum data is up-to-date, it is
739 + used instead of recomputing it, saving both disk I/O and CPU time. If the
740 + checksum data is missing or outdated, the checksum is computed just as it
741 + would be if `--sumfiles` was not specified.
743 + The MODE value is either "lax", for relaxed checking (which compares size
744 + and mtime), "strict" (which also compares ctime and inode), or "none" to
745 + ignore any .rsyncsums files ("none" is the default). Rsync does not create
746 + or update these files, but there is a perl script in the support directory
747 + named "rsyncsums" that can be used for that.
749 + This option has no effect unless [`--checksum`](#opt) (`-c`) was also
750 + specified. It also only affects the current side of the transfer, so if
751 + you want the remote side to parse its own .rsyncsums files, specify the
752 + option via [`--remote-option`](#opt) (`-M`) (e.g. "`-M--sumfiles=lax`").
754 + To avoid transferring the system's checksum files, you can use an exclude
755 + (e.g. [`--exclude=.rsyncsums`](#opt)). To make this easier to type, you
756 + can use a popt alias. For instance, adding the following line in your
757 + ~/.popt file defines a `--cs` option that enables lax checksum files and
758 + excludes the checksum files:
760 + > rsync alias --cs -c --sumfiles=lax -M--sumfiles=lax -f-_.rsyncsums
762 + An rsync daemon does not allow the client to control this setting, so see
763 + the "checksum files" daemon parameter for information on how to make a
764 + daemon use cached checksum data.
768 This is equivalent to `-rlptgoD`. It is a quick way of saying you want
769 diff --git a/rsync.h b/rsync.h
772 @@ -897,6 +897,10 @@ extern int file_sum_extra_cnt;
773 #define F_SUM(f) ((char*)OPT_EXTRA(f, START_BUMP(f) + HLINK_BUMP(f) \
774 + SUM_EXTRA_CNT - 1))
776 +/* These are only valid on an entry derived from a checksum file. */
777 +#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->unum
778 +#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->unum
780 /* Some utility defines: */
781 #define F_IS_ACTIVE(f) (f)->basename[0]
782 #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
783 @@ -1111,6 +1115,13 @@ typedef struct {
784 #define RELNAMECACHE_LEN (offsetof(relnamecache, fname))
787 +#define CSF_ENABLE (1<<1)
788 +#define CSF_LAX (1<<2)
790 +#define CSF_IGNORE_FILES 0
791 +#define CSF_LAX_MODE (CSF_ENABLE|CSF_LAX)
792 +#define CSF_STRICT_MODE (CSF_ENABLE)
794 #include "byteorder.h"
795 #include "lib/mdigest.h"
796 #include "lib/wildmatch.h"
797 diff --git a/rsyncd.conf.5.md b/rsyncd.conf.5.md
798 --- a/rsyncd.conf.5.md
799 +++ b/rsyncd.conf.5.md
800 @@ -453,6 +453,19 @@ in the values of parameters. See that section for details.
801 the max connections limit is not exceeded for the modules sharing the lock
802 file. The default is `/var/run/rsyncd.lock`.
806 + This parameter tells rsync to make use of any cached checksum information
807 + it finds in per-directory .rsyncsums files when the current transfer is
808 + using the `--checksum` option. The value can be set to either "lax",
809 + "strict", or "none". See the client's `--sumfiles` option for what these
812 + Note also that the client's command-line option, `--sumfiles`, has no
813 + effect on a daemon. A daemon will only access checksum files if this
814 + config option tells it to. See also the `exclude` directive for a way to
815 + hide the .rsyncsums files from the user.
819 This parameter determines whether clients will be able to upload files or
820 diff --git a/support/rsyncsums b/support/rsyncsums
823 +++ b/support/rsyncsums
829 +use Cwd qw(abs_path cwd);
833 +our $SUMS_FILE = '.rsyncsums';
835 +&Getopt::Long::Configure('bundling');
836 +&usage if !&GetOptions(
837 + 'recurse|r' => \( my $recurse_opt ),
838 + 'mode|m=s' => \( my $cmp_mode = 'strict' ),
839 + 'check|c' => \( my $check_opt ),
840 + 'verbose|v+' => \( my $verbosity = 0 ),
841 + 'help|h' => \( my $help_opt ),
843 +&usage if $help_opt || $cmp_mode !~ /^(lax|strict)$/;
845 +my $ignore_ctime_and_inode = $cmp_mode eq 'lax' ? 1 : 0; # lax checks only size+mtime; strict also checks ctime+inode
847 +my $start_dir = cwd();
850 +@dirs = '.' unless @dirs;
859 +my $md4 = Digest::MD4->new;
860 +my $md5 = Digest::MD5->new;
863 + my $dir = shift @dirs;
865 + if (!chdir($dir)) {
866 + warn "Unable to chdir to $dir: $!\n";
869 + if (!opendir(DP, '.')) {
870 + warn "Unable to opendir $dir: $!\n";
875 + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
877 + print "$reldir ... ";
878 + print "\n" if $check_opt;
883 + if (open(FP, '<', $SUMS_FILE)) {
886 + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
887 + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
895 + my $update_cnt = 0;
896 + while (defined(my $fn = readdir(DP))) {
897 + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
899 + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
904 + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
905 + $ctime &= 0xFFFFFFFF;
906 + $inode &= 0xFFFFFFFF;
907 + my $ref = $cache{$fn};
911 + if (defined $ref) {
913 + if ($$ref[3] == $size
914 + && $$ref[4] == $mtime
915 + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
916 + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
920 + if (!$update_cnt++) {
921 + print "UPDATING\n" if $verbosity;
925 + if (!open(IN, $fn)) {
926 + print STDERR "Unable to read $fn: $!\n";
927 + if (defined $ref) {
928 + delete $cache{$fn};
936 + while (sysread(IN, $_, 64*1024)) {
940 + $sum4 = $md4->hexdigest;
941 + $sum5 = $md5->hexdigest;
942 + print " $sum4 $sum5" if $verbosity > 2;
943 + print " $fn" if $verbosity > 1;
944 + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
945 + $ctime2 &= 0xFFFFFFFF;
946 + $inode2 &= 0xFFFFFFFF;
947 + last if $size == $size2 && $mtime == $mtime2
948 + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
954 + print " REREADING\n" if $verbosity > 1;
961 + if (!defined $ref) {
963 + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
966 + print " OK\n" if $verbosity > 1;
969 + if ($verbosity < 2) {
970 + print $verbosity ? ' ' : "$reldir/";
976 + print "\n" if $verbosity > 1;
977 + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime, $inode ];
983 + unshift(@dirs, sort @subdirs) if $recurse_opt;
987 + } elsif ($d_cnt == 0) {
989 + print "(removed $SUMS_FILE) " if $verbosity;
990 + unlink($SUMS_FILE);
992 + print "empty\n" if $verbosity;
993 + } elsif ($update_cnt || $d_cnt != $f_cnt) {
994 + print "UPDATING\n" if $verbosity && !$update_cnt;
995 + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
997 + foreach my $fn (sort keys %cache) {
998 + my $ref = $cache{$fn};
999 + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
1000 + next unless $found;
1001 + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
1005 + print "ok\n" if $verbosity;
1014 +Usage: rsyncsums [OPTIONS] [DIRS]
1017 + -r, --recurse Update $SUMS_FILE files in subdirectories too.
1018 + -m, --mode=MODE Compare entries in either "lax" or "strict" mode. Using
1019 + "lax" compares size and mtime, while "strict" additionally
1020 + compares ctime and inode. Default: strict.
1021 + -c, --check Check if the checksums are right (doesn't update).
1022 + -v, --verbose Mention what we're doing. Repeat for more info.
1023 + -h, --help Display this help message.