1 Optimize the --checksum option using externally created .rsyncsums files.
3 This adds a new option, --sumfiles=MODE, that allows you to use a cache of
4 checksums when performing a --checksum transfer. These checksum files
5 (.rsyncsums) must be created by some other process -- see the perl script,
6 rsyncsums, in the support dir for one way.
8 This option can be particularly helpful to a public mirror that wants to
9 pre-compute their .rsyncsums files, set the "checksum files = strict" option
10 in their daemon config file, and thus make it quite efficient for a client
11 rsync to make use of the --checksum option on their server.
13 To use this patch, run these commands for a successful build:
15 patch -p1 <patches/checksum-reading.diff
16 ./configure (optional if already run)
19 based-on: 7c8f180900432e646c0a4bd02e2c4033068dbb7c
20 diff --git a/checksum.c b/checksum.c
23 @@ -98,7 +98,7 @@ void get_checksum2(char *buf, int32 len, char *sum)
27 -void file_checksum(char *fname, char *sum, OFF_T size)
28 +void file_checksum(const char *fname, OFF_T size, char *sum)
30 struct map_struct *buf;
32 diff --git a/clientserver.c b/clientserver.c
35 @@ -42,6 +42,8 @@ extern int numeric_ids;
36 extern int filesfrom_fd;
37 extern int remote_protocol;
38 extern int protocol_version;
39 +extern int always_checksum;
40 +extern int checksum_files;
41 extern int io_timeout;
43 extern int write_batch;
44 @@ -911,6 +913,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char
45 } else if (am_root < 0) /* Treat --fake-super from client as --super. */
48 + checksum_files = always_checksum ? lp_checksum_files(i)
51 if (filesfrom_fd == 0)
54 diff --git a/flist.c b/flist.c
65 @@ -33,6 +34,7 @@ extern int am_sender;
66 extern int am_generator;
67 extern int inc_recurse;
68 extern int always_checksum;
69 +extern int basis_dir_cnt;
71 extern int ignore_errors;
72 extern int numeric_ids;
73 @@ -58,6 +60,7 @@ extern int implied_dirs;
74 extern int ignore_perishable;
75 extern int non_perishable_cnt;
76 extern int prune_empty_dirs;
77 +extern int checksum_files;
78 extern int copy_links;
79 extern int copy_unsafe_links;
80 extern int protocol_version;
81 @@ -69,6 +72,7 @@ extern int sender_symlink_iconv;
82 extern int output_needs_newline;
83 extern int sender_keeps_checksum;
84 extern int unsort_ndx;
85 +extern char *basis_dir[];
87 extern struct stats stats;
88 extern char *filesfrom_host;
89 @@ -86,6 +90,20 @@ extern int filesfrom_convert;
90 extern iconv_t ic_send, ic_recv;
93 +#ifdef HAVE_UTIMENSAT
94 +#ifdef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
95 +#define ST_MTIME_NSEC st_mtim.tv_nsec
96 +#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC)
97 +#define ST_MTIME_NSEC st_mtimensec
101 +#define RSYNCSUMS_FILE ".rsyncsums"
102 +#define RSYNCSUMS_LEN (sizeof RSYNCSUMS_FILE-1)
104 +#define CLEAN_STRIP_ROOT (1<<0)
105 +#define CLEAN_KEEP_LAST (1<<1)
107 #define PTR_SIZE (sizeof (struct file_struct *))
110 @@ -127,7 +145,11 @@ static char tmp_sum[MAX_DIGEST_LEN];
111 static char empty_sum[MAX_DIGEST_LEN];
112 static int flist_count_offset; /* for --delete --progress */
114 -static void flist_sort_and_clean(struct file_list *flist, int strip_root);
115 +static struct csum_cache {
116 + struct file_list *flist;
117 +} *csum_cache = NULL;
119 +static void flist_sort_and_clean(struct file_list *flist, int flags);
120 static void output_flist(struct file_list *flist);
122 void init_flist(void)
123 @@ -342,6 +364,238 @@ static void flist_done_allocating(struct file_list *flist)
124 flist->pool_boundary = ptr;
127 +void reset_checksum_cache()
129 + int slot, slots = am_sender ? 1 : basis_dir_cnt + 1;
132 + csum_cache = new_array0(struct csum_cache, slots);
134 + out_of_memory("reset_checksum_cache");
137 + for (slot = 0; slot < slots; slot++) {
138 + struct file_list *flist = csum_cache[slot].flist;
141 + /* Reset the pool memory and empty the file-list array. */
142 + pool_free_old(flist->file_pool,
143 + pool_boundary(flist->file_pool, 0));
146 + flist = csum_cache[slot].flist = flist_new(FLIST_TEMP, "reset_checksum_cache");
150 + flist->next = NULL;
154 +/* The basename_len count is the length of the basename + 1 for the '\0'. */
155 +static int add_checksum(struct file_list *flist, const char *dirname,
156 + const char *basename, int basename_len, OFF_T file_length,
157 + time_t mtime, uint32 ctime, uint32 inode,
160 + struct file_struct *file;
161 + int alloc_len, extra_len;
164 + if (basename_len == RSYNCSUMS_LEN+1 && *basename == '.'
165 + && strcmp(basename, RSYNCSUMS_FILE) == 0)
168 + /* "2" is for a 32-bit ctime num and a 32-bit inode num. */
169 + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
171 +#if EXTRA_ROUNDING > 0
172 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
173 + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
175 + alloc_len = FILE_STRUCT_LEN + extra_len + basename_len;
176 + bp = pool_alloc(flist->file_pool, alloc_len, "add_checksum");
178 + memset(bp, 0, extra_len + FILE_STRUCT_LEN);
180 + file = (struct file_struct *)bp;
181 + bp += FILE_STRUCT_LEN;
183 + memcpy(bp, basename, basename_len);
185 + file->mode = S_IFREG;
186 + file->modtime = mtime;
187 + file->len32 = (uint32)file_length;
188 + if (file_length > 0xFFFFFFFFu) {
189 + file->flags |= FLAG_LENGTH64;
190 + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
192 + file->dirname = dirname;
193 + F_CTIME(file) = ctime;
194 + F_INODE(file) = inode;
196 + memcpy(bp, sum, checksum_len);
198 + flist_expand(flist, 1);
199 + flist->files[flist->used++] = file;
201 + flist->sorted = flist->files;
206 +/* The "dirname" arg's data must remain unchanged during the lifespan of
207 + * the created csum_cache[].flist object because we use it directly. */
208 +static void read_checksums(int slot, struct file_list *flist, const char *dirname)
210 + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
216 + uint32 ctime, inode;
217 + int dlen = dirname ? strlcpy(fbuf, dirname, sizeof fbuf) : 0;
219 + if (dlen >= (int)(sizeof fbuf - 1 - RSYNCSUMS_LEN))
222 + fbuf[dlen++] = '/';
225 + strlcpy(fbuf+dlen, RSYNCSUMS_FILE, sizeof fbuf - dlen);
227 + pathjoin(line, sizeof line, basis_dir[slot-1], fbuf);
231 + if (!(fp = fopen(cp, "r")))
234 + while (fgets(line, sizeof line, fp)) {
236 + if (protocol_version >= 30) {
237 + char *alt_sum = cp;
239 + while (*++cp == '=') {}
241 + while (isXDigit(cp)) cp++;
242 + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
244 + while (*++cp == ' ') {}
250 + for (i = 0; i < checksum_len*2; i++, cp++) {
252 + if (isXDigit(cp)) {
256 + x = (*cp & 0xF) + 9;
269 + while (*++cp == ' ') {}
271 + if (protocol_version < 30) {
272 + char *alt_sum = cp;
274 + while (*++cp == '=') {}
276 + while (isXDigit(cp)) cp++;
277 + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
279 + while (*++cp == ' ') {}
283 + while (isDigit(cp))
284 + file_length = file_length * 10 + *cp++ - '0';
287 + while (*++cp == ' ') {}
290 + while (isDigit(cp))
291 + mtime = mtime * 10 + *cp++ - '0';
294 + while (*++cp == ' ') {}
297 + while (isDigit(cp))
298 + ctime = ctime * 10 + *cp++ - '0';
301 + while (*++cp == ' ') {}
304 + while (isDigit(cp))
305 + inode = inode * 10 + *cp++ - '0';
308 + while (*++cp == ' ') {}
311 + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
315 + cp[len++] = '\0'; /* len now counts the null */
316 + if (strchr(cp, '/'))
318 + if (len > MAXPATHLEN)
321 + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
323 + add_checksum(flist, dirname, cp, len, file_length,
324 + mtime, ctime, inode,
329 + flist_sort_and_clean(flist, CLEAN_KEEP_LAST);
332 +void get_cached_checksum(int slot, const char *fname, struct file_struct *file,
333 + STRUCT_STAT *stp, char *sum_buf)
335 + struct file_list *flist = csum_cache[slot].flist;
338 + if (!flist->next) {
339 + flist->next = cur_flist; /* next points from checksum flist to file flist */
340 + read_checksums(slot, flist, file->dirname);
343 + if ((j = flist_find(flist, file)) >= 0) {
344 + struct file_struct *fp = flist->sorted[j];
346 + if (F_LENGTH(fp) == stp->st_size
347 + && fp->modtime == stp->st_mtime
348 + && (checksum_files & CSF_LAX
349 + || (F_CTIME(fp) == (uint32)stp->st_ctime
350 + && F_INODE(fp) == (uint32)stp->st_ino))) {
351 + memcpy(sum_buf, F_SUM(fp), MAX_DIGEST_LEN);
356 + file_checksum(fname, stp->st_size, sum_buf);
359 /* Call this with EITHER (1) "file, NULL, 0" to chdir() to the file's
360 * F_PATHNAME(), or (2) "NULL, dir, dirlen" to chdir() to the supplied dir,
361 * with dir == NULL taken to be the starting directory, and dirlen < 0
362 @@ -1135,7 +1389,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
363 STRUCT_STAT *stp, int flags, int filter_level)
365 static char *lastdir;
366 - static int lastdir_len = -1;
367 + static int lastdir_len = -2;
368 struct file_struct *file;
369 char thisname[MAXPATHLEN];
370 char linkname[MAXPATHLEN];
371 @@ -1281,9 +1535,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
372 memcpy(lastdir, thisname, len);
375 + if (checksum_files && am_sender && flist)
376 + reset_checksum_cache();
381 + if (checksum_files && am_sender && flist && lastdir_len == -2) {
383 + reset_checksum_cache();
386 basename_len = strlen(basename) + 1; /* count the '\0' */
389 @@ -1301,11 +1562,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
390 extra_len += EXTRA_LEN;
393 - if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
394 - file_checksum(thisname, tmp_sum, st.st_size);
395 - if (sender_keeps_checksum)
396 - extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
398 + if (sender_keeps_checksum && S_ISREG(st.st_mode))
399 + extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
401 #if EXTRA_ROUNDING > 0
402 if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
403 @@ -1390,8 +1648,14 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
407 - if (sender_keeps_checksum && S_ISREG(st.st_mode))
408 - memcpy(F_SUM(file), tmp_sum, checksum_len);
409 + if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
410 + if (flist && checksum_files)
411 + get_cached_checksum(0, thisname, file, &st, tmp_sum);
413 + file_checksum(thisname, st.st_size, tmp_sum);
414 + if (sender_keeps_checksum)
415 + memcpy(F_SUM(file), tmp_sum, checksum_len);
419 F_NDX(file) = stats.num_dirs;
420 @@ -2548,7 +2812,7 @@ struct file_list *recv_file_list(int f)
421 rprintf(FINFO, "[%s] flist_eof=1\n", who_am_i());
424 - flist_sort_and_clean(flist, relative_paths);
425 + flist_sort_and_clean(flist, relative_paths ? CLEAN_STRIP_ROOT : 0);
427 if (protocol_version < 30) {
428 /* Recv the io_error flag */
429 @@ -2771,7 +3035,7 @@ void flist_free(struct file_list *flist)
431 /* This routine ensures we don't have any duplicate names in our file list.
432 * duplicate names can cause corruption because of the pipelining. */
433 -static void flist_sort_and_clean(struct file_list *flist, int strip_root)
434 +static void flist_sort_and_clean(struct file_list *flist, int flags)
436 char fbuf[MAXPATHLEN];
438 @@ -2822,7 +3086,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
439 /* If one is a dir and the other is not, we want to
440 * keep the dir because it might have contents in the
441 * list. Otherwise keep the first one. */
442 - if (S_ISDIR(file->mode)) {
443 + if (S_ISDIR(file->mode) || flags & CLEAN_KEEP_LAST) {
444 struct file_struct *fp = flist->sorted[j];
445 if (!S_ISDIR(fp->mode))
447 @@ -2838,8 +3102,8 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
452 - if (DEBUG_GTE(DUP, 1)) {
453 + if (!am_sender || flags & CLEAN_KEEP_LAST) {
454 + if (DEBUG_GTE(DUP, 1) && !(flags & CLEAN_KEEP_LAST)) {
456 "removing duplicate name %s from file list (%d)\n",
457 f_name(file, fbuf), drop + flist->ndx_start);
458 @@ -2861,7 +3125,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
460 flist->high = prev_i;
463 + if (flags & CLEAN_STRIP_ROOT) {
464 /* We need to strip off the leading slashes for relative
465 * paths, but this must be done _after_ the sorting phase. */
466 for (i = flist->low; i <= flist->high; i++) {
467 diff --git a/generator.c b/generator.c
470 @@ -51,6 +51,7 @@ extern int delete_after;
471 extern int missing_args;
472 extern int msgdone_cnt;
473 extern int ignore_errors;
474 +extern int checksum_files;
475 extern int remove_source_files;
476 extern int delay_updates;
477 extern int update_only;
478 @@ -564,7 +565,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
481 /* Perform our quick-check heuristic for determining if a file is unchanged. */
482 -int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
483 +int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st, int slot)
485 if (st->st_size != F_LENGTH(file))
487 @@ -573,7 +574,10 @@ int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
488 of the file time to determine whether to sync */
489 if (always_checksum > 0 && S_ISREG(st->st_mode)) {
490 char sum[MAX_DIGEST_LEN];
491 - file_checksum(fn, sum, st->st_size);
492 + if (checksum_files && slot >= 0)
493 + get_cached_checksum(slot, fn, file, st, sum);
495 + file_checksum(fn, st->st_size, sum);
496 return memcmp(sum, F_SUM(file), checksum_len) == 0;
499 @@ -843,7 +847,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx,
503 - if (!unchanged_file(cmpbuf, file, &sxp->st))
504 + if (!unchanged_file(cmpbuf, file, &sxp->st, j+1))
508 @@ -1129,7 +1133,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
509 * --ignore-non-existing, daemon exclude, or mkdir failure. */
510 static struct file_struct *skip_dir = NULL;
511 static struct file_list *fuzzy_dirlist = NULL;
512 - static int need_fuzzy_dirlist = 0;
513 + static int need_new_dirscan = 0;
514 struct file_struct *fuzzy_file = NULL;
515 int fd = -1, f_copy = -1;
517 @@ -1213,8 +1217,8 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
518 flist_free(fuzzy_dirlist);
519 fuzzy_dirlist = NULL;
522 - need_fuzzy_dirlist = 1;
523 + if (fuzzy_basis || checksum_files)
524 + need_new_dirscan = 1;
527 dflt_perms = default_perms_for_dir(dn);
528 @@ -1222,10 +1226,15 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
532 - if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
533 - strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
534 - fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES);
535 - need_fuzzy_dirlist = 0;
536 + if (need_new_dirscan && S_ISREG(file->mode)) {
538 + strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
539 + fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES);
541 + if (checksum_files) {
542 + reset_checksum_cache();
544 + need_new_dirscan = 0;
547 statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir);
548 @@ -1663,7 +1672,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
550 else if (fnamecmp_type == FNAMECMP_FUZZY)
552 - else if (unchanged_file(fnamecmp, file, &sx.st)) {
553 + else if (unchanged_file(fnamecmp, file, &sx.st, fnamecmp_type == FNAMECMP_FNAME ? 0 : -1)) {
555 do_unlink(partialptr);
556 handle_partial_dir(partialptr, PDIR_DELETE);
557 diff --git a/hlink.c b/hlink.c
560 @@ -410,7 +410,7 @@ int hard_link_check(struct file_struct *file, int ndx, char *fname,
564 - if (!unchanged_file(cmpbuf, file, &alt_sx.st))
565 + if (!unchanged_file(cmpbuf, file, &alt_sx.st, j+1))
568 if (unchanged_attrs(cmpbuf, file, &alt_sx))
569 diff --git a/itypes.h b/itypes.h
572 @@ -23,6 +23,12 @@ isDigit(const char *ptr)
576 +isXDigit(const char *ptr)
578 + return isxdigit(*(unsigned char *)ptr);
582 isPrint(const char *ptr)
584 return isprint(*(unsigned char *)ptr);
585 diff --git a/loadparm.c b/loadparm.c
588 @@ -134,6 +134,7 @@ typedef struct {
589 /* NOTE: update this macro if the last char* variable changes! */
590 #define LOCAL_STRING_COUNT() (offsetof(local_vars, uid) / sizeof (char*) + 1)
592 + int checksum_files;
596 @@ -208,6 +209,7 @@ static const all_vars Defaults = {
597 /* temp_dir; */ NULL,
600 + /* checksum_files; */ CSF_IGNORE_FILES,
601 /* max_connections; */ 0,
602 /* max_verbosity; */ 1,
603 /* syslog_facility; */ LOG_DAEMON,
604 @@ -310,6 +312,13 @@ static struct enum_list enum_facilities[] = {
608 +static struct enum_list enum_csum_modes[] = {
609 + { CSF_IGNORE_FILES, "none" },
610 + { CSF_LAX_MODE, "lax" },
611 + { CSF_STRICT_MODE, "strict" },
615 static struct parm_struct parm_table[] =
617 {"address", P_STRING, P_GLOBAL,&Vars.g.bind_address, NULL,0},
618 @@ -321,6 +330,7 @@ static struct parm_struct parm_table[] =
620 {"auth users", P_STRING, P_LOCAL, &Vars.l.auth_users, NULL,0},
621 {"charset", P_STRING, P_LOCAL, &Vars.l.charset, NULL,0},
622 + {"checksum files", P_ENUM, P_LOCAL, &Vars.l.checksum_files, enum_csum_modes,0},
623 {"comment", P_STRING, P_LOCAL, &Vars.l.comment, NULL,0},
624 {"dont compress", P_STRING, P_LOCAL, &Vars.l.dont_compress, NULL,0},
625 {"exclude from", P_STRING, P_LOCAL, &Vars.l.exclude_from, NULL,0},
626 @@ -477,6 +487,7 @@ FN_LOCAL_STRING(lp_secrets_file, secrets_file)
627 FN_LOCAL_STRING(lp_temp_dir, temp_dir)
628 FN_LOCAL_STRING(lp_uid, uid)
630 +FN_LOCAL_INTEGER(lp_checksum_files, checksum_files)
631 FN_LOCAL_INTEGER(lp_max_connections, max_connections)
632 FN_LOCAL_INTEGER(lp_max_verbosity, max_verbosity)
633 FN_LOCAL_INTEGER(lp_syslog_facility, syslog_facility)
634 diff --git a/options.c b/options.c
637 @@ -113,6 +113,7 @@ size_t bwlimit_writemax = 0;
638 int ignore_existing = 0;
639 int ignore_non_existing = 0;
640 int need_messages_from_generator = 0;
641 +int checksum_files = CSF_IGNORE_FILES;
642 int max_delete = INT_MIN;
645 @@ -669,6 +670,7 @@ void usage(enum logcode F)
646 rprintf(F," -q, --quiet suppress non-error messages\n");
647 rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n");
648 rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n");
649 + rprintf(F," --sumfiles=MODE use .rsyncsums to speedup --checksum mode\n");
650 rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n");
651 rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n");
652 rprintf(F," -r, --recursive recurse into directories\n");
653 @@ -811,7 +813,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
654 OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
655 OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
656 OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
657 - OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG,
658 + OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_SUMFILES,
659 OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT,
660 OPT_SERVER, OPT_REFUSED_BASE = 9000};
662 @@ -951,6 +953,7 @@ static struct poptOption long_options[] = {
663 {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 },
664 {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
665 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
666 + {"sumfiles", 0, POPT_ARG_STRING, 0, OPT_SUMFILES, 0, 0 },
667 {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 },
668 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
669 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
670 @@ -1669,6 +1672,23 @@ int parse_arguments(int *argc_p, const char ***argv_p)
675 + arg = poptGetOptArg(pc);
676 + checksum_files = 0;
677 + if (strcmp(arg, "lax") == 0)
678 + checksum_files |= CSF_LAX_MODE;
679 + else if (strcmp(arg, "strict") == 0)
680 + checksum_files |= CSF_STRICT_MODE;
681 + else if (strcmp(arg, "none") == 0)
682 + checksum_files = CSF_IGNORE_FILES;
684 + snprintf(err_buf, sizeof err_buf,
685 + "Invalid argument passed to --sumfiles (%s)\n",
692 arg = poptGetOptArg(pc);
693 parse_output_words(info_words, info_levels, arg, USER_PRIORITY);
694 @@ -1883,6 +1903,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
698 + if (!always_checksum)
699 + checksum_files = CSF_IGNORE_FILES;
701 if (write_batch && read_batch) {
702 snprintf(err_buf, sizeof err_buf,
703 "--write-batch and --read-batch can not be used together\n");
704 diff --git a/rsync.h b/rsync.h
707 @@ -771,6 +771,10 @@ extern int xattrs_ndx;
708 #define F_SUM(f) ((char*)OPT_EXTRA(f, START_BUMP(f) + HLINK_BUMP(f) \
709 + SUM_EXTRA_CNT - 1))
711 +/* These are only valid on an entry read from a checksum file. */
712 +#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->unum
713 +#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->unum
715 /* Some utility defines: */
716 #define F_IS_ACTIVE(f) (f)->basename[0]
717 #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
718 @@ -967,6 +971,13 @@ typedef struct {
719 char fname[1]; /* has variable size */
722 +#define CSF_ENABLE (1<<1)
723 +#define CSF_LAX (1<<2)
725 +#define CSF_IGNORE_FILES 0
726 +#define CSF_LAX_MODE (CSF_ENABLE|CSF_LAX)
727 +#define CSF_STRICT_MODE (CSF_ENABLE)
729 #include "byteorder.h"
730 #include "lib/mdigest.h"
731 #include "lib/wildmatch.h"
732 diff --git a/rsync.yo b/rsync.yo
735 @@ -337,6 +337,7 @@ to the detailed description below for a complete description. verb(
736 -q, --quiet suppress non-error messages
737 --no-motd suppress daemon-mode MOTD (see caveat)
738 -c, --checksum skip based on checksum, not mod-time & size
739 + --sumfiles=MODE use .rsyncsums to speedup --checksum mode
740 -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)
741 --no-OPTION turn off an implied OPTION (e.g. --no-D)
742 -r, --recursive recurse into directories
743 @@ -600,9 +601,9 @@ uses a "quick check" that (by default) checks if each file's size and time
744 of last modification match between the sender and receiver. This option
745 changes this to compare a 128-bit checksum for each file that has a
746 matching size. Generating the checksums means that both sides will expend
747 -a lot of disk I/O reading all the data in the files in the transfer (and
748 -this is prior to any reading that will be done to transfer changed files),
749 -so this can slow things down significantly.
750 +a lot of disk I/O reading the data in all the files in the transfer, so
751 +this can slow things down significantly (and this is prior to any reading
752 +that will be done to transfer the files that have changed).
754 The sending side generates its checksums while it is doing the file-system
755 scan that builds the list of the available files. The receiver generates
756 @@ -610,6 +611,8 @@ its checksums when it is scanning for changed files, and will checksum any
757 file that has the same size as the corresponding sender's file: files with
758 either a changed size or a changed checksum are selected for transfer.
760 +See also the bf(--sumfiles) option for a way to use cached checksum data.
762 Note that rsync always verifies that each em(transferred) file was
763 correctly reconstructed on the receiving side by checking a whole-file
764 checksum that is generated as the file is transferred, but that
765 @@ -619,6 +622,36 @@ option's before-the-transfer "Does this file need to be updated?" check.
766 For protocol 30 and beyond (first supported in 3.0.0), the checksum used is
767 MD5. For older protocols, the checksum used is MD4.
769 +dit(bf(--sumfiles=MODE)) This option tells rsync to make use of any cached
770 +checksum information it finds in per-directory .rsyncsums files when the
771 +current transfer is using the bf(--checksum) option. If the checksum data
772 +is up-to-date, it is used instead of recomputing it, saving both disk I/O
773 +and CPU time. If the checksum data is missing or outdated, the checksum is
774 +computed just as it would be if bf(--sumfiles) was not specified.
776 +The MODE value is either "lax", for relaxed checking (which compares size
777 +and mtime), "strict" (which also compares ctime and inode), or "none" to
778 +ignore any .rsyncsums files ("none" is the default). Rsync does not create
779 +or update these files, but there is a perl script in the support directory
780 +named "rsyncsums" that can be used for that.
782 +This option has no effect unless bf(--checksum, -c) was also specified. It
783 +also only affects the current side of the transfer, so if you want the
784 +remote side to parse its own .rsyncsums files, specify the option via the
785 +bf(--rsync-path) option (e.g. bf(--rsync-path="rsync --sumfiles=lax")).
787 +To avoid transferring the system's checksum files, you can use an exclude
788 +(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use
789 +a popt alias. For instance, adding the following line in your ~/.popt file
790 +defines a bf(--cc) option that enables lax checksum files and excludes the
793 +verb( rsync alias --cc -c --sumfiles=lax --exclude=.rsyncsums)
795 +An rsync daemon does not allow the client to control this setting, so see
796 +the "checksum files" daemon parameter for information on how to make a
797 +daemon use cached checksum data.
799 dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
800 way of saying you want recursion and want to preserve almost
801 everything (with -H being a notable omission).
802 diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo
805 @@ -323,6 +323,17 @@ locking on this file to ensure that the max connections limit is not
806 exceeded for the modules sharing the lock file.
807 The default is tt(/var/run/rsyncd.lock).
809 +dit(bf(checksum files)) This parameter tells rsync to make use of any cached
810 +checksum information it finds in per-directory .rsyncsums files when the
811 +current transfer is using the bf(--checksum) option. The value can be set
812 +to either "lax", "strict", or "none" -- see the client's bf(--sumfiles)
813 +option for what these choices do.
815 +Note also that the client's command-line option, bf(--sumfiles), has no
816 +effect on a daemon. A daemon will only access checksum files if this
817 +config option tells it to. See also the bf(exclude) directive for a way
818 +to hide the .rsyncsums files from the user.
820 dit(bf(read only)) This parameter determines whether clients
821 will be able to upload files or not. If "read only" is true then any
822 attempted uploads will fail. If "read only" is false then uploads will
823 diff --git a/support/rsyncsums b/support/rsyncsums
826 +++ b/support/rsyncsums
832 +use Cwd qw(abs_path cwd);
836 +our $SUMS_FILE = '.rsyncsums';
838 +&Getopt::Long::Configure('bundling');
839 +&usage if !&GetOptions(
840 + 'recurse|r' => \( my $recurse_opt ),
841 + 'mode|m=s' => \( my $cmp_mode = 'strict' ),
842 + 'check|c' => \( my $check_opt ),
843 + 'verbose|v+' => \( my $verbosity = 0 ),
844 + 'help|h' => \( my $help_opt ),
846 +&usage if $help_opt || $cmp_mode !~ /^(lax|strict)$/;
848 +my $ignore_ctime_and_inode = $cmp_mode eq 'lax' ? 0 : 1;
850 +my $start_dir = cwd();
853 +@dirs = '.' unless @dirs;
862 +my $md4 = Digest::MD4->new;
863 +my $md5 = Digest::MD5->new;
866 + my $dir = shift @dirs;
868 + if (!chdir($dir)) {
869 + warn "Unable to chdir to $dir: $!\n";
872 + if (!opendir(DP, '.')) {
873 + warn "Unable to opendir $dir: $!\n";
878 + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
880 + print "$reldir ... ";
881 + print "\n" if $check_opt;
886 + if (open(FP, '<', $SUMS_FILE)) {
889 + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
890 + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
898 + my $update_cnt = 0;
899 + while (defined(my $fn = readdir(DP))) {
900 + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
902 + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
907 + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
908 + $ctime &= 0xFFFFFFFF;
909 + $inode &= 0xFFFFFFFF;
910 + my $ref = $cache{$fn};
914 + if (defined $ref) {
916 + if ($$ref[3] == $size
917 + && $$ref[4] == $mtime
918 + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
919 + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
923 + if (!$update_cnt++) {
924 + print "UPDATING\n" if $verbosity;
928 + if (!open(IN, $fn)) {
929 + print STDERR "Unable to read $fn: $!\n";
930 + if (defined $ref) {
931 + delete $cache{$fn};
939 + while (sysread(IN, $_, 64*1024)) {
943 + $sum4 = $md4->hexdigest;
944 + $sum5 = $md5->hexdigest;
945 + print " $sum4 $sum5" if $verbosity > 2;
946 + print " $fn" if $verbosity > 1;
947 + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
948 + $ctime2 &= 0xFFFFFFFF;
949 + $inode2 &= 0xFFFFFFFF;
950 + last if $size == $size2 && $mtime == $mtime2
951 + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
957 + print " REREADING\n" if $verbosity > 1;
964 + if (!defined $ref) {
966 + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
969 + print " OK\n" if $verbosity > 1;
972 + if ($verbosity < 2) {
973 + print $verbosity ? ' ' : "$reldir/";
979 + print "\n" if $verbosity > 1;
980 + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime, $inode ];
986 + unshift(@dirs, sort @subdirs) if $recurse_opt;
990 + } elsif ($d_cnt == 0) {
992 + print "(removed $SUMS_FILE) " if $verbosity;
993 + unlink($SUMS_FILE);
995 + print "empty\n" if $verbosity;
996 + } elsif ($update_cnt || $d_cnt != $f_cnt) {
997 + print "UPDATING\n" if $verbosity && !$update_cnt;
998 + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
1000 + foreach my $fn (sort keys %cache) {
1001 + my $ref = $cache{$fn};
1002 + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
1003 + next unless $found;
1004 + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
1008 + print "ok\n" if $verbosity;
1017 +Usage: rsyncsums [OPTIONS] [DIRS]
1020 + -r, --recurse Update $SUMS_FILE files in subdirectories too.
1021 + -m, --mode=MODE Compare entries in either "lax" or "strict" mode. Using
1022 + "lax" compares size and mtime, while "strict" additionally
1023 + compares ctime and inode. Default: strict.
1024 + -c, --check Check if the checksums are right (doesn't update).
1025 + -v, --verbose Mention what we're doing. Repeat for more info.
1026 + -h, --help Display this help message.