From 48b51d0004922cb029c55fe921f5e7df1c0bff23 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Tue, 22 Nov 2011 08:14:01 -0800 Subject: [PATCH] make repeated --fuzzy option look into alt-dest dirs. --- generator.c | 134 ++++++++++++++++++++++++++++++++-------------------- main.c | 35 +++++++------- options.c | 17 +++++-- receiver.c | 17 ++++--- rsync.yo | 4 ++ 5 files changed, 131 insertions(+), 76 deletions(-) diff --git a/generator.c b/generator.c index 25648ce7..df690da6 100644 --- a/generator.c +++ b/generator.c @@ -733,56 +733,75 @@ static int generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy) /* Try to find a filename in the same dir as "fname" with a similar name. */ -static int find_fuzzy(struct file_struct *file, struct file_list *dirlist) +static struct file_struct *find_fuzzy(struct file_struct *file, struct file_list *dirlist_array[], uchar *fnamecmp_type_ptr) { int fname_len, fname_suf_len; const char *fname_suf, *fname = file->basename; uint32 lowest_dist = 25 << 16; /* ignore a distance greater than 25 */ - int j, lowest_j = -1; + int i, j; + struct file_struct *lowest_fp = NULL; fname_len = strlen(fname); fname_suf = find_filename_suffix(fname, fname_len, &fname_suf_len); - for (j = 0; j < dirlist->used; j++) { - struct file_struct *fp = dirlist->files[j]; - const char *suf, *name; - int len, suf_len; - uint32 dist; + /* Try to find an exact size+mtime match first. */ + for (i = 0; i < fuzzy_basis; i++) { + struct file_list *dirlist = dirlist_array[i]; - if (!S_ISREG(fp->mode) || !F_LENGTH(fp) - || fp->flags & FLAG_FILE_SENT) + if (!dirlist) continue; - name = fp->basename; + for (j = 0; j < dirlist->used; j++) { + struct file_struct *fp = dirlist->files[j]; - if (F_LENGTH(fp) == F_LENGTH(file) - && cmp_time(fp->modtime, file->modtime) == 0) { - if (DEBUG_GTE(FUZZY, 2)) { - rprintf(FINFO, - "fuzzy size/modtime match for %s\n", - name); + if (!S_ISREG(fp->mode) || !F_LENGTH(fp) || fp->flags & FLAG_FILE_SENT) + continue; + + if (F_LENGTH(fp) == F_LENGTH(file) && cmp_time(fp->modtime, file->modtime) == 0) { + if (DEBUG_GTE(FUZZY, 2)) + rprintf(FINFO, "fuzzy size/modtime match for %s\n", f_name(fp, NULL)); + *fnamecmp_type_ptr = FNAMECMP_FUZZY + i; + return fp; } - return j; + } + } - len = strlen(name); - suf = find_filename_suffix(name, len, &suf_len); + for (i = 0; i < fuzzy_basis; i++) { + struct file_list *dirlist = dirlist_array[i]; - dist = fuzzy_distance(name, len, fname, fname_len); - /* Add some extra weight to how well the suffixes match. */ - dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) - * 10; - if (DEBUG_GTE(FUZZY, 2)) { - rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n", - name, (int)(dist>>16), (int)(dist&0xFFFF)); - } - if (dist <= lowest_dist) { - lowest_dist = dist; - lowest_j = j; + if (!dirlist) + continue; + + for (j = 0; j < dirlist->used; j++) { + struct file_struct *fp = dirlist->files[j]; + const char *suf, *name; + int len, suf_len; + uint32 dist; + + if (!S_ISREG(fp->mode) || !F_LENGTH(fp) || fp->flags & FLAG_FILE_SENT) + continue; + + name = fp->basename; + len = strlen(name); + suf = find_filename_suffix(name, len, &suf_len); + + dist = fuzzy_distance(name, len, fname, fname_len); + /* Add some extra weight to how well the suffixes match. */ + dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) * 10; + if (DEBUG_GTE(FUZZY, 2)) { + rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n", + f_name(fp, NULL), (int)(dist>>16), (int)(dist&0xFFFF)); + } + if (dist <= lowest_dist) { + lowest_dist = dist; + lowest_fp = fp; + *fnamecmp_type_ptr = FNAMECMP_FUZZY + i; + } } } - return lowest_j; + return lowest_fp; } /* Copy a file found in our --copy-dest handling. */ @@ -1128,7 +1147,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, /* Missing dir whose contents are skipped altogether due to * --ignore-non-existing, daemon exclude, or mkdir failure. */ static struct file_struct *skip_dir = NULL; - static struct file_list *fuzzy_dirlist = NULL; + static struct file_list *fuzzy_dirlist[MAX_BASIS_DIRS+1]; static int need_fuzzy_dirlist = 0; struct file_struct *fuzzy_file = NULL; int fd = -1, f_copy = -1; @@ -1187,10 +1206,13 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, } if (dry_run > 1 || (dry_missing_dir && is_below(file, dry_missing_dir))) { + int i; parent_is_dry_missing: - if (fuzzy_dirlist) { - flist_free(fuzzy_dirlist); - fuzzy_dirlist = NULL; + for (i = 0; i < fuzzy_basis; i++) { + if (fuzzy_dirlist[i]) { + flist_free(fuzzy_dirlist[i]); + fuzzy_dirlist[i] = NULL; + } } parent_dirname = ""; statret = -1; @@ -1209,12 +1231,16 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, full_fname(dn)); } } - if (fuzzy_dirlist) { - flist_free(fuzzy_dirlist); - fuzzy_dirlist = NULL; - } - if (fuzzy_basis) + if (fuzzy_basis) { + int i; + for (i = 0; i < fuzzy_basis; i++) { + if (fuzzy_dirlist[i]) { + flist_free(fuzzy_dirlist[i]); + fuzzy_dirlist[i] = NULL; + } + } need_fuzzy_dirlist = 1; + } #ifdef SUPPORT_ACLS if (!preserve_perms) dflt_perms = default_perms_for_dir(dn); @@ -1223,8 +1249,17 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, parent_dirname = dn; if (need_fuzzy_dirlist && S_ISREG(file->mode)) { + int i; strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf); - fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES); + for (i = 0; i < fuzzy_basis; i++) { + if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN) + continue; + fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES); + if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) { + flist_free(fuzzy_dirlist[i]); + fuzzy_dirlist[i] = NULL; + } + } need_fuzzy_dirlist = 0; } @@ -1629,10 +1664,10 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, } else partialptr = NULL; - if (statret != 0 && fuzzy_dirlist) { - int j = find_fuzzy(file, fuzzy_dirlist); - if (j >= 0) { - fuzzy_file = fuzzy_dirlist->files[j]; + if (statret != 0 && fuzzy_basis) { + /* Sets fnamecmp_type to FNAMECMP_FUZZY or above. */ + fuzzy_file = find_fuzzy(file, fuzzy_dirlist, &fnamecmp_type); + if (fuzzy_file) { f_name(fuzzy_file, fnamecmpbuf); if (DEBUG_GTE(FUZZY, 1)) { rprintf(FINFO, "fuzzy basis selected for %s: %s\n", @@ -1641,7 +1676,6 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, sx.st.st_size = F_LENGTH(fuzzy_file); statret = 0; fnamecmp = fnamecmpbuf; - fnamecmp_type = FNAMECMP_FUZZY; } } @@ -1717,10 +1751,10 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, goto notify_others; } - if (fuzzy_dirlist) { - int j = flist_find(fuzzy_dirlist, file); + if (fuzzy_dirlist[0]) { + int j = flist_find(fuzzy_dirlist[0], file); if (j >= 0) /* don't use changing file as future fuzzy basis */ - fuzzy_dirlist->files[j]->flags |= FLAG_FILE_SENT; + fuzzy_dirlist[0]->files[j]->flags |= FLAG_FILE_SENT; } /* open the file */ @@ -1790,7 +1824,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, iflags |= ITEM_REPORT_CHANGE; if (fnamecmp_type != FNAMECMP_FNAME) iflags |= ITEM_BASIS_TYPE_FOLLOWS; - if (fnamecmp_type == FNAMECMP_FUZZY) + if (fnamecmp_type >= FNAMECMP_FUZZY) iflags |= ITEM_XNAME_FOLLOWS; itemize(fnamecmp, file, -1, real_ret, &real_sx, iflags, fnamecmp_type, fuzzy_file ? fuzzy_file->basename : NULL); diff --git a/main.c b/main.c index 17ba62d6..93cd50d3 100644 --- a/main.c +++ b/main.c @@ -76,6 +76,7 @@ extern size_t bwlimit_writemax; extern unsigned int module_dirlen; extern BOOL flist_receiving_enabled; extern BOOL shutting_down; +extern int basis_dir_cnt; extern struct stats stats; extern char *stdout_format; extern char *logfile_format; @@ -705,33 +706,35 @@ static char *get_local_name(struct file_list *flist, char *dest_path) static void check_alt_basis_dirs(void) { STRUCT_STAT st; - char **dir_p, *slash = strrchr(curr_dir, '/'); - - for (dir_p = basis_dir; *dir_p; dir_p++) { - if (dry_run > 1 && **dir_p != '/') { - int len = curr_dir_len + 1 + strlen(*dir_p) + 1; + char *slash = strrchr(curr_dir, '/'); + int j; + + for (j = 0; j < basis_dir_cnt; j++) { + char *bdir = basis_dir[j]; + int bd_len = strlen(bdir); + if (bd_len > 1 && bdir[bd_len-1] == '/') + bdir[--bd_len] = '\0'; + if (dry_run > 1 && *bdir != '/') { + int len = curr_dir_len + 1 + bd_len + 1; char *new = new_array(char, len); if (!new) out_of_memory("check_alt_basis_dirs"); - if (slash && strncmp(*dir_p, "../", 3) == 0) { + if (slash && strncmp(bdir, "../", 3) == 0) { /* We want to remove only one leading "../" prefix for * the directory we couldn't create in dry-run mode: * this ensures that any other ".." references get * evaluated the same as they would for a live copy. */ *slash = '\0'; - pathjoin(new, len, curr_dir, *dir_p + 3); + pathjoin(new, len, curr_dir, bdir + 3); *slash = '/'; } else - pathjoin(new, len, curr_dir, *dir_p); - *dir_p = new; - } - if (do_stat(*dir_p, &st) < 0) { - rprintf(FWARNING, "%s arg does not exist: %s\n", - dest_option, *dir_p); - } else if (!S_ISDIR(st.st_mode)) { - rprintf(FWARNING, "%s arg is not a dir: %s\n", - dest_option, *dir_p); + pathjoin(new, len, curr_dir, bdir); + basis_dir[j] = bdir = new; } + if (do_stat(bdir, &st) < 0) + rprintf(FWARNING, "%s arg does not exist: %s\n", dest_option, bdir); + else if (!S_ISDIR(st.st_mode)) + rprintf(FWARNING, "%s arg is not a dir: %s\n", dest_option, bdir); } } diff --git a/options.c b/options.c index e8db07cf..9e95c86a 100644 --- a/options.c +++ b/options.c @@ -955,7 +955,7 @@ static struct poptOption long_options[] = { {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, - {"fuzzy", 'y', POPT_ARG_VAL, &fuzzy_basis, 1, 0, 0 }, + {"fuzzy", 'y', POPT_ARG_NONE, 0, 'y', 0, 0 }, {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 }, {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 }, {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 }, @@ -1500,6 +1500,10 @@ int parse_arguments(int *argc_p, const char ***argv_p) verbose++; break; + case 'y': + fuzzy_basis++; + break; + case 'q': quiet++; break; @@ -1845,6 +1849,9 @@ int parse_arguments(int *argc_p, const char ***argv_p) } #endif + if (fuzzy_basis > 1) + fuzzy_basis = basis_dir_cnt + 1; + if (protect_args == 1 && am_server) return 1; @@ -2342,6 +2349,11 @@ void server_options(char **args, int *argc_p) argstr[x++] = 'O'; if (omit_link_times) argstr[x++] = 'J'; + if (fuzzy_basis) { + argstr[x++] = 'y'; + if (fuzzy_basis > 1) + argstr[x++] = 'y'; + } } else { if (copy_links) argstr[x++] = 'L'; @@ -2680,9 +2692,6 @@ void server_options(char **args, int *argc_p) if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30)) args[ac++] = "--no-implied-dirs"; - if (fuzzy_basis && am_sender) - args[ac++] = "--fuzzy"; - if (remove_source_files == 1) args[ac++] = "--remove-source-files"; else if (remove_source_files) diff --git a/receiver.c b/receiver.c index 1819830a..3ab893d0 100644 --- a/receiver.c +++ b/receiver.c @@ -699,21 +699,26 @@ int recv_files(int f_in, int f_out, char *local_name) break; case FNAMECMP_FUZZY: if (file->dirname) { - pathjoin(fnamecmpbuf, MAXPATHLEN, - file->dirname, xname); + pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, file->dirname, xname); fnamecmp = fnamecmpbuf; } else fnamecmp = xname; break; default: - if (fnamecmp_type >= basis_dir_cnt) { + if (fnamecmp_type > FNAMECMP_FUZZY && fnamecmp_type-FNAMECMP_FUZZY <= basis_dir_cnt) { + fnamecmp_type -= FNAMECMP_FUZZY + 1; + if (file->dirname) { + stringjoin(fnamecmpbuf, sizeof fnamecmpbuf, + basis_dir[fnamecmp_type], "/", file->dirname, "/", xname, NULL); + } else + pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[fnamecmp_type], xname); + } else if (fnamecmp_type >= basis_dir_cnt) { rprintf(FERROR, "invalid basis_dir index: %d.\n", fnamecmp_type); exit_cleanup(RERR_PROTOCOL); - } - pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, - basis_dir[fnamecmp_type], fname); + } else + pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[fnamecmp_type], fname); fnamecmp = fnamecmpbuf; break; } diff --git a/rsync.yo b/rsync.yo index 3c0bfc00..43f264d2 100644 --- a/rsync.yo +++ b/rsync.yo @@ -1748,6 +1748,10 @@ looks in the same directory as the destination file for either a file that has an identical size and modified-time, or a similarly-named file. If found, rsync uses the fuzzy basis file to try to speed up the transfer. +If the option is repeated, the fuzzy scan will also be done in any alternate +destination directories that are specified via bf(--compare-dest), +bf(--copy-dest), or bf(--link-dest). + Note that the use of the bf(--delete) option might get rid of any potential fuzzy-match files, so either use bf(--delete-after) or specify some filename exclusions if you need to prevent this. -- 2.34.1