Make repeated --fuzzy option look into alt-dest dirs.
author Wayne Davison <wayned@samba.org>
Tue, 22 Nov 2011 16:14:01 +0000 (08:14 -0800)
committer Wayne Davison <wayned@samba.org>
Wed, 23 Nov 2011 20:29:25 +0000 (12:29 -0800)
generator.c
main.c
options.c
receiver.c
rsync.yo

diff --git a/generator.c b/generator.c
index 25648ce793750e89aea151b923625d4109c16a00..df690da66ba8e2402e8a94abc6888b69c8093022 100644 (file)
--- a/generator.c
+++ b/generator.c
@@ -733,56 +733,75 @@ static int generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
 
 
 /* Try to find a filename in the same dir as "fname" with a similar name. */
-static int find_fuzzy(struct file_struct *file, struct file_list *dirlist)
+static struct file_struct *find_fuzzy(struct file_struct *file, struct file_list *dirlist_array[], uchar *fnamecmp_type_ptr)
 {
        int fname_len, fname_suf_len;
        const char *fname_suf, *fname = file->basename;
        uint32 lowest_dist = 25 << 16; /* ignore a distance greater than 25 */
-       int j, lowest_j = -1;
+       int i, j;
+       struct file_struct *lowest_fp = NULL;
 
        fname_len = strlen(fname);
        fname_suf = find_filename_suffix(fname, fname_len, &fname_suf_len);
 
-       for (j = 0; j < dirlist->used; j++) {
-               struct file_struct *fp = dirlist->files[j];
-               const char *suf, *name;
-               int len, suf_len;
-               uint32 dist;
+       /* Try to find an exact size+mtime match first. */
+       for (i = 0; i < fuzzy_basis; i++) {
+               struct file_list *dirlist = dirlist_array[i];
 
-               if (!S_ISREG(fp->mode) || !F_LENGTH(fp)
-                || fp->flags & FLAG_FILE_SENT)
+               if (!dirlist)
                        continue;
 
-               name = fp->basename;
+               for (j = 0; j < dirlist->used; j++) {
+                       struct file_struct *fp = dirlist->files[j];
 
-               if (F_LENGTH(fp) == F_LENGTH(file)
-                   && cmp_time(fp->modtime, file->modtime) == 0) {
-                       if (DEBUG_GTE(FUZZY, 2)) {
-                               rprintf(FINFO,
-                                       "fuzzy size/modtime match for %s\n",
-                                       name);
+                       if (!S_ISREG(fp->mode) || !F_LENGTH(fp) || fp->flags & FLAG_FILE_SENT)
+                               continue;
+
+                       if (F_LENGTH(fp) == F_LENGTH(file) && cmp_time(fp->modtime, file->modtime) == 0) {
+                               if (DEBUG_GTE(FUZZY, 2))
+                                       rprintf(FINFO, "fuzzy size/modtime match for %s\n", f_name(fp, NULL));
+                               *fnamecmp_type_ptr = FNAMECMP_FUZZY + i;
+                               return fp;
                        }
-                       return j;
+
                }
+       }
 
-               len = strlen(name);
-               suf = find_filename_suffix(name, len, &suf_len);
+       for (i = 0; i < fuzzy_basis; i++) {
+               struct file_list *dirlist = dirlist_array[i];
 
-               dist = fuzzy_distance(name, len, fname, fname_len);
-               /* Add some extra weight to how well the suffixes match. */
-               dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len)
-                     * 10;
-               if (DEBUG_GTE(FUZZY, 2)) {
-                       rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
-                               name, (int)(dist>>16), (int)(dist&0xFFFF));
-               }
-               if (dist <= lowest_dist) {
-                       lowest_dist = dist;
-                       lowest_j = j;
+               if (!dirlist)
+                       continue;
+
+               for (j = 0; j < dirlist->used; j++) {
+                       struct file_struct *fp = dirlist->files[j];
+                       const char *suf, *name;
+                       int len, suf_len;
+                       uint32 dist;
+
+                       if (!S_ISREG(fp->mode) || !F_LENGTH(fp) || fp->flags & FLAG_FILE_SENT)
+                               continue;
+
+                       name = fp->basename;
+                       len = strlen(name);
+                       suf = find_filename_suffix(name, len, &suf_len);
+
+                       dist = fuzzy_distance(name, len, fname, fname_len);
+                       /* Add some extra weight to how well the suffixes match. */
+                       dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) * 10;
+                       if (DEBUG_GTE(FUZZY, 2)) {
+                               rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
+                                       f_name(fp, NULL), (int)(dist>>16), (int)(dist&0xFFFF));
+                       }
+                       if (dist <= lowest_dist) {
+                               lowest_dist = dist;
+                               lowest_fp = fp;
+                               *fnamecmp_type_ptr = FNAMECMP_FUZZY + i;
+                       }
                }
        }
 
-       return lowest_j;
+       return lowest_fp;
 }
 
 /* Copy a file found in our --copy-dest handling. */
@@ -1128,7 +1147,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        /* Missing dir whose contents are skipped altogether due to
         * --ignore-non-existing, daemon exclude, or mkdir failure. */
        static struct file_struct *skip_dir = NULL;
-       static struct file_list *fuzzy_dirlist = NULL;
+       static struct file_list *fuzzy_dirlist[MAX_BASIS_DIRS+1];
        static int need_fuzzy_dirlist = 0;
        struct file_struct *fuzzy_file = NULL;
        int fd = -1, f_copy = -1;
@@ -1187,10 +1206,13 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        }
 
        if (dry_run > 1 || (dry_missing_dir && is_below(file, dry_missing_dir))) {
+               int i;
          parent_is_dry_missing:
-               if (fuzzy_dirlist) {
-                       flist_free(fuzzy_dirlist);
-                       fuzzy_dirlist = NULL;
+               for (i = 0; i < fuzzy_basis; i++) {
+                       if (fuzzy_dirlist[i]) {
+                               flist_free(fuzzy_dirlist[i]);
+                               fuzzy_dirlist[i] = NULL;
+                       }
                }
                parent_dirname = "";
                statret = -1;
@@ -1209,12 +1231,16 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                                                full_fname(dn));
                                }
                        }
-                       if (fuzzy_dirlist) {
-                               flist_free(fuzzy_dirlist);
-                               fuzzy_dirlist = NULL;
-                       }
-                       if (fuzzy_basis)
+                       if (fuzzy_basis) {
+                               int i;
+                               for (i = 0; i < fuzzy_basis; i++) {
+                                       if (fuzzy_dirlist[i]) {
+                                               flist_free(fuzzy_dirlist[i]);
+                                               fuzzy_dirlist[i] = NULL;
+                                       }
+                               }
                                need_fuzzy_dirlist = 1;
+                       }
 #ifdef SUPPORT_ACLS
                        if (!preserve_perms)
                                dflt_perms = default_perms_for_dir(dn);
@@ -1223,8 +1249,17 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                parent_dirname = dn;
 
                if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
+                       int i;
                        strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
-                       fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES);
+                       for (i = 0; i < fuzzy_basis; i++) {
+                               if (i && pathjoin(fnamecmpbuf, MAXPATHLEN, basis_dir[i-1], dn) >= MAXPATHLEN)
+                                       continue;
+                               fuzzy_dirlist[i] = get_dirlist(fnamecmpbuf, -1, GDL_IGNORE_FILTER_RULES);
+                               if (fuzzy_dirlist[i] && fuzzy_dirlist[i]->used == 0) {
+                                       flist_free(fuzzy_dirlist[i]);
+                                       fuzzy_dirlist[i] = NULL;
+                               }
+                       }
                        need_fuzzy_dirlist = 0;
                }
 
@@ -1629,10 +1664,10 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
        } else
                partialptr = NULL;
 
-       if (statret != 0 && fuzzy_dirlist) {
-               int j = find_fuzzy(file, fuzzy_dirlist);
-               if (j >= 0) {
-                       fuzzy_file = fuzzy_dirlist->files[j];
+       if (statret != 0 && fuzzy_basis) {
+               /* Sets fnamecmp_type to FNAMECMP_FUZZY or above. */
+               fuzzy_file = find_fuzzy(file, fuzzy_dirlist, &fnamecmp_type);
+               if (fuzzy_file) {
                        f_name(fuzzy_file, fnamecmpbuf);
                        if (DEBUG_GTE(FUZZY, 1)) {
                                rprintf(FINFO, "fuzzy basis selected for %s: %s\n",
@@ -1641,7 +1676,6 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                        sx.st.st_size = F_LENGTH(fuzzy_file);
                        statret = 0;
                        fnamecmp = fnamecmpbuf;
-                       fnamecmp_type = FNAMECMP_FUZZY;
                }
        }
 
@@ -1717,10 +1751,10 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                goto notify_others;
        }
 
-       if (fuzzy_dirlist) {
-               int j = flist_find(fuzzy_dirlist, file);
+       if (fuzzy_dirlist[0]) {
+               int j = flist_find(fuzzy_dirlist[0], file);
                if (j >= 0) /* don't use changing file as future fuzzy basis */
-                       fuzzy_dirlist->files[j]->flags |= FLAG_FILE_SENT;
+                       fuzzy_dirlist[0]->files[j]->flags |= FLAG_FILE_SENT;
        }
 
        /* open the file */
@@ -1790,7 +1824,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
                        iflags |= ITEM_REPORT_CHANGE;
                if (fnamecmp_type != FNAMECMP_FNAME)
                        iflags |= ITEM_BASIS_TYPE_FOLLOWS;
-               if (fnamecmp_type == FNAMECMP_FUZZY)
+               if (fnamecmp_type >= FNAMECMP_FUZZY)
                        iflags |= ITEM_XNAME_FOLLOWS;
                itemize(fnamecmp, file, -1, real_ret, &real_sx, iflags, fnamecmp_type,
                        fuzzy_file ? fuzzy_file->basename : NULL);
diff --git a/main.c b/main.c
index 17ba62d65e975784df52565b980439e7988912bb..93cd50d3938df666e004dfedc48af54bcb2d0060 100644 (file)
--- a/main.c
+++ b/main.c
@@ -76,6 +76,7 @@ extern size_t bwlimit_writemax;
 extern unsigned int module_dirlen;
 extern BOOL flist_receiving_enabled;
 extern BOOL shutting_down;
+extern int basis_dir_cnt;
 extern struct stats stats;
 extern char *stdout_format;
 extern char *logfile_format;
@@ -705,33 +706,35 @@ static char *get_local_name(struct file_list *flist, char *dest_path)
 static void check_alt_basis_dirs(void)
 {
        STRUCT_STAT st;
-       char **dir_p, *slash = strrchr(curr_dir, '/');
-
-       for (dir_p = basis_dir; *dir_p; dir_p++) {
-               if (dry_run > 1 && **dir_p != '/') {
-                       int len = curr_dir_len + 1 + strlen(*dir_p) + 1;
+       char *slash = strrchr(curr_dir, '/');
+       int j;
+
+       for (j = 0; j < basis_dir_cnt; j++) {
+               char *bdir = basis_dir[j];
+               int bd_len = strlen(bdir);
+               if (bd_len > 1 && bdir[bd_len-1] == '/')
+                       bdir[--bd_len] = '\0';
+               if (dry_run > 1 && *bdir != '/') {
+                       int len = curr_dir_len + 1 + bd_len + 1;
                        char *new = new_array(char, len);
                        if (!new)
                                out_of_memory("check_alt_basis_dirs");
-                       if (slash && strncmp(*dir_p, "../", 3) == 0) {
+                       if (slash && strncmp(bdir, "../", 3) == 0) {
                            /* We want to remove only one leading "../" prefix for
                             * the directory we couldn't create in dry-run mode:
                             * this ensures that any other ".." references get
                             * evaluated the same as they would for a live copy. */
                            *slash = '\0';
-                           pathjoin(new, len, curr_dir, *dir_p + 3);
+                           pathjoin(new, len, curr_dir, bdir + 3);
                            *slash = '/';
                        } else
-                           pathjoin(new, len, curr_dir, *dir_p);
-                       *dir_p = new;
-               }
-               if (do_stat(*dir_p, &st) < 0) {
-                       rprintf(FWARNING, "%s arg does not exist: %s\n",
-                               dest_option, *dir_p);
-               } else if (!S_ISDIR(st.st_mode)) {
-                       rprintf(FWARNING, "%s arg is not a dir: %s\n",
-                               dest_option, *dir_p);
+                           pathjoin(new, len, curr_dir, bdir);
+                       basis_dir[j] = bdir = new;
                }
+               if (do_stat(bdir, &st) < 0)
+                       rprintf(FWARNING, "%s arg does not exist: %s\n", dest_option, bdir);
+               else if (!S_ISDIR(st.st_mode))
+                       rprintf(FWARNING, "%s arg is not a dir: %s\n", dest_option, bdir);
        }
 }
 
index e8db07cf515cb0ccf5231db84f2e70288c14ccbf..9e95c86ac049ad5f33a6e365764371b75f45a433 100644 (file)
--- a/options.c
+++ b/options.c
@@ -955,7 +955,7 @@ static struct poptOption long_options[] = {
   {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
   {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
   {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
-  {"fuzzy",           'y', POPT_ARG_VAL,    &fuzzy_basis, 1, 0, 0 },
+  {"fuzzy",           'y', POPT_ARG_NONE,   0, 'y', 0, 0 },
   {"no-fuzzy",         0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
   {"no-y",             0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
   {"compress",        'z', POPT_ARG_NONE,   0, 'z', 0, 0 },
@@ -1500,6 +1500,10 @@ int parse_arguments(int *argc_p, const char ***argv_p)
                        verbose++;
                        break;
 
+               case 'y':
+                       fuzzy_basis++;
+                       break;
+
                case 'q':
                        quiet++;
                        break;
@@ -1845,6 +1849,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
        }
 #endif
 
+       if (fuzzy_basis > 1)
+               fuzzy_basis = basis_dir_cnt + 1;
+
        if (protect_args == 1 && am_server)
                return 1;
 
@@ -2342,6 +2349,11 @@ void server_options(char **args, int *argc_p)
                        argstr[x++] = 'O';
                if (omit_link_times)
                        argstr[x++] = 'J';
+               if (fuzzy_basis) {
+                       argstr[x++] = 'y';
+                       if (fuzzy_basis > 1)
+                               argstr[x++] = 'y';
+               }
        } else {
                if (copy_links)
                        argstr[x++] = 'L';
@@ -2680,9 +2692,6 @@ void server_options(char **args, int *argc_p)
        if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
                args[ac++] = "--no-implied-dirs";
 
-       if (fuzzy_basis && am_sender)
-               args[ac++] = "--fuzzy";
-
        if (remove_source_files == 1)
                args[ac++] = "--remove-source-files";
        else if (remove_source_files)
diff --git a/receiver.c b/receiver.c
index 1819830a627c51d2f586e671d67d21216b9e81dd..3ab893d070e8ff9432b71b9831ed7915f3560156 100644 (file)
--- a/receiver.c
+++ b/receiver.c
@@ -699,21 +699,26 @@ int recv_files(int f_in, int f_out, char *local_name)
                                break;
                        case FNAMECMP_FUZZY:
                                if (file->dirname) {
-                                       pathjoin(fnamecmpbuf, MAXPATHLEN,
-                                                file->dirname, xname);
+                                       pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, file->dirname, xname);
                                        fnamecmp = fnamecmpbuf;
                                } else
                                        fnamecmp = xname;
                                break;
                        default:
-                               if (fnamecmp_type >= basis_dir_cnt) {
+                               if (fnamecmp_type > FNAMECMP_FUZZY && fnamecmp_type-FNAMECMP_FUZZY <= basis_dir_cnt) {
+                                       fnamecmp_type -= FNAMECMP_FUZZY + 1;
+                                       if (file->dirname) {
+                                               stringjoin(fnamecmpbuf, sizeof fnamecmpbuf,
+                                                          basis_dir[fnamecmp_type], "/", file->dirname, "/", xname, NULL);
+                                       } else
+                                               pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[fnamecmp_type], xname);
+                               } else if (fnamecmp_type >= basis_dir_cnt) {
                                        rprintf(FERROR,
                                                "invalid basis_dir index: %d.\n",
                                                fnamecmp_type);
                                        exit_cleanup(RERR_PROTOCOL);
-                               }
-                               pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
-                                        basis_dir[fnamecmp_type], fname);
+                               } else
+                                       pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[fnamecmp_type], fname);
                                fnamecmp = fnamecmpbuf;
                                break;
                        }
index 3c0bfc00fb4c6fbd7932b0b04bc6920733c65c71..43f264d2c9a67ab408619e62abb3991394b8641c 100644 (file)
--- a/rsync.yo
+++ b/rsync.yo
@@ -1748,6 +1748,10 @@ looks in the same directory as the destination file for either a file that
 has an identical size and modified-time, or a similarly-named file.  If
 found, rsync uses the fuzzy basis file to try to speed up the transfer.
 
+If the option is repeated, the fuzzy scan will also be done in any alternate
+destination directories that are specified via bf(--compare-dest),
+bf(--copy-dest), or bf(--link-dest).
+
 Note that the use of the bf(--delete) option might get rid of any potential
 fuzzy-match files, so either use bf(--delete-after) or specify some
 filename exclusions if you need to prevent this.