The patches for 3.0.9.
[rsync.git/patches.git] / link-by-hash.diff
1 Jason M. Felice wrote:
2
3 This patch adds the --link-by-hash=DIR option, which hard-links each received
4 file into a link farm arranged by MD4 file hash.  The result is that the system
5 will only store one copy of the unique contents of each file, regardless of
6 the file's name.
7
8 To use this patch, run these commands for a successful build:
9
10     patch -p1 <patches/link-by-hash.diff
11     ./prepare-source
12     ./configure                         (optional if already run)
13     make
14
15 based-on: 40afd365cc8ca968fd16e161d24df5b8a8a520cc
16 diff --git a/Makefile.in b/Makefile.in
17 --- a/Makefile.in
18 +++ b/Makefile.in
19 @@ -36,7 +36,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \
20         util.o main.o checksum.o match.o syscall.o log.o backup.o
21  OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \
22         fileio.o batch.o clientname.o chmod.o acls.o xattrs.o
23 -OBJS3=progress.o pipe.o
24 +OBJS3=progress.o pipe.o hashlink.o
25  DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
26  popt_OBJS=popt/findme.o  popt/popt.o  popt/poptconfig.o \
27         popt/popthelp.o popt/poptparse.o
28 diff --git a/flist.c b/flist.c
29 --- a/flist.c
30 +++ b/flist.c
31 @@ -70,6 +70,7 @@ extern int unsort_ndx;
32  extern uid_t our_uid;
33  extern struct stats stats;
34  extern char *filesfrom_host;
35 +extern char *link_by_hash_dir;
36  
37  extern char curr_dir[MAXPATHLEN];
38  
39 @@ -854,7 +855,7 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
40                 extra_len += EXTRA_LEN;
41  #endif
42  
43 -       if (always_checksum && S_ISREG(mode))
44 +       if ((always_checksum || link_by_hash_dir) && S_ISREG(mode))
45                 extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
46  
47  #if SIZEOF_INT64 >= 8
48 diff --git a/hashlink.c b/hashlink.c
49 new file mode 100644
50 --- /dev/null
51 +++ b/hashlink.c
52 @@ -0,0 +1,340 @@
53 +/*
54 +   Copyright (C) Cronosys, LLC 2004
55 +
56 +   This program is free software; you can redistribute it and/or modify
57 +   it under the terms of the GNU General Public License as published by
58 +   the Free Software Foundation; either version 2 of the License, or
59 +   (at your option) any later version.
60 +
61 +   This program is distributed in the hope that it will be useful,
62 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
63 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
64 +   GNU General Public License for more details.
65 +
66 +   You should have received a copy of the GNU General Public License
67 +   along with this program; if not, write to the Free Software
68 +   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
69 +*/
70 +
71 +/* This file contains code used by the --link-by-hash option. */
72 +
73 +#include "rsync.h"
74 +
75 +extern char *link_by_hash_dir;
76 +
77 +#ifdef HAVE_LINK
78 +/* Return a malloc'd "link_by_hash_dir/XXXXXXXX/YYYYYYYYYYYYYYYYYYYYYYYY" path built from the 16 bytes at F_SUM(file). */
79 +char *make_hash_name(struct file_struct *file)
80 +{
81 +       static const char hexdigits[] = "0123456789abcdef";
82 +       char hash[4*2 + 1 + 12*2 + 1], *dst = hash;
83 +       const uchar *sum = (uchar*)F_SUM(file);
84 +       int n;
85 +
86 +       /* Emit two lowercase hex digits per sum byte.  The first 4 bytes
87 +        * form the first path component and the remaining 12 bytes form
88 +        * the second one, so a '/' is inserted between the two groups. */
89 +       for (n = 0; n < 4 + 12; n++) {
90 +               uchar hi = sum[n] >> 4;
91 +               uchar lo = sum[n] & 0x0f;
92 +
93 +               if (n == 4)
94 +                       *dst++ = '/';
95 +               *dst++ = hexdigits[hi];
96 +               *dst++ = hexdigits[lo];
97 +       }
98 +       *dst = '\0';
99 +
100 +       /* Prefix the farm directory; dst is reused for the result. */
101 +       if (asprintf(&dst, "%s/%s", link_by_hash_dir, hash) < 0)
102 +               out_of_memory("make_hash_name");
103 +       return dst;
104 +}
105 +
106 +/* Dispose of one candidate entry: its name string, its descriptor and the node itself.  NULL is ignored. */
107 +void kill_hashfile(struct hashfile_struct *hashfile)
108 +{
109 +       if (hashfile != NULL) {
110 +               free(hashfile->name);
111 +               close(hashfile->fd);
112 +               free(hashfile);
113 +       }
114 +}
115 +
116 +/* Dispose of every entry on the circular list that starts at hashfiles.  NULL is ignored. */
117 +void kill_hashfiles(struct hashfile_struct *hashfiles)
118 +{
119 +       struct hashfile_struct *cur = hashfiles, *following;
120 +       if (cur == NULL)
121 +               return;
122 +       do {
123 +               following = cur->next; /* read before cur is freed */
124 +               kill_hashfile(cur);
125 +               cur = following;
126 +       } while (cur != hashfiles);
127 +}
128 +
129 +/* Open every entry of directory hashname whose size equals size and return them as a circular list (NULL if none); *fnbr gets the largest atol() value of any entry name. */
130 +struct hashfile_struct *find_hashfiles(char *hashname, int64 size, long *fnbr)
131 +{
132 +       DIR *d;
133 +       struct dirent *di;
134 +       struct hashfile_struct *hashfiles = NULL, *hashfile;
135 +       STRUCT_STAT st;
136 +       long this_fnbr;
137 +
138 +       *fnbr = 0;
139 +
140 +       /* Build a list of potential candidates and open them.  hashname
141 +        * belongs to the caller, so it is not freed here on failure. */
142 +       if ((d = opendir(hashname)) == NULL) {
143 +               rsyserr(FERROR, errno, "opendir failed: \"%s\"", hashname);
144 +               return NULL;
145 +       }
146 +       while ((di = readdir(d)) != NULL) {
147 +               if (!strcmp(di->d_name,".") || !strcmp(di->d_name,".."))
148 +                       continue;
149 +
150 +               /* We need to have the largest fnbr in case we need to store
151 +                * a new file. */
152 +               this_fnbr = atol(di->d_name);
153 +               if (this_fnbr > *fnbr)
154 +                       *fnbr = this_fnbr;
155 +
156 +               if (!(hashfile = new_array(struct hashfile_struct, 1)))
157 +                       out_of_memory("find_hashfiles");
158 +               hashfile->fd = -1; /* nothing open yet for kill_hashfile() to close */
159 +               if (asprintf(&hashfile->name,"%s/%s",hashname, di->d_name) < 0)
160 +                       out_of_memory("find_hashfiles");
161 +               if (do_stat(hashfile->name,&st) == -1) {
162 +                       rsyserr(FERROR, errno, "stat failed: %s", hashfile->name);
163 +                       kill_hashfile(hashfile);
164 +                       continue;
165 +               }
166 +               if (st.st_size != size) {
167 +                       kill_hashfile(hashfile);
168 +                       continue;
169 +               }
170 +               hashfile->nlink = st.st_nlink;
171 +               hashfile->fd = open(hashfile->name,O_RDONLY|O_BINARY);
172 +               if (hashfile->fd == -1) {
173 +                       rsyserr(FERROR, errno, "open failed: %s", hashfile->name);
174 +                       kill_hashfile(hashfile);
175 +                       continue;
176 +               }
177 +               if (hashfiles == NULL)
178 +                       hashfiles = hashfile->next = hashfile->prev = hashfile;
179 +               else {
180 +                       hashfile->next = hashfiles;
181 +                       hashfile->prev = hashfiles->prev;
182 +                       hashfile->next->prev = hashfile;
183 +                       hashfile->prev->next = hashfile;
184 +               }
185 +       }
186 +       closedir(d);
187 +
188 +       return hashfiles;
189 +}
190 +
191 +/* Compare the file open on fd with each candidate on the circular list files; returns the identical candidate with the lowest link count (all others are killed), or NULL if none matches or fd cannot be read. */
192 +struct hashfile_struct *compare_hashfiles(int fd,struct hashfile_struct *files)
193 +{
194 +       int amt, hamt;
195 +       int last;
196 +       char buffer[BUFSIZ], cmpbuffer[BUFSIZ];
197 +       struct hashfile_struct *iter, *next, *best;
198 +       uint32 nlink;
199 +
200 +       if (!files)
201 +               return NULL;
202 +
203 +       /* Compare fd against all candidates one BUFSIZ chunk at a time,
204 +        * dropping each candidate as soon as a chunk differs.  Every
205 +        * surviving candidate has to be read exactly once per chunk:
206 +        * its descriptor must stay at the same offset as fd, and a
207 +        * candidate that is skipped could otherwise be returned as a
208 +        * match without its data ever having been checked. */
209 +       while ((amt = read(fd, buffer, BUFSIZ)) > 0) {
210 +               iter = files;
211 +               do {
212 +                       next = iter->next;
213 +                       /* Note whether iter is the final node of this
214 +                        * pass before the list is modified below. */
215 +                       last = (next == files);
216 +
217 +                       hamt = read(iter->fd, cmpbuffer, BUFSIZ);
218 +                       if (amt != hamt || memcmp(buffer, cmpbuffer, amt)) {
219 +                               if (next == iter) {
220 +                                       /* The only candidate left just
221 +                                        * failed: there are no matches. */
222 +                                       kill_hashfile(iter);
223 +                                       return NULL;
224 +                               }
225 +                               /* Unlink iter, moving the head along
226 +                                * if iter is the head itself. */
227 +                               if (iter == files)
228 +                                       files = next;
229 +                               iter->next->prev = iter->prev;
230 +                               iter->prev->next = iter->next;
231 +                               kill_hashfile(iter);
232 +                       }
233 +
234 +                       iter = next;
235 +               } while (!last);
236 +       }
237 +
238 +       /* A read error on fd leaves the comparison undecided, so the
239 +        * remaining candidates are dropped and no match is reported. */
240 +       if (amt == -1) {
241 +               rsyserr(FERROR, errno, "read failed in compare_hashfiles()");
242 +               kill_hashfiles(files);
243 +               return NULL;
244 +       }
245 +
246 +       /* If we only have one file left, use it. */
247 +       if (files == files->next) {
248 +               return files;
249 +       }
250 +
251 +       /* All files which remain in the list are identical and should have
252 +        * the same size.  We pick the one with the lowest link count (we
253 +        * may have rolled over because we hit the maximum link count for
254 +        * the filesystem). */
255 +       best = iter = files;
256 +       nlink = iter->nlink;
257 +       do {
258 +               if (iter->nlink < nlink) {
259 +                       nlink = iter->nlink;
260 +                       best = iter;
261 +               }
262 +               iter = iter->next;
263 +       } while (iter != files);
264 +
265 +       best->next->prev = best->prev;
266 +       best->prev->next = best->next;
267 +       if (files == best)
268 +               files = files->next;
269 +       kill_hashfiles(files);
270 +       return best;
271 +}
272 +
273 +/* Install fnametmp as fname, hard-linking fname to an identical hash-farm file when one exists and adding fname to the farm otherwise.  Returns the rename/link status (negative on failure); a failure to add a new file to the farm is logged but not returned. */
274 +int link_by_hash(const char *fnametmp, const char *fname, struct file_struct *file)
275 +{
276 +       STRUCT_STAT st;
277 +       char *hashname, *linkname = NULL;
278 +       int first = 0, rc;
279 +       long last_fnbr = 0;
280 +
281 +       /* Empty files are not pooled; hashname is built only after this test so the early return leaks nothing. */
282 +       if (F_LENGTH(file) == 0)
283 +               return robust_rename(fnametmp, fname, NULL, 0644);
284 +
285 +       hashname = make_hash_name(file);
286 +       if (do_stat(hashname, &st) == -1) {
287 +               char *dirname;
288 +               /* Directory does not exist. */
289 +               if (!(dirname = strdup(hashname)))
290 +                       out_of_memory("link_by_hash");
291 +               *strrchr(dirname,'/') = 0;
292 +               if (do_mkdir(dirname, 0755) == -1 && errno != EEXIST) {
293 +                       rsyserr(FERROR, errno, "mkdir failed: %s", dirname);
294 +                       free(dirname);
295 +                       rc = robust_rename(fnametmp, fname, NULL, 0644);
296 +                       goto out;
297 +               }
298 +               free(dirname);
299 +               if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) {
300 +                       rsyserr(FERROR, errno, "mkdir failed: %s", hashname);
301 +                       rc = robust_rename(fnametmp, fname, NULL, 0644);
302 +                       goto out;
303 +               }
304 +
305 +               first = 1;
306 +               if (asprintf(&linkname,"%s/0",hashname) < 0)
307 +                       out_of_memory("link_by_hash");
308 +               rprintf(FINFO, "(1) linkname = %s\n", linkname);
309 +       } else {
310 +               struct hashfile_struct *hashfiles, *hashfile;
311 +
312 +               if (do_stat(fnametmp,&st) == -1) {
313 +                       rsyserr(FERROR, errno, "stat failed: %s", fnametmp);
314 +                       rc = -1;
315 +                       goto out;
316 +               }
317 +               hashfiles = find_hashfiles(hashname, st.st_size, &last_fnbr);
318 +               if (hashfiles == NULL) {
319 +                       first = 1;
320 +                       if (asprintf(&linkname,"%s/0",hashname) < 0)
321 +                               out_of_memory("link_by_hash");
322 +                       rprintf(FINFO, "(2) linkname = %s\n", linkname);
323 +               } else {
324 +                       int fd;
325 +                       /* Search for one identical to us. */
326 +                       if ((fd = open(fnametmp,O_RDONLY|O_BINARY)) == -1) {
327 +                               rsyserr(FERROR, errno, "open failed: %s", fnametmp);
328 +                               kill_hashfiles(hashfiles);
329 +                               rc = -1;
330 +                               goto out;
331 +                       }
332 +                       hashfile = compare_hashfiles(fd, hashfiles);
333 +                       hashfiles = NULL;
334 +                       close(fd);
335 +                       if (hashfile) {
336 +                               if (!(linkname = strdup(hashfile->name)))
337 +                                       out_of_memory("link_by_hash");
338 +                               rprintf(FINFO, "(3) linkname = %s\n", linkname);
339 +                               kill_hashfile(hashfile);
340 +                       } else {
341 +                               first = 1;
342 +                               if (asprintf(&linkname, "%s/%ld", hashname, last_fnbr + 1) < 0)
343 +                                       out_of_memory("link_by_hash");
344 +                               rprintf(FINFO, "(4) linkname = %s\n", linkname);
345 +                       }
346 +               }
347 +       }
348 +
349 +       if (!first) {
350 +               rprintf(FINFO, "link-by-hash (existing): \"%s\" -> %s\n",
351 +                               linkname, full_fname(fname));
352 +               robust_unlink(fname);
353 +               rc = do_link(linkname, fname);
354 +               if (rc == -1) {
355 +                       if (errno == EMLINK) {
356 +                               first = 1;
357 +                               free(linkname);
358 +                               if (asprintf(&linkname,"%s/%ld",hashname, last_fnbr + 1) < 0)
359 +                                       out_of_memory("link_by_hash");
360 +                               rprintf(FINFO, "(5) linkname = %s\n", linkname);
361 +                               rprintf(FINFO,"link-by-hash: max link count exceeded, starting new file \"%s\".\n", linkname);
362 +                       } else {
363 +                               rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
364 +                                       linkname, full_fname(fname));
365 +                               rc = robust_rename(fnametmp, fname, NULL, 0644);
366 +                       }
367 +               } else {
368 +                       do_unlink(fnametmp);
369 +               }
370 +       }
371 +
372 +       if (first) {
373 +               rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n",
374 +                               full_fname(fname),linkname);
375 +               /* fname joins the farm only once it really holds the new data; rc stays the rename's result. */
376 +               rc = robust_rename(fnametmp, fname, NULL, 0644);
377 +               if (rc < 0) {
378 +                       /* NOTE(review): full_fname() appears to reuse one buffer, so it is called once per message -- confirm. */
379 +                       rsyserr(FERROR, errno, "rename \"%s\" -> \"%s\"",
380 +                               fnametmp, full_fname(fname));
381 +               } else if (do_link(fname, linkname) != 0) {
382 +                       rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
383 +                               full_fname(fname), linkname);
384 +               }
385 +       }
386 +
387 +out:
388 +       free(linkname);
389 +       free(hashname);
390 +       return rc;
391 +}
392 +#endif
393 diff --git a/options.c b/options.c
394 --- a/options.c
395 +++ b/options.c
396 @@ -155,6 +155,7 @@ char *backup_suffix = NULL;
397  char *tmpdir = NULL;
398  char *partial_dir = NULL;
399  char *basis_dir[MAX_BASIS_DIRS+1];
400 +char *link_by_hash_dir = NULL;
401  char *config_file = NULL;
402  char *shell_cmd = NULL;
403  char *logfile_name = NULL;
404 @@ -393,6 +394,7 @@ void usage(enum logcode F)
405    rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
406    rprintf(F,"     --copy-dest=DIR         ... and include copies of unchanged files\n");
407    rprintf(F,"     --link-dest=DIR         hardlink to files in DIR when unchanged\n");
408 +  rprintf(F,"     --link-by-hash=DIR      create hardlinks by hash into DIR\n");
409    rprintf(F," -z, --compress              compress file data during the transfer\n");
410    rprintf(F,"     --compress-level=NUM    explicitly set compression level\n");
411    rprintf(F,"     --skip-compress=LIST    skip compressing files with a suffix in LIST\n");
412 @@ -445,7 +447,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
413        OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
414        OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
415        OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
416 -      OPT_NO_D, OPT_APPEND, OPT_NO_ICONV,
417 +      OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_LINK_BY_HASH,
418        OPT_SERVER, OPT_REFUSED_BASE = 9000};
419  
420  static struct poptOption long_options[] = {
421 @@ -577,6 +579,7 @@ static struct poptOption long_options[] = {
422    {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
423    {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
424    {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
425 +  {"link-by-hash",     0,  POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
426    {"fuzzy",           'y', POPT_ARG_VAL,    &fuzzy_basis, 1, 0, 0 },
427    {"no-fuzzy",         0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
428    {"no-y",             0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
429 @@ -1259,6 +1262,21 @@ int parse_arguments(int *argc_p, const char ***argv_p)
430                         return 0;
431  #endif
432  
433 +                case OPT_LINK_BY_HASH:
434 +#ifdef HAVE_LINK
435 +                       arg = poptGetOptArg(pc);
436 +                       if (sanitize_paths)
437 +                               arg = sanitize_path(NULL, arg, NULL, 0, SP_DEFAULT);
438 +                       link_by_hash_dir = (char *)arg;
439 +                       break;
440 +#else
441 +                       snprintf(err_buf, sizeof err_buf,
442 +                                "hard links are not supported on this %s\n",
443 +                                am_server ? "server" : "client");
444 +                       rprintf(FERROR, "ERROR: %s", err_buf);
445 +                       return 0;
446 +#endif
447 +
448                 default:
449                         /* A large opt value means that set_refuse_options()
450                          * turned this option off. */
451 @@ -2049,6 +2067,11 @@ void server_options(char **args, int *argc_p)
452         } else if (inplace)
453                 args[ac++] = "--inplace";
454  
455 +       if (link_by_hash_dir && am_sender) {
456 +               args[ac++] = "--link-by-hash";
457 +               args[ac++] = link_by_hash_dir;
458 +       }
459 +
460         if (files_from && (!am_sender || filesfrom_host)) {
461                 if (filesfrom_host) {
462                         args[ac++] = "--files-from";
463 diff --git a/receiver.c b/receiver.c
464 --- a/receiver.c
465 +++ b/receiver.c
466 @@ -183,12 +183,14 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file)
467  }
468  
469  static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
470 -                       const char *fname, int fd, OFF_T total_size)
471 +                       const char *fname, int fd, OFF_T total_size,
472 +                       const char *md4)
473  {
474         static char file_sum1[MAX_DIGEST_LEN];
475         static char file_sum2[MAX_DIGEST_LEN];
476         struct map_struct *mapbuf;
477         struct sum_struct sum;
478 +       md_context mdfour_data;
479         int32 len, sum_len;
480         OFF_T offset = 0;
481         OFF_T offset2;
482 @@ -208,6 +210,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
483         } else
484                 mapbuf = NULL;
485  
486 +       if (md4)
487 +               mdfour_begin(&mdfour_data);
488 +
489         sum_init(checksum_seed);
490  
491         if (append_mode > 0) {
492 @@ -252,6 +257,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
493                         cleanup_got_literal = 1;
494  
495                         sum_update(data, i);
496 +                       if (md4)
497 +                               mdfour_update(&mdfour_data, (uchar*)data, i);
498  
499                         if (fd != -1 && write_file(fd,data,i) != i)
500                                 goto report_write_error;
501 @@ -279,6 +286,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
502  
503                         see_token(map, len);
504                         sum_update(map, len);
505 +                       if (md4)
506 +                               mdfour_update(&mdfour_data, (uchar*)map, len);
507                 }
508  
509                 if (updating_basis_or_equiv) {
510 @@ -323,6 +332,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
511         }
512  
513         sum_len = sum_end(file_sum1);
514 +       if (md4)
515 +               mdfour_result(&mdfour_data, (uchar*)md4);
516  
517         if (mapbuf)
518                 unmap_file(mapbuf);
519 @@ -338,7 +349,7 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
520  
521  static void discard_receive_data(int f_in, OFF_T length)
522  {
523 -       receive_data(f_in, NULL, -1, 0, NULL, -1, length);
524 +       receive_data(f_in, NULL, -1, 0, NULL, -1, length, NULL);
525  }
526  
527  static void handle_delayed_updates(char *local_name)
528 @@ -740,7 +751,7 @@ int recv_files(int f_in, char *local_name)
529  
530                 /* recv file data */
531                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
532 -                                      fname, fd2, F_LENGTH(file));
533 +                                      fname, fd2, F_LENGTH(file), F_SUM(file));
534  
535                 log_item(log_code, file, &initial_stats, iflags, NULL);
536  
537 diff --git a/rsync.c b/rsync.c
538 --- a/rsync.c
539 +++ b/rsync.c
540 @@ -47,6 +47,7 @@ extern int inplace;
541  extern int flist_eof;
542  extern int keep_dirlinks;
543  extern int make_backups;
544 +extern char *link_by_hash_dir;
545  extern struct file_list *cur_flist, *first_flist, *dir_flist;
546  extern struct chmod_mode_struct *daemon_chmod_modes;
547  #ifdef ICONV_OPTION
548 @@ -583,8 +584,15 @@ int finish_transfer(const char *fname, const char *fnametmp,
549         /* move tmp file over real file */
550         if (verbose > 2)
551                 rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname);
552 -       ret = robust_rename(fnametmp, fname, temp_copy_name,
553 -                           file->mode & INITACCESSPERMS);
554 +#ifdef HAVE_LINK
555 +       if (link_by_hash_dir)
556 +               ret = link_by_hash(fnametmp, fname, file);
557 +       else
558 +#endif
559 +       {
560 +               ret = robust_rename(fnametmp, fname, temp_copy_name,
561 +                                   file->mode & INITACCESSPERMS);
562 +       }
563         if (ret < 0) {
564                 rsyserr(FERROR_XFER, errno, "%s %s -> \"%s\"",
565                         ret == -2 ? "copy" : "rename",
566 diff --git a/rsync.h b/rsync.h
567 --- a/rsync.h
568 +++ b/rsync.h
569 @@ -850,6 +850,14 @@ struct stats {
570         int num_transferred_files;
571  };
572  
573 +struct hashfile_struct {       /* one candidate file from a --link-by-hash hash directory */
574 +       struct hashfile_struct *next;   /* next node; find_hashfiles() links the nodes into a circular list */
575 +       struct hashfile_struct *prev;   /* previous node of the same circular list */
576 +       char *name;     /* allocated path of the candidate; freed by kill_hashfile() */
577 +       int fd;         /* descriptor opened O_RDONLY on name; closed by kill_hashfile() */
578 +       uint32 nlink;   /* st_nlink recorded when find_hashfiles() stat'ed the file */
579 +};
580 +
581  struct chmod_mode_struct;
582  
583  struct flist_ndx_item {
584 diff --git a/rsync.yo b/rsync.yo
585 --- a/rsync.yo
586 +++ b/rsync.yo
587 @@ -405,6 +405,7 @@ to the detailed description below for a complete description.  verb(
588       --compare-dest=DIR      also compare received files relative to DIR
589       --copy-dest=DIR         ... and include copies of unchanged files
590       --link-dest=DIR         hardlink to files in DIR when unchanged
591 +     --link-by-hash=DIR      create hardlinks by hash into DIR
592   -z, --compress              compress file data during the transfer
593       --compress-level=NUM    explicitly set compression level
594       --skip-compress=LIST    skip compressing files with suffix in LIST