Updated to latest source.
[rsync-patches.git] / source-filter_dest-filter.diff
1 CAUTION:  This patch compiles, but is otherwise totally untested!
2
3 This patch also implements --times-only.
4
5 Implementation details for the --source-filter and --dest-filter options:
6
7  - These options open a *HUGE* security hole in daemon mode unless they
8    are refused in your rsyncd.conf!
9
10  - Filtering disables the rsync algorithm. (This should be fixed.)
11
12  - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14  - If the source filter fails, data is sent unfiltered. (Should be changed
15    to abort.)
16
17  - Failure of the destination filter causes data loss!!! (Should be changed
18    to abort.)
19
20  - If the filter changes the size of a file, you should use the --times-only
21    option to prevent repeated transfers of unchanged files.
22
23  - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24    to be fixed.)
25
26 To use this patch, run these commands for a successful build:
27
28     patch -p1 <patches/source-filter_dest-filter.diff
29     ./prepare-source
30     ./configure                                (optional if already run)
31     make
32
33 based-on: 5b19cf787515fc2388dd070a85e86ced5d80510b
34 diff --git a/generator.c b/generator.c
35 --- a/generator.c
36 +++ b/generator.c
37 @@ -64,6 +64,7 @@ extern int append_mode;
38  extern int make_backups;
39  extern int csum_length;
40  extern int ignore_times;
41 +extern int times_only;
42  extern int size_only;
43  extern OFF_T max_size;
44  extern OFF_T min_size;
45 @@ -580,7 +581,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
46  /* Perform our quick-check heuristic for determining if a file is unchanged. */
47  int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
48  {
49 -       if (st->st_size != F_LENGTH(file))
50 +       if (!times_only && st->st_size != F_LENGTH(file))
51                 return 0;
52  
53         /* if always checksum is set then we use the checksum instead
54 diff --git a/main.c b/main.c
55 --- a/main.c
56 +++ b/main.c
57 @@ -180,7 +180,7 @@ int shell_exec(const char *cmd)
58  }
59  
60  /* Wait for a process to exit, calling io_flush while waiting. */
61 -static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
62 +void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
63  {
64         pid_t waited_pid;
65         int status;
66 diff --git a/options.c b/options.c
67 --- a/options.c
68 +++ b/options.c
69 @@ -111,6 +111,7 @@ int safe_symlinks = 0;
70  int copy_unsafe_links = 0;
71  int munge_symlinks = 0;
72  int size_only = 0;
73 +int times_only = 0;
74  int daemon_bwlimit = 0;
75  int bwlimit = 0;
76  int fuzzy_basis = 0;
77 @@ -170,6 +171,8 @@ char *logfile_name = NULL;
78  char *logfile_format = NULL;
79  char *stdout_format = NULL;
80  char *password_file = NULL;
81 +char *source_filter = NULL;
82 +char *dest_filter = NULL;
83  char *rsync_path = RSYNC_PATH;
84  char *backup_dir = NULL;
85  char backup_dir_buf[MAXPATHLEN];
86 @@ -823,6 +826,7 @@ void usage(enum logcode F)
87    rprintf(F," -I, --ignore-times          don't skip files that match in size and mod-time\n");
88    rprintf(F," -M, --remote-option=OPTION  send OPTION to the remote side only\n");
89    rprintf(F,"     --size-only             skip files that match in size\n");
90 +  rprintf(F,"     --times-only            skip files that match in mod-time\n");
91    rprintf(F," -@, --modify-window=NUM     set the accuracy for mod-time comparisons\n");
92    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
93    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
94 @@ -866,6 +870,8 @@ void usage(enum logcode F)
95    rprintf(F,"     --write-batch=FILE      write a batched update to FILE\n");
96    rprintf(F,"     --only-write-batch=FILE like --write-batch but w/o updating destination\n");
97    rprintf(F,"     --read-batch=FILE       read a batched update from FILE\n");
98 +  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
99 +  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
100    rprintf(F,"     --protocol=NUM          force an older protocol version to be used\n");
101  #ifdef ICONV_OPTION
102    rprintf(F,"     --iconv=CONVERT_SPEC    request charset conversion of filenames\n");
103 @@ -988,6 +994,7 @@ static struct poptOption long_options[] = {
104    {"chmod",            0,  POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
105    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
106    {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
107 +  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
108    {"one-file-system", 'x', POPT_ARG_NONE,   0, 'x', 0, 0 },
109    {"no-one-file-system",0, POPT_ARG_VAL,    &one_file_system, 0, 0, 0 },
110    {"no-x",             0,  POPT_ARG_VAL,    &one_file_system, 0, 0, 0 },
111 @@ -1116,6 +1123,8 @@ static struct poptOption long_options[] = {
112    {"password-file",    0,  POPT_ARG_STRING, &password_file, 0, 0, 0 },
113    {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
114    {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
115 +  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
116 +  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
117  #ifdef HAVE_SETVBUF
118    {"outbuf",           0,  POPT_ARG_STRING, &outbuf_mode, 0, 0, 0 },
119  #endif
120 @@ -2484,6 +2493,16 @@ int parse_arguments(int *argc_p, const char ***argv_p)
121                 }
122         }
123  
124 +       if (source_filter || dest_filter) {
125 +               if (whole_file == 0) {
126 +                       snprintf(err_buf, sizeof err_buf,
127 +                                "--no-whole-file cannot be used with --%s-filter\n",
128 +                                source_filter ? "source" : "dest");
129 +                       return 0;
130 +               }
131 +               whole_file = 1;
132 +       }
133 +
134         if (files_from) {
135                 char *h, *p;
136                 int q;
137 @@ -2868,6 +2887,25 @@ void server_options(char **args, int *argc_p)
138         else if (missing_args == 1 && !am_sender)
139                 args[ac++] = "--ignore-missing-args";
140  
141 +       if (times_only && am_sender)
142 +               args[ac++] = "--times-only";
143 +
144 +       if (source_filter && !am_sender) {
145 +               /* Need to single quote the arg to keep the remote shell
146 +                * from splitting it.  FIXME: breaks if command has single quotes. */
147 +               if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
148 +                       goto oom;
149 +               args[ac++] = arg;
150 +       }
151 +
152 +       if (dest_filter && am_sender) {
153 +               /* Need to single quote the arg to keep the remote shell
154 +                * from splitting it.  FIXME: breaks if command has single quotes. */
155 +               if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
156 +                       goto oom;
157 +               args[ac++] = arg;
158 +       }
159 +
160         if (modify_window_set && am_sender) {
161                 char *fmt = modify_window < 0 ? "-@%d" : "--modify-window=%d";
162                 if (asprintf(&arg, fmt, modify_window) < 0)
163 diff --git a/pipe.c b/pipe.c
164 --- a/pipe.c
165 +++ b/pipe.c
166 @@ -27,6 +27,7 @@ extern int am_server;
167  extern int blocking_io;
168  extern int filesfrom_fd;
169  extern int munge_symlinks;
170 +extern mode_t orig_umask;
171  extern char *logfile_name;
172  extern int remote_option_cnt;
173  extern const char **remote_options;
174 @@ -178,3 +179,77 @@ pid_t local_child(int argc, char **argv, int *f_in, int *f_out,
175  
176         return pid;
177  }
178 +
179 +pid_t run_filter(char *command[], int out, int *pipe_to_filter)
180 +{
181 +       pid_t pid;
182 +       int pipefds[2];
183 +
184 +       if (DEBUG_GTE(CMD, 1))
185 +               print_child_argv("opening connection using:", command);
186 +
187 +       if (pipe(pipefds) < 0) {
188 +               rsyserr(FERROR, errno, "pipe");
189 +               exit_cleanup(RERR_IPC);
190 +       }
191 +
192 +       pid = do_fork();
193 +       if (pid == -1) {
194 +               rsyserr(FERROR, errno, "fork");
195 +               exit_cleanup(RERR_IPC);
196 +       }
197 +
198 +       if (pid == 0) {
199 +               if (dup2(pipefds[0], STDIN_FILENO) < 0
200 +                || close(pipefds[1]) < 0
201 +                || dup2(out, STDOUT_FILENO) < 0) {
202 +                       rsyserr(FERROR, errno, "Failed dup/close");
203 +                       exit_cleanup(RERR_IPC);
204 +               }
205 +               umask(orig_umask);
206 +               set_blocking(STDIN_FILENO);
207 +               if (blocking_io)
208 +                       set_blocking(STDOUT_FILENO);
209 +               execvp(command[0], command);
210 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
211 +               exit_cleanup(RERR_IPC);
212 +       }
213 +
214 +       if (close(pipefds[0]) < 0) {
215 +               rsyserr(FERROR, errno, "Failed to close");
216 +               exit_cleanup(RERR_IPC);
217 +       }
218 +
219 +       *pipe_to_filter = pipefds[1];
220 +
221 +       return pid;
222 +}
223 +
224 +pid_t run_filter_on_file(char *command[], int out, int in)
225 +{
226 +       pid_t pid;
227 +
228 +       if (DEBUG_GTE(CMD, 1))
229 +               print_child_argv("opening connection using:", command);
230 +
231 +       pid = do_fork();
232 +       if (pid == -1) {
233 +               rsyserr(FERROR, errno, "fork");
234 +               exit_cleanup(RERR_IPC);
235 +       }
236 +
237 +       if (pid == 0) {
238 +               if (dup2(in, STDIN_FILENO) < 0
239 +                || dup2(out, STDOUT_FILENO) < 0) {
240 +                       rsyserr(FERROR, errno, "Failed to dup2");
241 +                       exit_cleanup(RERR_IPC);
242 +               }
243 +               if (blocking_io)
244 +                       set_blocking(STDOUT_FILENO);
245 +               execvp(command[0], command);
246 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
247 +               exit_cleanup(RERR_IPC);
248 +       }
249 +
250 +       return pid;
251 +}
252 diff --git a/receiver.c b/receiver.c
253 --- a/receiver.c
254 +++ b/receiver.c
255 @@ -60,6 +60,7 @@ extern BOOL want_progress_now;
256  extern mode_t orig_umask;
257  extern struct stats stats;
258  extern char *tmpdir;
259 +extern char *dest_filter;
260  extern char *partial_dir;
261  extern char *basis_dir[MAX_BASIS_DIRS+1];
262  extern char sender_file_sum[MAX_DIGEST_LEN];
263 @@ -522,6 +523,7 @@ int recv_files(int f_in, int f_out, char *local_name)
264         char *fnametmp, fnametmpbuf[MAXPATHLEN];
265         char *fnamecmp, *partialptr;
266         char fnamecmpbuf[MAXPATHLEN];
267 +       char *filter_argv[MAX_FILTER_ARGS + 1];
268         uchar fnamecmp_type;
269         struct file_struct *file;
270         int itemizing = am_server ? logfile_format_has_i : stdout_format_has_i;
271 @@ -532,6 +534,7 @@ int recv_files(int f_in, int f_out, char *local_name)
272         const char *parent_dirname = "";
273  #endif
274         int ndx, recv_ok, one_inplace;
275 +       pid_t pid = 0;
276  
277         if (DEBUG_GTE(RECV, 1))
278                 rprintf(FINFO, "recv_files(%d) starting\n", cur_flist->used);
279 @@ -539,6 +542,23 @@ int recv_files(int f_in, int f_out, char *local_name)
280         if (delay_updates)
281                 delayed_bits = bitbag_create(cur_flist->used + 1);
282  
283 +       if (dest_filter) {
284 +               char *p;
285 +               char *sep = " \t";
286 +               int i;
287 +               for (p = strtok(dest_filter, sep), i = 0;
288 +                    p && i < MAX_FILTER_ARGS;
289 +                    p = strtok(0, sep))
290 +                       filter_argv[i++] = p;
291 +               filter_argv[i] = NULL;
292 +               if (p) {
293 +                       rprintf(FERROR,
294 +                               "Too many arguments to dest-filter (> %d)\n",
295 +                               MAX_FILTER_ARGS);
296 +                       exit_cleanup(RERR_SYNTAX);
297 +               }
298 +       }
299 +
300         progress_init();
301  
302         while (1) {
303 @@ -865,6 +885,9 @@ int recv_files(int f_in, int f_out, char *local_name)
304                 else if (!am_server && INFO_GTE(NAME, 1) && INFO_EQ(PROGRESS, 1))
305                         rprintf(FINFO, "%s\n", fname);
306  
307 +               if (dest_filter)
308 +                       pid = run_filter(filter_argv, fd2, &fd2);
309 +
310                 /* recv file data */
311                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size, fname, fd2, file, inplace || one_inplace);
312  
313 @@ -880,6 +903,16 @@ int recv_files(int f_in, int f_out, char *local_name)
314                         exit_cleanup(RERR_FILEIO);
315                 }
316  
317 +               if (dest_filter) {
318 +                       int status;
319 +                       wait_process_with_flush(pid, &status);
320 +                       if (status != 0) {
321 +                               rprintf(FERROR, "filter %s exited code: %d\n",
322 +                                       dest_filter, status);
323 +                               continue;
324 +                       }
325 +               }
326 +
327                 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
328                         if (partialptr == fname)
329                                 partialptr = NULL;
330 diff --git a/rsync.1.md b/rsync.1.md
331 --- a/rsync.1.md
332 +++ b/rsync.1.md
333 @@ -412,6 +412,7 @@ detailed description below for a complete description.
334  --contimeout=SECONDS        set daemon connection timeout in seconds
335  --ignore-times, -I          don't skip files that match size and time
336  --size-only                 skip files that match in size
337 +--times-only                skip files that match in mod-time
338  --modify-window=NUM, -@     set the accuracy for mod-time comparisons
339  --temp-dir=DIR, -T          create temporary files in directory DIR
340  --fuzzy, -y                 find similar file for basis if no dest file
341 @@ -454,6 +455,8 @@ detailed description below for a complete description.
342  --write-batch=FILE          write a batched update to FILE
343  --only-write-batch=FILE     like --write-batch but w/o updating dest
344  --read-batch=FILE           read a batched update from FILE
345 +--source-filter=COMMAND     filter file through COMMAND at source
346 +--dest-filter=COMMAND       filter file through COMMAND at destination
347  --protocol=NUM              force an older protocol version to be used
348  --iconv=CONVERT_SPEC        request charset conversion of filenames
349  --checksum-seed=NUM         set block/file checksum seed (advanced)
350 @@ -3008,6 +3011,36 @@ your home directory (remove the '=' for that).
351      `--write-batch`.  If _FILE_ is `-`, the batch data will be read from
352      standard input. See the "BATCH MODE" section for details.
353  
354 +0.  `--source-filter=COMMAND`
355 +
356 +    This option allows the user to specify a filter program that will be
357 +    applied to the contents of all transferred regular files before the data is
358 +    sent to destination.  COMMAND will receive the data on its standard input
359 +    and it should write the filtered data to standard output.  COMMAND should
360 +    exit non-zero if it cannot process the data or if it encounters an error
361 +    when writing the data to stdout.
362 +
363 +    Example: `--source-filter="gzip -9"` will cause remote files to be
364 +    compressed.  Use of `--source-filter` automatically enables `--whole-file`.
365 +    If your filter does not output the same number of bytes that it received on
366 +    input, you should use `--times-only` to disable size and content checks on
367 +    subsequent rsync runs.
368 +
369 +0.  `--dest-filter=COMMAND`
370 +
371 +    This option allows you to specify a filter program that will be applied to
372 +    the contents of all transferred regular files before the data is written to
373 +    disk.  COMMAND will receive the data on its standard input and it should
374 +    write the filtered data to standard output.  COMMAND should exit non-zero
375 +    if it cannot process the data or if it encounters an error when writing the
376 +    data to stdout.
377 +
378 +    Example: `--dest-filter="gzip -9"` will cause remote files to be compressed.
379 +    Use of `--dest-filter` automatically enables `--whole-file`.  If your filter
380 +    does not output the same number of bytes that it received on input, you
381 +    should use `--times-only` to disable size and content checks on subsequent
382 +    rsync runs.
383 +
384  0. `--protocol=NUM`
385  
386      Force an older protocol version to be used.  This is useful for creating a
387 diff --git a/rsync.h b/rsync.h
388 --- a/rsync.h
389 +++ b/rsync.h
390 @@ -158,6 +158,7 @@
391  #define IOERR_DEL_LIMIT (1<<2)
392  
393  #define MAX_ARGS 1000
394 +#define MAX_FILTER_ARGS 100
395  #define MAX_BASIS_DIRS 20
396  #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
397  
398 diff --git a/sender.c b/sender.c
399 --- a/sender.c
400 +++ b/sender.c
401 @@ -47,6 +47,7 @@ extern int batch_fd;
402  extern int write_batch;
403  extern int file_old_total;
404  extern BOOL want_progress_now;
405 +extern char *source_filter;
406  extern struct stats stats;
407  extern struct file_list *cur_flist, *first_flist, *dir_flist;
408  
409 @@ -204,6 +205,26 @@ void send_files(int f_in, int f_out)
410         int f_xfer = write_batch < 0 ? batch_fd : f_out;
411         int save_io_error = io_error;
412         int ndx, j;
413 +       char *filter_argv[MAX_FILTER_ARGS + 1];
414 +       char *tmp = 0;
415 +       int unlink_tmp = 0;
416 +
417 +       if (source_filter) {
418 +               char *p;
419 +               char *sep = " \t";
420 +               int i;
421 +               for (p = strtok(source_filter, sep), i = 0;
422 +                    p && i < MAX_FILTER_ARGS;
423 +                    p = strtok(0, sep))
424 +                       filter_argv[i++] = p;
425 +               filter_argv[i] = NULL;
426 +               if (p) {
427 +                       rprintf(FERROR,
428 +                               "Too many arguments to source-filter (> %d)\n",
429 +                               MAX_FILTER_ARGS);
430 +                       exit_cleanup(RERR_SYNTAX);
431 +               }
432 +       }
433  
434         if (DEBUG_GTE(SEND, 1))
435                 rprintf(FINFO, "send_files starting\n");
436 @@ -340,6 +361,7 @@ void send_files(int f_in, int f_out)
437                         exit_cleanup(RERR_PROTOCOL);
438                 }
439  
440 +               unlink_tmp = 0;
441                 fd = do_open(fname, O_RDONLY, 0);
442                 if (fd == -1) {
443                         if (errno == ENOENT) {
444 @@ -361,6 +383,33 @@ void send_files(int f_in, int f_out)
445                         continue;
446                 }
447  
448 +               if (source_filter) {
449 +                       int fd2;
450 +                       char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
451 +
452 +                       tmp = strdup(tmpl);
453 +                       fd2 = mkstemp(tmp);
454 +                       if (fd2 == -1) {
455 +                               rprintf(FERROR, "mkstemp %s failed: %s\n",
456 +                                       tmp, strerror(errno));
457 +                       } else {
458 +                               int status;
459 +                               pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
460 +                               close(fd);
461 +                               close(fd2);
462 +                               wait_process_with_flush(pid, &status);
463 +                               if (status != 0) {
464 +                                       rprintf(FERROR,
465 +                                           "bypassing source filter %s; exited with code: %d\n",
466 +                                           source_filter, status);
467 +                                       fd = do_open(fname, O_RDONLY, 0);
468 +                               } else {
469 +                                       fd = do_open(tmp, O_RDONLY, 0);
470 +                                       unlink_tmp = 1;
471 +                               }
472 +                       }
473 +               }
474 +
475                 /* map the local file */
476                 if (do_fstat(fd, &st) != 0) {
477                         io_error |= IOERR_GENERAL;
478 @@ -413,6 +462,8 @@ void send_files(int f_in, int f_out)
479                         }
480                 }
481                 close(fd);
482 +               if (unlink_tmp)
483 +                       unlink(tmp);
484  
485                 free_sums(s);
486