The patches for 3.3.0.
[rsync-patches.git] / source-filter_dest-filter.diff
1 CAUTION:  This patch compiles, but is otherwise totally untested!
2
3 This patch also implements --times-only.
4
5 Implementation details for the --source-filter and --dest-filter options:
6
7  - These options open a *HUGE* security hole in daemon mode unless they
8    are refused in your rsyncd.conf!
9
10  - Filtering disables the rsync algorithm. (This should be fixed.)
11
12  - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14  - If the source filter fails, data is sent unfiltered. (Should be changed
15    to abort.)
16
17  - Failure of the destination filter causes data loss!!! (Should be changed
18    to abort.)
19
20  - If the filter changes the size of a file, you should use the --times-only
21    option to prevent repeated transfers of unchanged files.
22
23  - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24    to be fixed.)
25
26 To use this patch, run these commands for a successful build:
27
28     patch -p1 <patches/source-filter_dest-filter.diff
29     ./prepare-source
30     ./configure                                (optional if already run)
31     make
32
33 based-on: 6c8ca91c731b7bf2b081694bda85b7dadc2b7aff
34 diff --git a/generator.c b/generator.c
35 --- a/generator.c
36 +++ b/generator.c
37 @@ -67,6 +67,7 @@ extern int append_mode;
38  extern int make_backups;
39  extern int csum_length;
40  extern int ignore_times;
41 +extern int times_only;
42  extern int size_only;
43  extern OFF_T max_size;
44  extern OFF_T min_size;
45 @@ -618,7 +619,7 @@ int quick_check_ok(enum filetype ftype, const char *fn, struct file_struct *file
46  {
47         switch (ftype) {
48           case FT_REG:
49 -               if (st->st_size != F_LENGTH(file))
50 +               if (!times_only && st->st_size != F_LENGTH(file))
51                         return 0;
52  
53                 /* If always_checksum is set then we use the checksum instead
54 diff --git a/main.c b/main.c
55 --- a/main.c
56 +++ b/main.c
57 @@ -191,7 +191,7 @@ int shell_exec(const char *cmd)
58  }
59  
60  /* Wait for a process to exit, calling io_flush while waiting. */
61 -static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
62 +void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
63  {
64         pid_t waited_pid;
65         int status;
66 diff --git a/options.c b/options.c
67 --- a/options.c
68 +++ b/options.c
69 @@ -119,6 +119,7 @@ int safe_symlinks = 0;
70  int copy_unsafe_links = 0;
71  int munge_symlinks = 0;
72  int size_only = 0;
73 +int times_only = 0;
74  int daemon_bwlimit = 0;
75  int bwlimit = 0;
76  int fuzzy_basis = 0;
77 @@ -179,6 +180,8 @@ char *logfile_name = NULL;
78  char *logfile_format = NULL;
79  char *stdout_format = NULL;
80  char *password_file = NULL;
81 +char *source_filter = NULL;
82 +char *dest_filter = NULL;
83  char *early_input_file = NULL;
84  char *rsync_path = RSYNC_PATH;
85  char *backup_dir = NULL;
86 @@ -689,6 +692,7 @@ static struct poptOption long_options[] = {
87    {"chmod",            0,  POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
88    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
89    {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
90 +  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
91    {"one-file-system", 'x', POPT_ARG_NONE,   0, 'x', 0, 0 },
92    {"no-one-file-system",0, POPT_ARG_VAL,    &one_file_system, 0, 0, 0 },
93    {"no-x",             0,  POPT_ARG_VAL,    &one_file_system, 0, 0, 0 },
94 @@ -829,6 +833,8 @@ static struct poptOption long_options[] = {
95    {"early-input",      0,  POPT_ARG_STRING, &early_input_file, 0, 0, 0 },
96    {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
97    {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
98 +  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
99 +  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
100    {"outbuf",           0,  POPT_ARG_STRING, &outbuf_mode, 0, 0, 0 },
101    {"remote-option",   'M', POPT_ARG_STRING, 0, 'M', 0, 0 },
102    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
103 @@ -2444,6 +2450,16 @@ int parse_arguments(int *argc_p, const char ***argv_p)
104                 }
105         }
106  
107 +       if (source_filter || dest_filter) {
108 +               if (whole_file == 0) {
109 +                       snprintf(err_buf, sizeof err_buf,
110 +                                "--no-whole-file cannot be used with --%s-filter\n",
111 +                                source_filter ? "source" : "dest");
112 +                       return 0;
113 +               }
114 +               whole_file = 1;
115 +       }
116 +
117         if (files_from) {
118                 char *h, *p;
119                 int q;
120 @@ -2852,6 +2868,25 @@ void server_options(char **args, int *argc_p)
121         else if (missing_args == 1 && !am_sender)
122                 args[ac++] = "--ignore-missing-args";
123  
124 +       if (times_only && am_sender)
125 +               args[ac++] = "--times-only";
126 +
127 +       if (source_filter && !am_sender) {
128 +               /* Need to single quote the arg to keep the remote shell
129 +                * from splitting it.  FIXME: breaks if command has single quotes. */
130 +               if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
131 +                       goto oom;
132 +               args[ac++] = arg;
133 +       }
134 +
135 +       if (dest_filter && am_sender) {
136 +               /* Need to single quote the arg to keep the remote shell
137 +                * from splitting it.  FIXME: breaks if command has single quotes. */
138 +               if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
139 +                       goto oom;
140 +               args[ac++] = arg;
141 +       }
142 +
143         if (modify_window_set && am_sender) {
144                 char *fmt = modify_window < 0 ? "-@%d" : "--modify-window=%d";
145                 if (asprintf(&arg, fmt, modify_window) < 0)
146 diff --git a/pipe.c b/pipe.c
147 --- a/pipe.c
148 +++ b/pipe.c
149 @@ -27,6 +27,7 @@ extern int am_server;
150  extern int blocking_io;
151  extern int filesfrom_fd;
152  extern int munge_symlinks;
153 +extern mode_t orig_umask;
154  extern char *logfile_name;
155  extern int remote_option_cnt;
156  extern const char **remote_options;
157 @@ -176,3 +177,77 @@ pid_t local_child(int argc, char **argv, int *f_in, int *f_out,
158  
159         return pid;
160  }
161 +
162 +pid_t run_filter(char *command[], int out, int *pipe_to_filter)
163 +{
164 +       pid_t pid;
165 +       int pipefds[2];
166 +
167 +       if (DEBUG_GTE(CMD, 1))
168 +               print_child_argv("opening connection using:", command);
169 +
170 +       if (pipe(pipefds) < 0) {
171 +               rsyserr(FERROR, errno, "pipe");
172 +               exit_cleanup(RERR_IPC);
173 +       }
174 +
175 +       pid = do_fork();
176 +       if (pid == -1) {
177 +               rsyserr(FERROR, errno, "fork");
178 +               exit_cleanup(RERR_IPC);
179 +       }
180 +
181 +       if (pid == 0) {
182 +               if (dup2(pipefds[0], STDIN_FILENO) < 0
183 +                || close(pipefds[1]) < 0
184 +                || dup2(out, STDOUT_FILENO) < 0) {
185 +                       rsyserr(FERROR, errno, "Failed dup/close");
186 +                       exit_cleanup(RERR_IPC);
187 +               }
188 +               umask(orig_umask);
189 +               set_blocking(STDIN_FILENO);
190 +               if (blocking_io)
191 +                       set_blocking(STDOUT_FILENO);
192 +               execvp(command[0], command);
193 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
194 +               exit_cleanup(RERR_IPC);
195 +       }
196 +
197 +       if (close(pipefds[0]) < 0) {
198 +               rsyserr(FERROR, errno, "Failed to close");
199 +               exit_cleanup(RERR_IPC);
200 +       }
201 +
202 +       *pipe_to_filter = pipefds[1];
203 +
204 +       return pid;
205 +}
206 +
207 +pid_t run_filter_on_file(char *command[], int out, int in)
208 +{
209 +       pid_t pid;
210 +
211 +       if (DEBUG_GTE(CMD, 1))
212 +               print_child_argv("opening connection using:", command);
213 +
214 +       pid = do_fork();
215 +       if (pid == -1) {
216 +               rsyserr(FERROR, errno, "fork");
217 +               exit_cleanup(RERR_IPC);
218 +       }
219 +
220 +       if (pid == 0) {
221 +               if (dup2(in, STDIN_FILENO) < 0
222 +                || dup2(out, STDOUT_FILENO) < 0) {
223 +                       rsyserr(FERROR, errno, "Failed to dup2");
224 +                       exit_cleanup(RERR_IPC);
225 +               }
226 +               if (blocking_io)
227 +                       set_blocking(STDOUT_FILENO);
228 +               execvp(command[0], command);
229 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
230 +               exit_cleanup(RERR_IPC);
231 +       }
232 +
233 +       return pid;
234 +}
235 diff --git a/receiver.c b/receiver.c
236 --- a/receiver.c
237 +++ b/receiver.c
238 @@ -60,6 +60,7 @@ extern BOOL want_progress_now;
239  extern mode_t orig_umask;
240  extern struct stats stats;
241  extern char *tmpdir;
242 +extern char *dest_filter;
243  extern char *partial_dir;
244  extern char *basis_dir[MAX_BASIS_DIRS+1];
245  extern char sender_file_sum[MAX_DIGEST_LEN];
246 @@ -528,6 +529,7 @@ int recv_files(int f_in, int f_out, char *local_name)
247         char *fnametmp, fnametmpbuf[MAXPATHLEN];
248         char *fnamecmp, *partialptr;
249         char fnamecmpbuf[MAXPATHLEN];
250 +       char *filter_argv[MAX_FILTER_ARGS + 1];
251         uchar fnamecmp_type;
252         struct file_struct *file;
253         int itemizing = am_server ? logfile_format_has_i : stdout_format_has_i;
254 @@ -538,6 +540,7 @@ int recv_files(int f_in, int f_out, char *local_name)
255         const char *parent_dirname = "";
256  #endif
257         int ndx, recv_ok, one_inplace;
258 +       pid_t pid = 0;
259  
260         if (DEBUG_GTE(RECV, 1))
261                 rprintf(FINFO, "recv_files(%d) starting\n", cur_flist->used);
262 @@ -548,6 +551,23 @@ int recv_files(int f_in, int f_out, char *local_name)
263         if (whole_file < 0)
264                 whole_file = 0;
265  
266 +       if (dest_filter) {
267 +               char *p;
268 +               char *sep = " \t";
269 +               int i;
270 +               for (p = strtok(dest_filter, sep), i = 0;
271 +                    p && i < MAX_FILTER_ARGS;
272 +                    p = strtok(0, sep))
273 +                       filter_argv[i++] = p;
274 +               filter_argv[i] = NULL;
275 +               if (p) {
276 +                       rprintf(FERROR,
277 +                               "Too many arguments to dest-filter (> %d)\n",
278 +                               MAX_FILTER_ARGS);
279 +                       exit_cleanup(RERR_SYNTAX);
280 +               }
281 +       }
282 +
283         progress_init();
284  
285         while (1) {
286 @@ -873,6 +893,9 @@ int recv_files(int f_in, int f_out, char *local_name)
287                 else if (!am_server && INFO_GTE(NAME, 1) && INFO_EQ(PROGRESS, 1))
288                         rprintf(FINFO, "%s\n", fname);
289  
290 +               if (dest_filter)
291 +                       pid = run_filter(filter_argv, fd2, &fd2);
292 +
293                 /* recv file data */
294                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size, fname, fd2, file, inplace || one_inplace);
295  
296 @@ -888,6 +911,16 @@ int recv_files(int f_in, int f_out, char *local_name)
297                         exit_cleanup(RERR_FILEIO);
298                 }
299  
300 +               if (dest_filter) {
301 +                       int status;
302 +                       wait_process_with_flush(pid, &status);
303 +                       if (status != 0) {
304 +                               rprintf(FERROR, "filter %s exited code: %d\n",
305 +                                       dest_filter, status);
306 +                               continue;
307 +                       }
308 +               }
309 +
310                 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
311                         if (partialptr == fname)
312                                 partialptr = NULL;
313 diff --git a/rsync.1.md b/rsync.1.md
314 --- a/rsync.1.md
315 +++ b/rsync.1.md
316 @@ -504,6 +504,7 @@ has its own detailed description later in this manpage.
317  --contimeout=SECONDS     set daemon connection timeout in seconds
318  --ignore-times, -I       don't skip files that match size and time
319  --size-only              skip files that match in size
320 +--times-only             skip files that match in mod-time
321  --modify-window=NUM, -@  set the accuracy for mod-time comparisons
322  --temp-dir=DIR, -T       create temporary files in directory DIR
323  --fuzzy, -y              find similar file for basis if no dest file
324 @@ -553,6 +554,8 @@ has its own detailed description later in this manpage.
325  --write-batch=FILE       write a batched update to FILE
326  --only-write-batch=FILE  like --write-batch but w/o updating dest
327  --read-batch=FILE        read a batched update from FILE
328 +--source-filter=COMMAND  filter file through COMMAND at source
329 +--dest-filter=COMMAND    filter file through COMMAND at destination
330  --protocol=NUM           force an older protocol version to be used
331  --iconv=CONVERT_SPEC     request charset conversion of filenames
332  --checksum-seed=NUM      set block/file checksum seed (advanced)
333 @@ -3713,6 +3716,36 @@ expand it.
334      [`--write-batch`](#opt).  If _FILE_ is `-`, the batch data will be read
335      from standard input. See the "BATCH MODE" section for details.
336  
337 +0.  `--source-filter=COMMAND`
338 +
339 +    This option allows the user to specify a filter program that will be
340 +    applied to the contents of all transferred regular files before the data is
341 +    sent to destination.  COMMAND will receive the data on its standard input
342 +    and it should write the filtered data to standard output.  COMMAND should
343 +    exit non-zero if it cannot process the data or if it encounters an error
344 +    when writing the data to stdout.
345 +
346 +    Example: `--source-filter="gzip -9"` will cause remote files to be
347 +    compressed.  Use of `--source-filter` automatically enables
348 +    [`--whole-file`](#opt).  If your filter does not output the same number of
349 +    bytes that it received on input, you should use `--times-only` to
350 +    disable size and content checks on subsequent rsync runs.
351 +
352 +0.  `--dest-filter=COMMAND`
353 +
354 +    This option allows you to specify a filter program that will be applied to
355 +    the contents of all transferred regular files before the data is written to
356 +    disk.  COMMAND will receive the data on its standard input and it should
357 +    write the filtered data to standard output.  COMMAND should exit non-zero
358 +    if it cannot process the data or if it encounters an error when writing the
359 +    data to stdout.
360 +
361 +    Example: `--dest-filter="gzip -9"` will cause remote files to be
362 +    compressed.  Use of `--dest-filter` automatically enables
363 +    [`--whole-file`](#opt).  If your filter does not output the same number of
364 +    bytes that it received on input, you should use `--times-only` to
365 +    disable size and content checks on subsequent rsync runs.
366 +
367  0.  `--protocol=NUM`
368  
369      Force an older protocol version to be used.  This is useful for creating a
370 diff --git a/rsync.h b/rsync.h
371 --- a/rsync.h
372 +++ b/rsync.h
373 @@ -169,6 +169,7 @@
374  #define IOERR_DEL_LIMIT (1<<2)
375  
376  #define MAX_ARGS 1000
377 +#define MAX_FILTER_ARGS 100
378  #define MAX_BASIS_DIRS 20
379  #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
380  
381 diff --git a/sender.c b/sender.c
382 --- a/sender.c
383 +++ b/sender.c
384 @@ -21,6 +21,7 @@
385  
386  #include "rsync.h"
387  #include "inums.h"
388 +#include "ifuncs.h"
389  
390  extern int do_xfers;
391  extern int am_server;
392 @@ -50,6 +51,7 @@ extern int batch_fd;
393  extern int write_batch;
394  extern int file_old_total;
395  extern BOOL want_progress_now;
396 +extern char *source_filter;
397  extern struct stats stats;
398  extern struct file_list *cur_flist, *first_flist, *dir_flist;
399  extern char num_dev_ino_buf[4 + 8 + 8];
400 @@ -211,6 +213,26 @@ void send_files(int f_in, int f_out)
401         int f_xfer = write_batch < 0 ? batch_fd : f_out;
402         int save_io_error = io_error;
403         int ndx, j;
404 +       char *filter_argv[MAX_FILTER_ARGS + 1];
405 +       char *tmp = 0;
406 +       int unlink_tmp = 0;
407 +
408 +       if (source_filter) {
409 +               char *p;
410 +               char *sep = " \t";
411 +               int i;
412 +               for (p = strtok(source_filter, sep), i = 0;
413 +                    p && i < MAX_FILTER_ARGS;
414 +                    p = strtok(0, sep))
415 +                       filter_argv[i++] = p;
416 +               filter_argv[i] = NULL;
417 +               if (p) {
418 +                       rprintf(FERROR,
419 +                               "Too many arguments to source-filter (> %d)\n",
420 +                               MAX_FILTER_ARGS);
421 +                       exit_cleanup(RERR_SYNTAX);
422 +               }
423 +       }
424  
425         if (DEBUG_GTE(SEND, 1))
426                 rprintf(FINFO, "send_files starting\n");
427 @@ -348,6 +370,7 @@ void send_files(int f_in, int f_out)
428                         exit_cleanup(RERR_PROTOCOL);
429                 }
430  
431 +               unlink_tmp = 0;
432                 fd = do_open(fname, O_RDONLY, 0);
433                 if (fd == -1) {
434                         if (errno == ENOENT) {
435 @@ -367,6 +390,33 @@ void send_files(int f_in, int f_out)
436                         continue;
437                 }
438  
439 +               if (source_filter) {
440 +                       int fd2;
441 +                       char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
442 +
443 +                       tmp = strdup(tmpl);
444 +                       fd2 = mkstemp(tmp);
445 +                       if (fd2 == -1) {
446 +                               rprintf(FERROR, "mkstemp %s failed: %s\n",
447 +                                       tmp, strerror(errno));
448 +                       } else {
449 +                               int status;
450 +                               pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
451 +                               close(fd);
452 +                               close(fd2);
453 +                               wait_process_with_flush(pid, &status);
454 +                               if (status != 0) {
455 +                                       rprintf(FERROR,
456 +                                           "bypassing source filter %s; exited with code: %d\n",
457 +                                           source_filter, status);
458 +                                       fd = do_open(fname, O_RDONLY, 0);
459 +                               } else {
460 +                                       fd = do_open(tmp, O_RDONLY, 0);
461 +                                       unlink_tmp = 1;
462 +                               }
463 +                       }
464 +               }
465 +
466                 /* map the local file */
467                 if (do_fstat(fd, &st) != 0) {
468                         io_error |= IOERR_GENERAL;
469 @@ -437,6 +487,8 @@ void send_files(int f_in, int f_out)
470                         }
471                 }
472                 close(fd);
473 +               if (unlink_tmp)
474 +                       unlink(tmp);
475  
476                 free_sums(s);
477