http://lists.samba.org/archive/rsync/2007-October/018996.html
-This patch is a COMPLETE HACK that covers the most common cases. Others
-are welcome to improve it.
-
To use this patch, run these commands for a successful build:
patch -p1 <patches/transliterate.diff
./configure (optional if already run)
make
+based-on: 6c8ca91c731b7bf2b081694bda85b7dadc2b7aff
diff --git a/flist.c b/flist.c
--- a/flist.c
+++ b/flist.c
-@@ -81,6 +81,9 @@ extern int filesfrom_convert;
- extern iconv_t ic_send, ic_recv;
- #endif
+@@ -78,6 +78,7 @@ extern uid_t our_uid;
+ extern struct stats stats;
+ extern char *filesfrom_host;
+ extern char *usermap, *groupmap;
++extern char *tr_opt;
-+extern char *tr_opt, *tr_left, *tr_right;
-+extern int tr_right_len;
-+
- #define PTR_SIZE (sizeof (struct file_struct *))
+ extern struct name_num_item *file_sum_nni;
+
+@@ -106,6 +107,8 @@ int file_old_total = 0; /* total of active items that will soon be gone */
+ int flist_eof = 0; /* all the file-lists are now known */
+ int xfer_flags_as_varint = 0;
- int io_error;
-@@ -605,6 +608,24 @@ static void send_file_entry(int f, struct file_struct *file, int ndx, int first_
++char tr_substitutions[256];
++
+ #define NORMAL_NAME 0
+ #define SLASH_ENDING_NAME 1
+ #define DOTDIR_NAME 2
+@@ -679,6 +682,23 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
stats.total_size += F_LENGTH(file);
}
-+static void transliterate(char *thisname)
++static void transliterate(char *path, int len)
+{
-+ char *p1, *p2, *pleft;
-+
-+ for (p1 = p2 = thisname; *p1; p1++) {
-+ /* Look up the current character in the left string. */
-+ pleft = strchr(tr_left, *p1);
-+ if (!pleft)
-+ /* Not found: no change. */
-+ *p2++ = *p1;
-+ else if (pleft - tr_left < tr_right_len)
-+ /* Store replacement from the right string. */
-+ *p2++ = tr_right[pleft - tr_left];
-+ /* Otherwise delete. */
++ while (1) {
++ /* Find position of any char in tr_opt in path, or the end of the path. */
++ int span = strcspn(path, tr_opt);
++ if ((len -= span) == 0)
++ return;
++ path += span;
++ if ((*path = tr_substitutions[*(uchar*)path]) == '\0')
++ memmove(path, path+1, len--); /* copies the trailing '\0' too. */
++ else {
++ path++;
++ len--;
++ }
+ }
-+ *p2 = '\0';
+}
+
- static struct file_struct *recv_file_entry(struct file_list *flist,
- int xflags, int f)
+ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int xflags)
{
-@@ -673,6 +694,9 @@ static struct file_struct *recv_file_entry(struct file_list *flist,
+ static int64 modtime, atime;
+@@ -750,9 +770,13 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
+ outbuf.len = 0;
+ }
+ thisname[outbuf.len] = '\0';
++ basename_len = outbuf.len;
}
#endif
+ if (tr_opt)
-+ transliterate(thisname);
++ transliterate(thisname, basename_len);
+
- clean_fname(thisname, 0);
+ if (*thisname
+ && (clean_fname(thisname, CFN_REFUSE_DOT_DOT_DIRS) < 0 || (!relative_paths && *thisname == '/'))) {
+ rprintf(FERROR, "ABORTING due to unsafe pathname from sender: %s\n", thisname);
+@@ -2575,6 +2599,15 @@ struct file_list *recv_file_list(int f, int dir_ndx)
+ parse_name_map(usermap, True);
+ if (groupmap)
+ parse_name_map(groupmap, False);
++ if (tr_opt) { /* Parse FROM/TO string and populate tr_substitutions[] */
++ char *f, *t;
++ if ((t = strchr(tr_opt, '/')) != NULL)
++ *t++ = '\0';
++ else
++ t = "";
++ for (f = tr_opt; *f; f++)
++ tr_substitutions[*(uchar*)f] = *t ? *t++ : '\0';
++ }
+ }
- if (sanitize_paths)
+ start_read = stats.total_read;
diff --git a/options.c b/options.c
--- a/options.c
+++ b/options.c
-@@ -183,6 +183,8 @@ int logfile_format_has_i = 0;
+@@ -211,6 +211,7 @@ int logfile_format_has_i = 0;
int logfile_format_has_o_or_i = 0;
int always_checksum = 0;
int list_only = 0;
-+char *tr_opt = NULL, *tr_left = NULL, *tr_right = NULL;
-+int tr_right_len = 0;
++char *tr_opt = NULL;
#define MAX_BATCH_NAME_LEN 256 /* Must be less than MAXPATHLEN-13 */
char *batch_name = NULL;
-@@ -430,6 +432,7 @@ void usage(enum logcode F)
- #ifdef ICONV_OPTION
- rprintf(F," --iconv=CONVERT_SPEC request charset conversion of filenames\n");
- #endif
-+ rprintf(F," --tr=BAD/GOOD transliterate filenames\n");
- rprintf(F," -4, --ipv4 prefer IPv4\n");
- rprintf(F," -6, --ipv6 prefer IPv6\n");
- rprintf(F," --version print version number\n");
-@@ -620,6 +623,7 @@ static struct poptOption long_options[] = {
+@@ -813,6 +814,7 @@ static struct poptOption long_options[] = {
+ {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
{"iconv", 0, POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
{"no-iconv", 0, POPT_ARG_NONE, 0, OPT_NO_ICONV, 0, 0 },
- #endif
+ {"tr", 0, POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
{"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 },
{"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 },
- {"8-bit-output", '8', POPT_ARG_NONE, &allow_8bit_chars, 0, 0, 0 },
-@@ -1654,6 +1658,31 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
+ {"8-bit-output", '8', POPT_ARG_VAL, &allow_8bit_chars, 1, 0, 0 },
+@@ -2490,6 +2492,24 @@ int parse_arguments(int *argc_p, const char ***argv_p)
}
}
-+ /* Easiest way to get a local server right is to do this on both sides */
+ if (tr_opt) {
-+ if (*tr_opt) {
-+ char *p;
-+
++ if (*tr_opt == '/' && tr_opt[1]) {
++ snprintf(err_buf, sizeof err_buf,
++ "Do not start the --tr arg with a slash\n");
++ return 0;
++ }
++ if (*tr_opt && *tr_opt != '/') {
+ need_unsorted_flist = 1;
-+ /* Our mutation shouldn't interfere with transmission of the
-+ * original option to the server. */
-+ tr_left = strdup(tr_opt);
-+ p = strchr(tr_left, '/');
-+ if (p != NULL) {
-+ *p = '\0';
-+ p++;
-+ tr_right = p;
-+ tr_right_len = strlen(tr_right);
-+ if (strchr(tr_right, '/') != NULL) {
-+ snprintf(err_buf, sizeof err_buf,
-+ "--tr cannot transliterate slashes\n");
-+ return 0;
-+ }
++ arg = strchr(tr_opt, '/');
++ if (arg && strchr(arg+1, '/')) {
++ snprintf(err_buf, sizeof err_buf,
++ "--tr cannot transliterate slashes\n");
++ return 0;
+ }
+ } else
+ tr_opt = NULL;
+ }
+
- am_starting_up = 0;
-
- return 1;
-@@ -2022,6 +2051,12 @@ void server_options(char **args, int *argc_p)
- else if (remove_source_files)
- args[ac++] = "--remove-sent-files";
+ if (trust_sender || am_server || read_batch)
+ trust_sender_args = trust_sender_filter = 1;
+ else if (old_style_args || filesfrom_host != NULL)
+@@ -2958,6 +2978,12 @@ void server_options(char **args, int *argc_p)
+ if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
+ args[ac++] = "--no-implied-dirs";
+ if (tr_opt) {
+ if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
+ args[ac++] = arg;
+ }
+
- *argc_p = ac;
- return;
+ if (write_devices && am_sender)
+ args[ac++] = "--write-devices";
-diff --git a/rsync.yo b/rsync.yo
---- a/rsync.yo
-+++ b/rsync.yo
-@@ -423,6 +423,7 @@ to the detailed description below for a complete description. verb(
- --read-batch=FILE read a batched update from FILE
- --protocol=NUM force an older protocol version to be used
- --iconv=CONVERT_SPEC request charset conversion of filenames
-+ --tr=BAD/GOOD transliterate filenames
- --checksum-seed=NUM set block/file checksum seed (advanced)
- -4, --ipv4 prefer IPv4
- -6, --ipv6 prefer IPv6
-@@ -2061,6 +2062,22 @@ daemon uses the charset specified in its "charset" configuration parameter
- regardless of the remote charset you actually pass. Thus, you may feel free to
- specify just the local charset for a daemon transfer (e.g. bf(--iconv=utf8)).
+diff --git a/rsync.1.md b/rsync.1.md
+--- a/rsync.1.md
++++ b/rsync.1.md
+@@ -555,6 +555,7 @@ has its own detailed description later in this manpage.
+ --read-batch=FILE read a batched update from FILE
+ --protocol=NUM force an older protocol version to be used
+ --iconv=CONVERT_SPEC request charset conversion of filenames
++--tr=BAD/GOOD transliterate filenames
+ --checksum-seed=NUM set block/file checksum seed (advanced)
+ --ipv4, -4 prefer IPv4
+ --ipv6, -6 prefer IPv6
+@@ -3755,6 +3756,22 @@ expand it.
+ free to specify just the local charset for a daemon transfer (e.g.
+ `--iconv=utf8`).
-+dit(bf(--tr=BAD/GOOD)) Transliterates filenames on the receiver, after the
-+iconv conversion (if any). This can be used to remove characters illegal
-+on the destination filesystem. If you use this option, consider saving a
-+"find . -ls" listing of the source in the destination to help you determine
-+the original filenames in case of need.
++0. `--tr=BAD/GOOD`
+
-+The argument consists of a string of characters to remove, optionally
-+followed by a slash and a string of corresponding characters with which to
-+replace them. The second string may be shorter, in which case any leftover
-+characters in the first string are simply deleted. For example,
-+bf(--tr=':\/!') replaces colons with exclamation marks and deletes backslashes.
-+Slashes cannot be transliterated because it would cause havoc.
++ Transliterates filenames on the receiver, after the iconv conversion (if
++ any). This can be used to remove characters illegal on the destination
++ filesystem. If you use this option, consider saving a "find . -ls" listing
++ of the source in the destination to help you determine the original
++ filenames in case of need.
+
-+If the receiver is invoked over a remote shell, use bf(--protect-args) to
-+stop the shell from interpreting any nasty characters in the argument.
++ The argument consists of a string of characters to remove, optionally
++ followed by a slash and a string of corresponding characters with which to
++ replace them. The second string may be shorter, in which case any leftover
++ characters in the first string are simply deleted. For example,
++ `--tr=':\/!'` replaces colons with exclamation marks and deletes
++ backslashes. Slashes cannot be transliterated because it would cause
++ havoc.
+
- dit(bf(-4, --ipv4) or bf(-6, --ipv6)) Tells rsync to prefer IPv4/IPv6
- when creating sockets. This only affects sockets that rsync has direct
- control over, such as the outgoing socket when directly contacting an
+ 0. `--ipv4`, `-4` or `--ipv6`, `-6`
+
+ Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh. This