This patch adds an option --tr=BAD/GOOD to transliterate filenames. It
can be used to remove characters illegal on the destination filesystem.
Jeff Weber expressed interest in this:
http://lists.samba.org/archive/rsync/2007-October/018996.html
To use this patch, run these commands for a successful build:
patch -p1 <patches/transliterate.diff
./configure (optional if already run)
make
based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
diff --git a/flist.c b/flist.c
--- a/flist.c
+++ b/flist.c
@@ -77,6 +77,7 @@ extern uid_t our_uid;
extern struct stats stats;
extern char *filesfrom_host;
extern char *usermap, *groupmap;
+extern char *tr_opt;
extern char curr_dir[MAXPATHLEN];
@@ -104,6 +105,8 @@ int file_old_total = 0; /* total of active items that will soon be gone */
int flist_eof = 0; /* all the file-lists are now known */
int xfer_flags_as_varint = 0;
+char tr_substitutions[256];
+
#define NORMAL_NAME 0
#define SLASH_ENDING_NAME 1
#define DOTDIR_NAME 2
@@ -674,6 +677,23 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
stats.total_size += F_LENGTH(file);
}
+static void transliterate(char *path, int len)
+{
+ while (1) {
+ /* Find position of any char in tr_opt in path, or the end of the path. */
+ int span = strcspn(path, tr_opt);
+ if ((len -= span) == 0)
+ return;
+ path += span;
+ if ((*path = tr_substitutions[*(uchar*)path]) == '\0')
+ memmove(path, path+1, len--); /* copies the trailing '\0' too. */
+ else {
+ path++;
+ len--;
+ }
+ }
+}
+
static struct file_struct *recv_file_entry(int f, struct file_list *flist, int xflags)
{
static int64 modtime, atime;
@@ -744,9 +764,13 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
outbuf.len = 0;
}
thisname[outbuf.len] = '\0';
+ basename_len = outbuf.len;
}
#endif
+ if (tr_opt)
+ transliterate(thisname, basename_len);
+
if (*thisname
&& (clean_fname(thisname, CFN_REFUSE_DOT_DOT_DIRS) < 0 || (!relative_paths && *thisname == '/'))) {
rprintf(FERROR, "ABORTING due to unsafe pathname from sender: %s\n", thisname);
@@ -2531,6 +2555,15 @@ struct file_list *recv_file_list(int f, int dir_ndx)
parse_name_map(usermap, True);
if (groupmap)
parse_name_map(groupmap, False);
+ if (tr_opt) { /* Parse FROM/TO string and populate tr_substitutions[] */
+ char *f, *t;
+ if ((t = strchr(tr_opt, '/')) != NULL)
+ *t++ = '\0';
+ else
+ t = "";
+ for (f = tr_opt; *f; f++)
+ tr_substitutions[*(uchar*)f] = *t ? *t++ : '\0';
+ }
}
start_read = stats.total_read;
diff --git a/options.c b/options.c
--- a/options.c
+++ b/options.c
@@ -201,6 +201,7 @@ int logfile_format_has_i = 0;
int logfile_format_has_o_or_i = 0;
int always_checksum = 0;
int list_only = 0;
+char *tr_opt = NULL;
#define MAX_BATCH_NAME_LEN 256 /* Must be less than MAXPATHLEN-13 */
char *batch_name = NULL;
@@ -797,6 +798,7 @@ static struct poptOption long_options[] = {
{"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
{"iconv", 0, POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
{"no-iconv", 0, POPT_ARG_NONE, 0, OPT_NO_ICONV, 0, 0 },
+ {"tr", 0, POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
{"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 },
{"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 },
{"8-bit-output", '8', POPT_ARG_VAL, &allow_8bit_chars, 1, 0, 0 },
@@ -2436,6 +2438,24 @@ int parse_arguments(int *argc_p, const char ***argv_p)
}
}
+ if (tr_opt) {
+ if (*tr_opt == '/' && tr_opt[1]) {
+ snprintf(err_buf, sizeof err_buf,
+ "Do not start the --tr arg with a slash\n");
+ return 0;
+ }
+ if (*tr_opt && *tr_opt != '/') {
+ need_unsorted_flist = 1;
+ arg = strchr(tr_opt, '/');
+ if (arg && strchr(arg+1, '/')) {
+ snprintf(err_buf, sizeof err_buf,
+ "--tr cannot transliterate slashes\n");
+ return 0;
+ }
+ } else
+ tr_opt = NULL;
+ }
+
am_starting_up = 0;
return 1;
@@ -2887,6 +2907,12 @@ void server_options(char **args, int *argc_p)
if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
args[ac++] = "--no-implied-dirs";
+ if (tr_opt) {
+ if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
+ goto oom;
+ args[ac++] = arg;
+ }
+
if (write_devices && am_sender)
args[ac++] = "--write-devices";
diff --git a/rsync.1.md b/rsync.1.md
--- a/rsync.1.md
+++ b/rsync.1.md
@@ -466,6 +466,7 @@ detailed description below for a complete description.
--read-batch=FILE read a batched update from FILE
--protocol=NUM force an older protocol version to be used
--iconv=CONVERT_SPEC request charset conversion of filenames
+--tr=BAD/GOOD transliterate filenames
--checksum-seed=NUM set block/file checksum seed (advanced)
--ipv4, -4 prefer IPv4
--ipv6, -6 prefer IPv6
@@ -3319,6 +3320,25 @@ your home directory (remove the '=' for that).
free to specify just the local charset for a daemon transfer (e.g.
`--iconv=utf8`).
+0. `--tr=BAD/GOOD`
+
+ Transliterates filenames on the receiver, after the iconv conversion (if
+ any). This can be used to remove characters illegal on the destination
+ filesystem. If you use this option, consider saving a "find . -ls" listing
+ of the source in the destination to help you determine the original
+ filenames in case of need.
+
+ The argument consists of a string of characters to remove, optionally
+ followed by a slash and a string of corresponding characters with which to
+ replace them. The second string may be shorter, in which case any leftover
+ characters in the first string are simply deleted. For example,
+ `--tr=':\/!'` replaces colons with exclamation marks and deletes
+ backslashes. Slashes cannot be transliterated because it would cause
+ havoc.
+
+ If the receiver is invoked over a remote shell, use `--protect-args` to
+ stop the shell from interpreting any nasty characters in the argument.
+
0. `--ipv4`, `-4` or `--ipv6`, `-6`
Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh. This
diff -Nurp a/rsync.1 b/rsync.1
--- a/rsync.1
+++ b/rsync.1
@@ -542,6 +542,7 @@ detailed description below for a complet
--read-batch=FILE read a batched update from FILE
--protocol=NUM force an older protocol version to be used
--iconv=CONVERT_SPEC request charset conversion of filenames
+--tr=BAD/GOOD transliterate filenames
--checksum-seed=NUM set block/file checksum seed (advanced)
--ipv4, -4 prefer IPv4
--ipv6, -6 prefer IPv6
@@ -3372,6 +3373,23 @@ daemon uses the charset specified in its
regardless of the remote charset you actually pass. Thus, you may feel
free to specify just the local charset for a daemon transfer (e.g.
\fB\-\-iconv=utf8\fP).
+.IP "\fB\-\-tr=BAD/GOOD\fP"
+Transliterates filenames on the receiver, after the iconv conversion (if
+any). This can be used to remove characters illegal on the destination
+filesystem. If you use this option, consider saving a "find . \-ls" listing
+of the source in the destination to help you determine the original
+filenames in case of need.
+.IP
+The argument consists of a string of characters to remove, optionally
+followed by a slash and a string of corresponding characters with which to
+replace them. The second string may be shorter, in which case any leftover
+characters in the first string are simply deleted. For example,
+\fB\-\-tr=':\\/!'\fP replaces colons with exclamation marks and deletes
+backslashes. Slashes cannot be transliterated because it would cause
+havoc.
+.IP
+If the receiver is invoked over a remote shell, use \fB\-\-protect-args\fP to
+stop the shell from interpreting any nasty characters in the argument.
.IP "\fB\-\-ipv4\fP, \fB\-4\fP or \fB\-\-ipv6\fP, \fB\-6\fP"
Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh. This
affects sockets that rsync has direct control over, such as the outgoing
diff -Nurp a/rsync.1.html b/rsync.1.html
--- a/rsync.1.html
+++ b/rsync.1.html
@@ -457,6 +457,7 @@ detailed description below for a complet
--read-batch=FILE read a batched update from FILE
--protocol=NUM force an older protocol version to be used
--iconv=CONVERT_SPEC request charset conversion of filenames
+--tr=BAD/GOOD transliterate filenames
--checksum-seed=NUM set block/file checksum seed (advanced)
--ipv4, -4 prefer IPv4
--ipv6, -6 prefer IPv6
@@ -3129,6 +3130,23 @@ free to specify just the local charset f
<code>--iconv=utf8</code>).</p>
</dd>
+<dt><code>--tr=BAD/GOOD</code></dt><dd>
+<p>Transliterates filenames on the receiver, after the iconv conversion (if
+any). This can be used to remove characters illegal on the destination
+filesystem. If you use this option, consider saving a "find . -⁠ls" listing
+of the source in the destination to help you determine the original
+filenames in case of need.</p>
+<p>The argument consists of a string of characters to remove, optionally
+followed by a slash and a string of corresponding characters with which to
+replace them. The second string may be shorter, in which case any leftover
+characters in the first string are simply deleted. For example,
+<code>--tr=':\/!'</code> replaces colons with exclamation marks and deletes
+backslashes. Slashes cannot be transliterated because it would cause
+havoc.</p>
+<p>If the receiver is invoked over a remote shell, use <code>--protect-args</code> to
+stop the shell from interpreting any nasty characters in the argument.</p>
+</dd>
+
<dt><code>--ipv4</code>, <code>-4</code> or <code>--ipv6</code>, <code>-6</code></dt><dd>
<p>Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh. This
affects sockets that rsync has direct control over, such as the outgoing
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>