Annotation of embedaddon/rsync/patches/transliterate.diff, revision 1.1.1.1
1.1 misho 1: This patch adds an option --tr=BAD/GOOD to transliterate filenames. It
2: can be used to remove characters illegal on the destination filesystem.
3: Jeff Weber expressed interest in this:
4:
5: http://lists.samba.org/archive/rsync/2007-October/018996.html
6:
7: To use this patch, run these commands for a successful build:
8:
9: patch -p1 <patches/transliterate.diff
10: ./configure (optional if already run)
11: make
12:
13: based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
14: diff --git a/flist.c b/flist.c
15: --- a/flist.c
16: +++ b/flist.c
17: @@ -77,6 +77,7 @@ extern uid_t our_uid;
18: extern struct stats stats;
19: extern char *filesfrom_host;
20: extern char *usermap, *groupmap;
21: +extern char *tr_opt;
22:
23: extern char curr_dir[MAXPATHLEN];
24:
25: @@ -104,6 +105,8 @@ int file_old_total = 0; /* total of active items that will soon be gone */
26: int flist_eof = 0; /* all the file-lists are now known */
27: int xfer_flags_as_varint = 0;
28:
29: +char tr_substitutions[256];
30: +
31: #define NORMAL_NAME 0
32: #define SLASH_ENDING_NAME 1
33: #define DOTDIR_NAME 2
34: @@ -674,6 +677,23 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
35: stats.total_size += F_LENGTH(file);
36: }
37:
38: +static void transliterate(char *path, int len) /* Apply the --tr mapping to path in place; len must be the length of path, since the loop only returns once len reaches 0. */
39: +{
40: + while (1) {
41: + /* Skip ahead to the next char of path that is listed in tr_opt, or to the end of the path if none is left. */
42: + int span = strcspn(path, tr_opt);
43: + if ((len -= span) == 0) /* only unlisted chars remained: done */
44: + return;
45: + path += span;
46: + if ((*path = tr_substitutions[*(uchar*)path]) == '\0') /* a '\0' table entry means: delete this char */
47: + memmove(path, path+1, len--); /* shift the tail left by one; len bytes covers the trailing '\0' too. */
48: + else {
49: + path++; /* char was replaced in place; step past it */
50: + len--;
51: + }
52: + }
53: +}
54: +
55: static struct file_struct *recv_file_entry(int f, struct file_list *flist, int xflags)
56: {
57: static int64 modtime, atime;
58: @@ -744,9 +764,13 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
59: outbuf.len = 0;
60: }
61: thisname[outbuf.len] = '\0';
62: + basename_len = outbuf.len;
63: }
64: #endif
65:
66: + if (tr_opt)
67: + transliterate(thisname, basename_len);
68: +
69: if (*thisname
70: && (clean_fname(thisname, CFN_REFUSE_DOT_DOT_DIRS) < 0 || (!relative_paths && *thisname == '/'))) {
71: rprintf(FERROR, "ABORTING due to unsafe pathname from sender: %s\n", thisname);
72: @@ -2531,6 +2555,15 @@ struct file_list *recv_file_list(int f, int dir_ndx)
73: parse_name_map(usermap, True);
74: if (groupmap)
75: parse_name_map(groupmap, False);
76: + if (tr_opt) { /* Parse FROM/TO string and populate tr_substitutions[] */
77: + char *f, *t;
78: + if ((t = strchr(tr_opt, '/')) != NULL)
79: + *t++ = '\0';
80: + else
81: + t = "";
82: + for (f = tr_opt; *f; f++)
83: + tr_substitutions[*(uchar*)f] = *t ? *t++ : '\0';
84: + }
85: }
86:
87: start_read = stats.total_read;
88: diff --git a/options.c b/options.c
89: --- a/options.c
90: +++ b/options.c
91: @@ -201,6 +201,7 @@ int logfile_format_has_i = 0;
92: int logfile_format_has_o_or_i = 0;
93: int always_checksum = 0;
94: int list_only = 0;
95: +char *tr_opt = NULL;
96:
97: #define MAX_BATCH_NAME_LEN 256 /* Must be less than MAXPATHLEN-13 */
98: char *batch_name = NULL;
99: @@ -797,6 +798,7 @@ static struct poptOption long_options[] = {
100: {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
101: {"iconv", 0, POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
102: {"no-iconv", 0, POPT_ARG_NONE, 0, OPT_NO_ICONV, 0, 0 },
103: + {"tr", 0, POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
104: {"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 },
105: {"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 },
106: {"8-bit-output", '8', POPT_ARG_VAL, &allow_8bit_chars, 1, 0, 0 },
107: @@ -2436,6 +2438,24 @@ int parse_arguments(int *argc_p, const char ***argv_p)
108: }
109: }
110:
111: + if (tr_opt) {
112: + if (*tr_opt == '/' && tr_opt[1]) {
113: + snprintf(err_buf, sizeof err_buf,
114: + "Do not start the --tr arg with a slash\n");
115: + return 0;
116: + }
117: + if (*tr_opt && *tr_opt != '/') {
118: + need_unsorted_flist = 1;
119: + arg = strchr(tr_opt, '/');
120: + if (arg && strchr(arg+1, '/')) {
121: + snprintf(err_buf, sizeof err_buf,
122: + "--tr cannot transliterate slashes\n");
123: + return 0;
124: + }
125: + } else
126: + tr_opt = NULL;
127: + }
128: +
129: am_starting_up = 0;
130:
131: return 1;
132: @@ -2887,6 +2907,12 @@ void server_options(char **args, int *argc_p)
133: if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
134: args[ac++] = "--no-implied-dirs";
135:
136: + if (tr_opt) {
137: + if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
138: + goto oom;
139: + args[ac++] = arg;
140: + }
141: +
142: if (write_devices && am_sender)
143: args[ac++] = "--write-devices";
144:
145: diff --git a/rsync.1.md b/rsync.1.md
146: --- a/rsync.1.md
147: +++ b/rsync.1.md
148: @@ -466,6 +466,7 @@ detailed description below for a complete description.
149: --read-batch=FILE read a batched update from FILE
150: --protocol=NUM force an older protocol version to be used
151: --iconv=CONVERT_SPEC request charset conversion of filenames
152: +--tr=BAD/GOOD transliterate filenames
153: --checksum-seed=NUM set block/file checksum seed (advanced)
154: --ipv4, -4 prefer IPv4
155: --ipv6, -6 prefer IPv6
156: @@ -3319,6 +3320,25 @@ your home directory (remove the '=' for that).
157: free to specify just the local charset for a daemon transfer (e.g.
158: `--iconv=utf8`).
159:
160: +0. `--tr=BAD/GOOD`
161: +
162: + Transliterates filenames on the receiver, after the iconv conversion (if
163: + any). This can be used to remove characters illegal on the destination
164: + filesystem. If you use this option, consider saving a "find . -ls" listing
165: + of the source in the destination to help you determine the original
166: + filenames in case of need.
167: +
168: + The argument consists of a string of characters to remove, optionally
169: + followed by a slash and a string of corresponding characters with which to
170: + replace them. The second string may be shorter, in which case any leftover
171: + characters in the first string are simply deleted. For example,
172: + `--tr=':\/!'` replaces colons with exclamation marks and deletes
173: +backslashes. Slashes cannot be transliterated because doing so would
174: +cause havoc.
175: +
176: + If the receiver is invoked over a remote shell, use `--protect-args` to
177: + stop the shell from interpreting any nasty characters in the argument.
178: +
179: 0. `--ipv4`, `-4` or `--ipv6`, `-6`
180:
181: Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh. This
182: diff -Nurp a/rsync.1 b/rsync.1
183: --- a/rsync.1
184: +++ b/rsync.1
185: @@ -542,6 +542,7 @@ detailed description below for a complet
186: --read-batch=FILE read a batched update from FILE
187: --protocol=NUM force an older protocol version to be used
188: --iconv=CONVERT_SPEC request charset conversion of filenames
189: +--tr=BAD/GOOD transliterate filenames
190: --checksum-seed=NUM set block/file checksum seed (advanced)
191: --ipv4, -4 prefer IPv4
192: --ipv6, -6 prefer IPv6
193: @@ -3372,6 +3373,23 @@ daemon uses the charset specified in its
194: regardless of the remote charset you actually pass. Thus, you may feel
195: free to specify just the local charset for a daemon transfer (e.g.
196: \fB\-\-iconv=utf8\fP).
197: +.IP "\fB\-\-tr=BAD/GOOD\fP"
198: +Transliterates filenames on the receiver, after the iconv conversion (if
199: +any). This can be used to remove characters illegal on the destination
200: +filesystem. If you use this option, consider saving a "find . \-ls" listing
201: +of the source in the destination to help you determine the original
202: +filenames in case of need.
203: +.IP
204: +The argument consists of a string of characters to remove, optionally
205: +followed by a slash and a string of corresponding characters with which to
206: +replace them. The second string may be shorter, in which case any leftover
207: +characters in the first string are simply deleted. For example,
208: +\fB\-\-tr=':\\/!'\fP replaces colons with exclamation marks and deletes
209: +backslashes. Slashes cannot be transliterated because doing so would
210: +cause havoc.
211: +.IP
212: +If the receiver is invoked over a remote shell, use \fB\-\-protect-args\fP to
213: +stop the shell from interpreting any nasty characters in the argument.
214: .IP "\fB\-\-ipv4\fP, \fB\-4\fP or \fB\-\-ipv6\fP, \fB\-6\fP"
215: Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh. This
216: affects sockets that rsync has direct control over, such as the outgoing
217: diff -Nurp a/rsync.1.html b/rsync.1.html
218: --- a/rsync.1.html
219: +++ b/rsync.1.html
220: @@ -457,6 +457,7 @@ detailed description below for a complet
221: --read-batch=FILE read a batched update from FILE
222: --protocol=NUM force an older protocol version to be used
223: --iconv=CONVERT_SPEC request charset conversion of filenames
224: +--tr=BAD/GOOD transliterate filenames
225: --checksum-seed=NUM set block/file checksum seed (advanced)
226: --ipv4, -4 prefer IPv4
227: --ipv6, -6 prefer IPv6
228: @@ -3129,6 +3130,23 @@ free to specify just the local charset f
229: <code>--iconv=utf8</code>).</p>
230: </dd>
231:
232: +<dt><code>--tr=BAD/GOOD</code></dt><dd>
233: +<p>Transliterates filenames on the receiver, after the iconv conversion (if
234: +any). This can be used to remove characters illegal on the destination
235: +filesystem. If you use this option, consider saving a "find . -⁠ls" listing
236: +of the source in the destination to help you determine the original
237: +filenames in case of need.</p>
238: +<p>The argument consists of a string of characters to remove, optionally
239: +followed by a slash and a string of corresponding characters with which to
240: +replace them. The second string may be shorter, in which case any leftover
241: +characters in the first string are simply deleted. For example,
242: +<code>--tr=':\/!'</code> replaces colons with exclamation marks and deletes
243: +backslashes. Slashes cannot be transliterated because doing so would
244: +cause havoc.</p>
245: +<p>If the receiver is invoked over a remote shell, use <code>--protect-args</code> to
246: +stop the shell from interpreting any nasty characters in the argument.</p>
247: +</dd>
248: +
249: <dt><code>--ipv4</code>, <code>-4</code> or <code>--ipv6</code>, <code>-6</code></dt><dd>
250: <p>Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh. This
251: affects sockets that rsync has direct control over, such as the outgoing
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>