File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / rsync / patches / transliterate.diff
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 00:32:36 2021 UTC (4 years ago) by misho
Branches: rsync, MAIN
CVS tags: v3_2_3, HEAD
rsync 3.2.3

    1: This patch adds an option --tr=BAD/GOOD to transliterate filenames.  It
    2: can be used to remove characters illegal on the destination filesystem.
    3: Jeff Weber expressed interest in this:
    4: 
    5: http://lists.samba.org/archive/rsync/2007-October/018996.html
    6: 
    7: To use this patch, run these commands for a successful build:
    8: 
    9:     patch -p1 <patches/transliterate.diff
   10:     ./configure                                 (optional if already run)
   11:     make
   12: 
   13: based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
   14: diff --git a/flist.c b/flist.c
   15: --- a/flist.c
   16: +++ b/flist.c
   17: @@ -77,6 +77,7 @@ extern uid_t our_uid;
   18:  extern struct stats stats;
   19:  extern char *filesfrom_host;
   20:  extern char *usermap, *groupmap;
   21: +extern char *tr_opt;
   22:  
   23:  extern char curr_dir[MAXPATHLEN];
   24:  
   25: @@ -104,6 +105,8 @@ int file_old_total = 0; /* total of active items that will soon be gone */
   26:  int flist_eof = 0; /* all the file-lists are now known */
   27:  int xfer_flags_as_varint = 0;
   28:  
   29: +char tr_substitutions[256];
   30: +
   31:  #define NORMAL_NAME 0
   32:  #define SLASH_ENDING_NAME 1
   33:  #define DOTDIR_NAME 2
   34: @@ -674,6 +677,23 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
   35:  		stats.total_size += F_LENGTH(file);
   36:  }
   37:  
   38: +static void transliterate(char *path, int len) /* Replace or delete, in place, each char of path that is listed in tr_opt; len is expected to be the length of path, not counting the '\0'. */
   39: +{
   40: +	while (1) {
   41: +		/* span = length of the leading run of path holding no char from tr_opt; it reaches the terminating '\0' when no listed char remains. */
   42: +		int span = strcspn(path, tr_opt);
   43: +		if ((len -= span) == 0) /* the run covered everything that was left */
   44: +			return;
   45: +		path += span;
   46: +		if ((*path = tr_substitutions[*(uchar*)path]) == '\0') /* store the mapped char; a '\0' table entry marks a char to delete */
   47: +			memmove(path, path+1, len--); /* shift the tail left by one; the len bytes moved include the trailing '\0'. */
   48: +		else {
   49: +			path++;
   50: +			len--;
   51: +		}
   52: +	}
   53: +}
   54: +
   55:  static struct file_struct *recv_file_entry(int f, struct file_list *flist, int xflags)
   56:  {
   57:  	static int64 modtime, atime;
   58: @@ -744,9 +764,13 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
   59:  			outbuf.len = 0;
   60:  		}
   61:  		thisname[outbuf.len] = '\0';
   62: +		basename_len = outbuf.len;
   63:  	}
   64:  #endif
   65:  
   66: +	if (tr_opt)
   67: +		transliterate(thisname, basename_len);
   68: +
   69:  	if (*thisname
   70:  	 && (clean_fname(thisname, CFN_REFUSE_DOT_DOT_DIRS) < 0 || (!relative_paths && *thisname == '/'))) {
   71:  		rprintf(FERROR, "ABORTING due to unsafe pathname from sender: %s\n", thisname);
   72: @@ -2531,6 +2555,15 @@ struct file_list *recv_file_list(int f, int dir_ndx)
   73:  			parse_name_map(usermap, True);
   74:  		if (groupmap)
   75:  			parse_name_map(groupmap, False);
   76: +		if (tr_opt) { /* Parse FROM/TO string and populate tr_substitutions[] */
   77: +			char *f, *t;
   78: +			if ((t = strchr(tr_opt, '/')) != NULL)
   79: +				*t++ = '\0';
   80: +			else
   81: +				t = "";
   82: +			for (f = tr_opt; *f; f++)
   83: +				tr_substitutions[*(uchar*)f] = *t ? *t++ : '\0';
   84: +		}
   85:  	}
   86:  
   87:  	start_read = stats.total_read;
   88: diff --git a/options.c b/options.c
   89: --- a/options.c
   90: +++ b/options.c
   91: @@ -201,6 +201,7 @@ int logfile_format_has_i = 0;
   92:  int logfile_format_has_o_or_i = 0;
   93:  int always_checksum = 0;
   94:  int list_only = 0;
   95: +char *tr_opt = NULL;
   96:  
   97:  #define MAX_BATCH_NAME_LEN 256	/* Must be less than MAXPATHLEN-13 */
   98:  char *batch_name = NULL;
   99: @@ -797,6 +798,7 @@ static struct poptOption long_options[] = {
  100:    {"temp-dir",        'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
  101:    {"iconv",            0,  POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
  102:    {"no-iconv",         0,  POPT_ARG_NONE,   0, OPT_NO_ICONV, 0, 0 },
  103: +  {"tr",               0,  POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
  104:    {"ipv4",            '4', POPT_ARG_VAL,    &default_af_hint, AF_INET, 0, 0 },
  105:    {"ipv6",            '6', POPT_ARG_VAL,    &default_af_hint, AF_INET6, 0, 0 },
  106:    {"8-bit-output",    '8', POPT_ARG_VAL,    &allow_8bit_chars, 1, 0, 0 },
  107: @@ -2436,6 +2438,24 @@ int parse_arguments(int *argc_p, const char ***argv_p)
  108:  		}
  109:  	}
  110:  
  111: +	if (tr_opt) {
  112: +		if (*tr_opt == '/' && tr_opt[1]) {
  113: +			snprintf(err_buf, sizeof err_buf,
  114: +				"Do not start the --tr arg with a slash\n");
  115: +			return 0;
  116: +		}
  117: +		if (*tr_opt && *tr_opt != '/') {
  118: +			need_unsorted_flist = 1;
  119: +			arg = strchr(tr_opt, '/');
  120: +			if (arg && strchr(arg+1, '/')) {
  121: +				snprintf(err_buf, sizeof err_buf,
  122: +					"--tr cannot transliterate slashes\n");
  123: +				return 0;
  124: +			}
  125: +		} else
  126: +			tr_opt = NULL;
  127: +	}
  128: +
  129:  	am_starting_up = 0;
  130:  
  131:  	return 1;
  132: @@ -2887,6 +2907,12 @@ void server_options(char **args, int *argc_p)
  133:  	if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
  134:  		args[ac++] = "--no-implied-dirs";
  135:  
  136: +	if (tr_opt) {
  137: +		if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
  138: +			goto oom;
  139: +		args[ac++] = arg;
  140: +	}
  141: +
  142:  	if (write_devices && am_sender)
  143:  		args[ac++] = "--write-devices";
  144:  
  145: diff --git a/rsync.1.md b/rsync.1.md
  146: --- a/rsync.1.md
  147: +++ b/rsync.1.md
  148: @@ -466,6 +466,7 @@ detailed description below for a complete description.
  149:  --read-batch=FILE        read a batched update from FILE
  150:  --protocol=NUM           force an older protocol version to be used
  151:  --iconv=CONVERT_SPEC     request charset conversion of filenames
  152: +--tr=BAD/GOOD            transliterate filenames
  153:  --checksum-seed=NUM      set block/file checksum seed (advanced)
  154:  --ipv4, -4               prefer IPv4
  155:  --ipv6, -6               prefer IPv6
  156: @@ -3319,6 +3320,25 @@ your home directory (remove the '=' for that).
  157:      free to specify just the local charset for a daemon transfer (e.g.
  158:      `--iconv=utf8`).
  159:  
  160: +0.  `--tr=BAD/GOOD`
  161: +
  162: +    Transliterates filenames on the receiver, after the iconv conversion (if
  163: +    any).  This can be used to remove characters illegal on the destination
  164: +    filesystem.  If you use this option, consider saving a "find . -ls" listing
  165: +    of the source in the destination to help you determine the original
  166: +    filenames in case of need.
  167: +
  168: +    The argument consists of a string of characters to remove, optionally
  169: +    followed by a slash and a string of corresponding characters with which to
  170: +    replace them.  The second string may be shorter, in which case any leftover
  171: +    characters in the first string are simply deleted.  For example,
  172: +    `--tr=':\/!'` replaces colons with exclamation marks and deletes
  173: +    backslashes.  Slashes cannot be transliterated because it would cause
  174: +    havoc.
  175: +
  176: +    If the receiver is invoked over a remote shell, use `--protect-args` to
  177: +    stop the shell from interpreting any nasty characters in the argument.
  178: +
  179:  0.  `--ipv4`, `-4` or `--ipv6`, `-6`
  180:  
  181:      Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh.  This
  182: diff -Nurp a/rsync.1 b/rsync.1
  183: --- a/rsync.1
  184: +++ b/rsync.1
  185: @@ -542,6 +542,7 @@ detailed description below for a complet
  186:  --read-batch=FILE        read a batched update from FILE
  187:  --protocol=NUM           force an older protocol version to be used
  188:  --iconv=CONVERT_SPEC     request charset conversion of filenames
  189: +--tr=BAD/GOOD            transliterate filenames
  190:  --checksum-seed=NUM      set block/file checksum seed (advanced)
  191:  --ipv4, -4               prefer IPv4
  192:  --ipv6, -6               prefer IPv6
  193: @@ -3372,6 +3373,23 @@ daemon uses the charset specified in its
  194:  regardless of the remote charset you actually pass.  Thus, you may feel
  195:  free to specify just the local charset for a daemon transfer (e.g.
  196:  \fB\-\-iconv=utf8\fP).
  197: +.IP "\fB\-\-tr=BAD/GOOD\fP"
  198: +Transliterates filenames on the receiver, after the iconv conversion (if
  199: +any).  This can be used to remove characters illegal on the destination
  200: +filesystem.  If you use this option, consider saving a "find . \-ls" listing
  201: +of the source in the destination to help you determine the original
  202: +filenames in case of need.
  203: +.IP
  204: +The argument consists of a string of characters to remove, optionally
  205: +followed by a slash and a string of corresponding characters with which to
  206: +replace them.  The second string may be shorter, in which case any leftover
  207: +characters in the first string are simply deleted.  For example,
  208: +\fB\-\-tr=':\\/!'\fP replaces colons with exclamation marks and deletes
  209: +backslashes.  Slashes cannot be transliterated because it would cause
  210: +havoc.
  211: +.IP
  212: +If the receiver is invoked over a remote shell, use \fB\-\-protect-args\fP to
  213: +stop the shell from interpreting any nasty characters in the argument.
  214:  .IP "\fB\-\-ipv4\fP, \fB\-4\fP or \fB\-\-ipv6\fP, \fB\-6\fP"
  215:  Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh.  This
  216:  affects sockets that rsync has direct control over, such as the outgoing
  217: diff -Nurp a/rsync.1.html b/rsync.1.html
  218: --- a/rsync.1.html
  219: +++ b/rsync.1.html
  220: @@ -457,6 +457,7 @@ detailed description below for a complet
  221:  --read-batch=FILE        read a batched update from FILE
  222:  --protocol=NUM           force an older protocol version to be used
  223:  --iconv=CONVERT_SPEC     request charset conversion of filenames
  224: +--tr=BAD/GOOD            transliterate filenames
  225:  --checksum-seed=NUM      set block/file checksum seed (advanced)
  226:  --ipv4, -4               prefer IPv4
  227:  --ipv6, -6               prefer IPv6
  228: @@ -3129,6 +3130,23 @@ free to specify just the local charset f
  229:  <code>--iconv=utf8</code>).</p>
  230:  </dd>
  231:  
  232: +<dt><code>--tr=BAD/GOOD</code></dt><dd>
  233: +<p>Transliterates filenames on the receiver, after the iconv conversion (if
  234: +any).  This can be used to remove characters illegal on the destination
  235: +filesystem.  If you use this option, consider saving a &quot;find . -&#8288;ls&quot; listing
  236: +of the source in the destination to help you determine the original
  237: +filenames in case of need.</p>
  238: +<p>The argument consists of a string of characters to remove, optionally
  239: +followed by a slash and a string of corresponding characters with which to
  240: +replace them.  The second string may be shorter, in which case any leftover
  241: +characters in the first string are simply deleted.  For example,
  242: +<code>--tr=':\/!'</code> replaces colons with exclamation marks and deletes
  243: +backslashes.  Slashes cannot be transliterated because it would cause
  244: +havoc.</p>
  245: +<p>If the receiver is invoked over a remote shell, use <code>--protect-args</code> to
  246: +stop the shell from interpreting any nasty characters in the argument.</p>
  247: +</dd>
  248: +
  249:  <dt><code>--ipv4</code>, <code>-4</code> or <code>--ipv6</code>, <code>-6</code></dt><dd>
  250:  <p>Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh.  This
  251:  affects sockets that rsync has direct control over, such as the outgoing

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>