Annotation of embedaddon/rsync/patches/transliterate.diff, revision 1.1.1.1

1.1       misho       1: This patch adds an option --tr=BAD/GOOD to transliterate filenames.  It
                      2: can be used to remove characters illegal on the destination filesystem.
                      3: Jeff Weber expressed interest in this:
                      4: 
                      5: http://lists.samba.org/archive/rsync/2007-October/018996.html
                      6: 
                      7: To use this patch, run these commands for a successful build:
                      8: 
                      9:     patch -p1 <patches/transliterate.diff
                     10:     ./configure                                 (optional if already run)
                     11:     make
                     12: 
                     13: based-on: e94bad1c156fc3910f24e2b3b71a81b0b0bdeb70
                     14: diff --git a/flist.c b/flist.c
                     15: --- a/flist.c
                     16: +++ b/flist.c
                     17: @@ -77,6 +77,7 @@ extern uid_t our_uid;
                     18:  extern struct stats stats;
                     19:  extern char *filesfrom_host;
                     20:  extern char *usermap, *groupmap;
                     21: +extern char *tr_opt;
                     22:  
                     23:  extern char curr_dir[MAXPATHLEN];
                     24:  
                     25: @@ -104,6 +105,8 @@ int file_old_total = 0; /* total of active items that will soon be gone */
                     26:  int flist_eof = 0; /* all the file-lists are now known */
                     27:  int xfer_flags_as_varint = 0;
                     28:  
                     29: +char tr_substitutions[256];
                     30: +
                     31:  #define NORMAL_NAME 0
                     32:  #define SLASH_ENDING_NAME 1
                     33:  #define DOTDIR_NAME 2
                     34: @@ -674,6 +677,23 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
                     35:                stats.total_size += F_LENGTH(file);
                     36:  }
                     37:  
                     38: +static void transliterate(char *path, int len)
                     39: +{
                     40: +      while (1) {
                     41: +              /* Skip to the next char in path that is listed in tr_opt, or to the end of the path. */
                     42: +              int span = strcspn(path, tr_opt);
                     43: +              if ((len -= span) == 0)
                     44: +                      return;
                     45: +              path += span;
                     46: +              if ((*path = tr_substitutions[*(uchar*)path]) == '\0')
                     47: +                      memmove(path, path+1, len--); /* copies the trailing '\0' too. */
                     48: +              else {
                     49: +                      path++;
                     50: +                      len--;
                     51: +              }
                     52: +      }
                     53: +}
                     54: +
                     55:  static struct file_struct *recv_file_entry(int f, struct file_list *flist, int xflags)
                     56:  {
                     57:        static int64 modtime, atime;
                     58: @@ -744,9 +764,13 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
                     59:                        outbuf.len = 0;
                     60:                }
                     61:                thisname[outbuf.len] = '\0';
                     62: +              basename_len = outbuf.len;
                     63:        }
                     64:  #endif
                     65:  
                     66: +      if (tr_opt)
                     67: +              transliterate(thisname, basename_len);
                     68: +
                     69:        if (*thisname
                     70:         && (clean_fname(thisname, CFN_REFUSE_DOT_DOT_DIRS) < 0 || (!relative_paths && *thisname == '/'))) {
                     71:                rprintf(FERROR, "ABORTING due to unsafe pathname from sender: %s\n", thisname);
                     72: @@ -2531,6 +2555,15 @@ struct file_list *recv_file_list(int f, int dir_ndx)
                     73:                        parse_name_map(usermap, True);
                     74:                if (groupmap)
                     75:                        parse_name_map(groupmap, False);
                     76: +              if (tr_opt) { /* Parse BAD/GOOD string and populate tr_substitutions[] */
                     77: +                      char *f, *t;
                     78: +                      if ((t = strchr(tr_opt, '/')) != NULL)
                     79: +                              *t++ = '\0';
                     80: +                      else
                     81: +                              t = "";
                     82: +                      for (f = tr_opt; *f; f++)
                     83: +                              tr_substitutions[*(uchar*)f] = *t ? *t++ : '\0';
                     84: +              }
                     85:        }
                     86:  
                     87:        start_read = stats.total_read;
                     88: diff --git a/options.c b/options.c
                     89: --- a/options.c
                     90: +++ b/options.c
                     91: @@ -201,6 +201,7 @@ int logfile_format_has_i = 0;
                     92:  int logfile_format_has_o_or_i = 0;
                     93:  int always_checksum = 0;
                     94:  int list_only = 0;
                     95: +char *tr_opt = NULL;
                     96:  
                     97:  #define MAX_BATCH_NAME_LEN 256        /* Must be less than MAXPATHLEN-13 */
                     98:  char *batch_name = NULL;
                     99: @@ -797,6 +798,7 @@ static struct poptOption long_options[] = {
                    100:    {"temp-dir",        'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
                    101:    {"iconv",            0,  POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
                    102:    {"no-iconv",         0,  POPT_ARG_NONE,   0, OPT_NO_ICONV, 0, 0 },
                    103: +  {"tr",               0,  POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
                    104:    {"ipv4",            '4', POPT_ARG_VAL,    &default_af_hint, AF_INET, 0, 0 },
                    105:    {"ipv6",            '6', POPT_ARG_VAL,    &default_af_hint, AF_INET6, 0, 0 },
                    106:    {"8-bit-output",    '8', POPT_ARG_VAL,    &allow_8bit_chars, 1, 0, 0 },
                    107: @@ -2436,6 +2438,24 @@ int parse_arguments(int *argc_p, const char ***argv_p)
                    108:                }
                    109:        }
                    110:  
                    111: +      if (tr_opt) {
                    112: +              if (*tr_opt == '/' && tr_opt[1]) {
                    113: +                      snprintf(err_buf, sizeof err_buf,
                    114: +                              "Do not start the --tr arg with a slash\n");
                    115: +                      return 0;
                    116: +              }
                    117: +              if (*tr_opt && *tr_opt != '/') {
                    118: +                      need_unsorted_flist = 1;
                    119: +                      arg = strchr(tr_opt, '/');
                    120: +                      if (arg && strchr(arg+1, '/')) {
                    121: +                              snprintf(err_buf, sizeof err_buf,
                    122: +                                      "--tr cannot transliterate slashes\n");
                    123: +                              return 0;
                    124: +                      }
                    125: +              } else
                    126: +                      tr_opt = NULL;
                    127: +      }
                    128: +
                    129:        am_starting_up = 0;
                    130:  
                    131:        return 1;
                    132: @@ -2887,6 +2907,12 @@ void server_options(char **args, int *argc_p)
                    133:        if (relative_paths && !implied_dirs && (!am_sender || protocol_version >= 30))
                    134:                args[ac++] = "--no-implied-dirs";
                    135:  
                    136: +      if (tr_opt) {
                    137: +              if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
                    138: +                      goto oom;
                    139: +              args[ac++] = arg;
                    140: +      }
                    141: +
                    142:        if (write_devices && am_sender)
                    143:                args[ac++] = "--write-devices";
                    144:  
                    145: diff --git a/rsync.1.md b/rsync.1.md
                    146: --- a/rsync.1.md
                    147: +++ b/rsync.1.md
                    148: @@ -466,6 +466,7 @@ detailed description below for a complete description.
                    149:  --read-batch=FILE        read a batched update from FILE
                    150:  --protocol=NUM           force an older protocol version to be used
                    151:  --iconv=CONVERT_SPEC     request charset conversion of filenames
                    152: +--tr=BAD/GOOD            transliterate filenames
                    153:  --checksum-seed=NUM      set block/file checksum seed (advanced)
                    154:  --ipv4, -4               prefer IPv4
                    155:  --ipv6, -6               prefer IPv6
                    156: @@ -3319,6 +3320,25 @@ your home directory (remove the '=' for that).
                    157:      free to specify just the local charset for a daemon transfer (e.g.
                    158:      `--iconv=utf8`).
                    159:  
                    160: +0.  `--tr=BAD/GOOD`
                    161: +
                    162: +    Transliterates filenames on the receiver, after the iconv conversion (if
                    163: +    any).  This can be used to remove characters illegal on the destination
                    164: +    filesystem.  If you use this option, consider saving a "find . -ls" listing
                    165: +    of the source in the destination to help you determine the original
                    166: +    filenames in case of need.
                    167: +
                    168: +    The argument consists of a string of characters to remove, optionally
                    169: +    followed by a slash and a string of corresponding characters with which to
                    170: +    replace them.  The second string may be shorter, in which case any leftover
                    171: +    characters in the first string are simply deleted.  For example,
                    172: +    `--tr=':\/!'` replaces colons with exclamation marks and deletes
                    173: +    backslashes.  Slashes cannot be transliterated because it would cause
                    174: +    havoc.
                    175: +
                    176: +    If the receiver is invoked over a remote shell, use `--protect-args` to
                    177: +    stop the shell from interpreting any nasty characters in the argument.
                    178: +
                    179:  0.  `--ipv4`, `-4` or `--ipv6`, `-6`
                    180:  
                    181:      Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh.  This
                    182: diff -Nurp a/rsync.1 b/rsync.1
                    183: --- a/rsync.1
                    184: +++ b/rsync.1
                    185: @@ -542,6 +542,7 @@ detailed description below for a complet
                    186:  --read-batch=FILE        read a batched update from FILE
                    187:  --protocol=NUM           force an older protocol version to be used
                    188:  --iconv=CONVERT_SPEC     request charset conversion of filenames
                    189: +--tr=BAD/GOOD            transliterate filenames
                    190:  --checksum-seed=NUM      set block/file checksum seed (advanced)
                    191:  --ipv4, -4               prefer IPv4
                    192:  --ipv6, -6               prefer IPv6
                    193: @@ -3372,6 +3373,23 @@ daemon uses the charset specified in its
                    194:  regardless of the remote charset you actually pass.  Thus, you may feel
                    195:  free to specify just the local charset for a daemon transfer (e.g.
                    196:  \fB\-\-iconv=utf8\fP).
                    197: +.IP "\fB\-\-tr=BAD/GOOD\fP"
                    198: +Transliterates filenames on the receiver, after the iconv conversion (if
                    199: +any).  This can be used to remove characters illegal on the destination
                    200: +filesystem.  If you use this option, consider saving a "find . \-ls" listing
                    201: +of the source in the destination to help you determine the original
                    202: +filenames in case of need.
                    203: +.IP
                    204: +The argument consists of a string of characters to remove, optionally
                    205: +followed by a slash and a string of corresponding characters with which to
                    206: +replace them.  The second string may be shorter, in which case any leftover
                    207: +characters in the first string are simply deleted.  For example,
                    208: +\fB\-\-tr=':\\/!'\fP replaces colons with exclamation marks and deletes
                    209: +backslashes.  Slashes cannot be transliterated because it would cause
                    210: +havoc.
                    211: +.IP
                    212: +If the receiver is invoked over a remote shell, use \fB\-\-protect-args\fP to
                    213: +stop the shell from interpreting any nasty characters in the argument.
                    214:  .IP "\fB\-\-ipv4\fP, \fB\-4\fP or \fB\-\-ipv6\fP, \fB\-6\fP"
                    215:  Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh.  This
                    216:  affects sockets that rsync has direct control over, such as the outgoing
                    217: diff -Nurp a/rsync.1.html b/rsync.1.html
                    218: --- a/rsync.1.html
                    219: +++ b/rsync.1.html
                    220: @@ -457,6 +457,7 @@ detailed description below for a complet
                    221:  --read-batch=FILE        read a batched update from FILE
                    222:  --protocol=NUM           force an older protocol version to be used
                    223:  --iconv=CONVERT_SPEC     request charset conversion of filenames
                    224: +--tr=BAD/GOOD            transliterate filenames
                    225:  --checksum-seed=NUM      set block/file checksum seed (advanced)
                    226:  --ipv4, -4               prefer IPv4
                    227:  --ipv6, -6               prefer IPv6
                    228: @@ -3129,6 +3130,23 @@ free to specify just the local charset f
                    229:  <code>--iconv=utf8</code>).</p>
                    230:  </dd>
                    231:  
                    232: +<dt><code>--tr=BAD/GOOD</code></dt><dd>
                    233: +<p>Transliterates filenames on the receiver, after the iconv conversion (if
                    234: +any).  This can be used to remove characters illegal on the destination
                    235: +filesystem.  If you use this option, consider saving a &quot;find . -&#8288;ls&quot; listing
                    236: +of the source in the destination to help you determine the original
                    237: +filenames in case of need.</p>
                    238: +<p>The argument consists of a string of characters to remove, optionally
                    239: +followed by a slash and a string of corresponding characters with which to
                    240: +replace them.  The second string may be shorter, in which case any leftover
                    241: +characters in the first string are simply deleted.  For example,
                    242: +<code>--tr=':\/!'</code> replaces colons with exclamation marks and deletes
                    243: +backslashes.  Slashes cannot be transliterated because it would cause
                    244: +havoc.</p>
                    245: +<p>If the receiver is invoked over a remote shell, use <code>--protect-args</code> to
                    246: +stop the shell from interpreting any nasty characters in the argument.</p>
                    247: +</dd>
                    248: +
                    249:  <dt><code>--ipv4</code>, <code>-4</code> or <code>--ipv6</code>, <code>-6</code></dt><dd>
                    250:  <p>Tells rsync to prefer IPv4/IPv6 when creating sockets or running ssh.  This
                    251:  affects sockets that rsync has direct control over, such as the outgoing

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>