Annotation of embedaddon/rsync/util.c, revision 1.1.1.2
1.1 misho 1: /*
2: * Utility routines used in rsync.
3: *
4: * Copyright (C) 1996-2000 Andrew Tridgell
5: * Copyright (C) 1996 Paul Mackerras
6: * Copyright (C) 2001, 2002 Martin Pool <mbp@samba.org>
1.1.1.2 ! misho 7: * Copyright (C) 2003-2013 Wayne Davison
1.1 misho 8: *
9: * This program is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU General Public License as published by
11: * the Free Software Foundation; either version 3 of the License, or
12: * (at your option) any later version.
13: *
14: * This program is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17: * GNU General Public License for more details.
18: *
19: * You should have received a copy of the GNU General Public License along
20: * with this program; if not, visit the http://fsf.org website.
21: */
22:
23: #include "rsync.h"
24: #include "ifuncs.h"
1.1.1.2 ! misho 25: #include "itypes.h"
! 26: #include "inums.h"
1.1 misho 27:
28: extern int module_id;
1.1.1.2 ! misho 29: extern int protect_args;
1.1 misho 30: extern int modify_window;
31: extern int relative_paths;
32: extern int preserve_times;
33: extern int preserve_xattrs;
1.1.1.2 ! misho 34: extern int preallocate_files;
1.1 misho 35: extern char *module_dir;
36: extern unsigned int module_dirlen;
37: extern char *partial_dir;
1.1.1.2 ! misho 38: extern filter_rule_list daemon_filter_list;
1.1 misho 39:
40: int sanitize_paths = 0;
41:
42: char curr_dir[MAXPATHLEN];
43: unsigned int curr_dir_len;
44: int curr_dir_depth; /* This is only set for a sanitizing daemon. */
45:
46: /* Set a fd into nonblocking mode. */
47: void set_nonblocking(int fd)
48: {
49: int val;
50:
51: if ((val = fcntl(fd, F_GETFL)) == -1)
52: return;
53: if (!(val & NONBLOCK_FLAG)) {
54: val |= NONBLOCK_FLAG;
55: fcntl(fd, F_SETFL, val);
56: }
57: }
58:
59: /* Set a fd into blocking mode. */
60: void set_blocking(int fd)
61: {
62: int val;
63:
64: if ((val = fcntl(fd, F_GETFL)) == -1)
65: return;
66: if (val & NONBLOCK_FLAG) {
67: val &= ~NONBLOCK_FLAG;
68: fcntl(fd, F_SETFL, val);
69: }
70: }
71:
72: /**
73: * Create a file descriptor pair - like pipe() but use socketpair if
74: * possible (because of blocking issues on pipes).
75: *
76: * Always set non-blocking.
77: */
78: int fd_pair(int fd[2])
79: {
80: int ret;
81:
82: #ifdef HAVE_SOCKETPAIR
83: ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
84: #else
85: ret = pipe(fd);
86: #endif
87:
88: if (ret == 0) {
89: set_nonblocking(fd[0]);
90: set_nonblocking(fd[1]);
91: }
92:
93: return ret;
94: }
95:
96: void print_child_argv(const char *prefix, char **cmd)
97: {
1.1.1.2 ! misho 98: int cnt = 0;
1.1 misho 99: rprintf(FCLIENT, "%s ", prefix);
100: for (; *cmd; cmd++) {
101: /* Look for characters that ought to be quoted. This
102: * is not a great quoting algorithm, but it's
103: * sufficient for a log message. */
104: if (strspn(*cmd, "abcdefghijklmnopqrstuvwxyz"
105: "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
106: "0123456789"
107: ",.-_=+@/") != strlen(*cmd)) {
108: rprintf(FCLIENT, "\"%s\" ", *cmd);
109: } else {
110: rprintf(FCLIENT, "%s ", *cmd);
111: }
1.1.1.2 ! misho 112: cnt++;
1.1 misho 113: }
1.1.1.2 ! misho 114: rprintf(FCLIENT, " (%d args)\n", cnt);
1.1 misho 115: }
116:
117: /* This returns 0 for success, 1 for a symlink if symlink time-setting
118: * is not possible, or -1 for any other error. */
1.1.1.2 ! misho 119: int set_modtime(const char *fname, time_t modtime, uint32 mod_nsec, mode_t mode)
1.1 misho 120: {
121: static int switch_step = 0;
122:
1.1.1.2 ! misho 123: if (DEBUG_GTE(TIME, 1)) {
1.1 misho 124: rprintf(FINFO, "set modtime of %s to (%ld) %s",
125: fname, (long)modtime,
126: asctime(localtime(&modtime)));
127: }
128:
129: switch (switch_step) {
130: #ifdef HAVE_UTIMENSAT
131: #include "case_N.h"
1.1.1.2 ! misho 132: if (do_utimensat(fname, modtime, mod_nsec) == 0)
1.1 misho 133: break;
134: if (errno != ENOSYS)
135: return -1;
136: switch_step++;
137: /* FALLTHROUGH */
138: #endif
139:
140: #ifdef HAVE_LUTIMES
141: #include "case_N.h"
1.1.1.2 ! misho 142: if (do_lutimes(fname, modtime, mod_nsec) == 0)
1.1 misho 143: break;
144: if (errno != ENOSYS)
145: return -1;
146: switch_step++;
147: /* FALLTHROUGH */
148: #endif
149:
150: #include "case_N.h"
151: switch_step++;
152: if (preserve_times & PRESERVE_LINK_TIMES) {
153: preserve_times &= ~PRESERVE_LINK_TIMES;
154: if (S_ISLNK(mode))
155: return 1;
156: }
157: /* FALLTHROUGH */
158:
159: #include "case_N.h"
160: #ifdef HAVE_UTIMES
1.1.1.2 ! misho 161: if (do_utimes(fname, modtime, mod_nsec) == 0)
1.1 misho 162: break;
163: #else
1.1.1.2 ! misho 164: if (do_utime(fname, modtime, mod_nsec) == 0)
1.1 misho 165: break;
166: #endif
167:
168: return -1;
169: }
170:
171: return 0;
172: }
173:
174: /* Create any necessary directories in fname. Any missing directories are
1.1.1.2 ! misho 175: * created with default permissions. Returns < 0 on error, or the number
! 176: * of directories created. */
! 177: int make_path(char *fname, int flags)
1.1 misho 178: {
1.1.1.2 ! misho 179: char *end, *p;
1.1 misho 180: int ret = 0;
181:
1.1.1.2 ! misho 182: if (flags & MKP_SKIP_SLASH) {
! 183: while (*fname == '/')
! 184: fname++;
! 185: }
! 186:
! 187: while (*fname == '.' && fname[1] == '/')
1.1 misho 188: fname += 2;
189:
1.1.1.2 ! misho 190: if (flags & MKP_DROP_NAME) {
! 191: end = strrchr(fname, '/');
! 192: if (!end)
! 193: return 0;
! 194: *end = '\0';
! 195: } else
! 196: end = fname + strlen(fname);
! 197:
! 198: /* Try to find an existing dir, starting from the deepest dir. */
! 199: for (p = end; ; ) {
! 200: if (do_mkdir(fname, ACCESSPERMS) == 0) {
! 201: ret++;
! 202: break;
! 203: }
! 204: if (errno != ENOENT) {
! 205: if (errno != EEXIST)
! 206: ret = -ret - 1;
! 207: break;
! 208: }
! 209: while (1) {
! 210: if (p == fname) {
! 211: ret = -ret - 1;
! 212: goto double_break;
! 213: }
! 214: if (*--p == '/') {
! 215: if (p == fname) {
! 216: ret = -ret - 1; /* impossible... */
! 217: goto double_break;
! 218: }
! 219: *p = '\0';
! 220: break;
! 221: }
! 222: }
1.1 misho 223: }
1.1.1.2 ! misho 224: double_break:
! 225:
! 226: /* Make all the dirs that we didn't find on the way here. */
! 227: while (p != end) {
! 228: *p = '/';
! 229: p += strlen(p);
! 230: if (ret < 0) /* Skip mkdir on error, but keep restoring the path. */
! 231: continue;
! 232: if (do_mkdir(fname, ACCESSPERMS) < 0)
! 233: ret = -ret - 1;
! 234: else
! 235: ret++;
! 236: }
! 237:
! 238: if (flags & MKP_DROP_NAME)
! 239: *end = '/';
1.1 misho 240:
241: return ret;
242: }
243:
244: /**
245: * Write @p len bytes at @p ptr to descriptor @p desc, retrying if
246: * interrupted.
247: *
248: * @retval len upon success
249: *
250: * @retval <0 write's (negative) error code
251: *
252: * Derived from GNU C's cccp.c.
253: */
254: int full_write(int desc, const char *ptr, size_t len)
255: {
256: int total_written;
257:
258: total_written = 0;
259: while (len > 0) {
260: int written = write(desc, ptr, len);
261: if (written < 0) {
262: if (errno == EINTR)
263: continue;
264: return written;
265: }
266: total_written += written;
267: ptr += written;
268: len -= written;
269: }
270: return total_written;
271: }
272:
273: /**
274: * Read @p len bytes at @p ptr from descriptor @p desc, retrying if
275: * interrupted.
276: *
277: * @retval >0 the actual number of bytes read
278: *
279: * @retval 0 for EOF
280: *
281: * @retval <0 for an error.
282: *
283: * Derived from GNU C's cccp.c. */
284: static int safe_read(int desc, char *ptr, size_t len)
285: {
286: int n_chars;
287:
288: if (len == 0)
289: return len;
290:
291: do {
292: n_chars = read(desc, ptr, len);
293: } while (n_chars < 0 && errno == EINTR);
294:
295: return n_chars;
296: }
297:
298: /* Copy a file. If ofd < 0, copy_file unlinks and opens the "dest" file.
299: * Otherwise, it just writes to and closes the provided file descriptor.
300: * In either case, if --xattrs are being preserved, the dest file will
301: * have its xattrs set from the source file.
302: *
303: * This is used in conjunction with the --temp-dir, --backup, and
304: * --copy-dest options. */
1.1.1.2 ! misho 305: int copy_file(const char *source, const char *dest, int ofd, mode_t mode)
1.1 misho 306: {
307: int ifd;
308: char buf[1024 * 8];
309: int len; /* Number of bytes read into `buf'. */
1.1.1.2 ! misho 310: #ifdef PREALLOCATE_NEEDS_TRUNCATE
! 311: OFF_T preallocated_len = 0, offset = 0;
! 312: #endif
1.1 misho 313:
314: if ((ifd = do_open(source, O_RDONLY, 0)) < 0) {
315: int save_errno = errno;
316: rsyserr(FERROR_XFER, errno, "open %s", full_fname(source));
317: errno = save_errno;
318: return -1;
319: }
320:
321: if (ofd < 0) {
322: if (robust_unlink(dest) && errno != ENOENT) {
323: int save_errno = errno;
324: rsyserr(FERROR_XFER, errno, "unlink %s", full_fname(dest));
325: errno = save_errno;
326: return -1;
327: }
328:
1.1.1.2 ! misho 329: #ifdef SUPPORT_XATTRS
! 330: if (preserve_xattrs)
! 331: mode |= S_IWUSR;
! 332: #endif
! 333: mode &= INITACCESSPERMS;
1.1 misho 334: if ((ofd = do_open(dest, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, mode)) < 0) {
1.1.1.2 ! misho 335: int save_errno = errno;
! 336: rsyserr(FERROR_XFER, save_errno, "open %s", full_fname(dest));
! 337: close(ifd);
! 338: errno = save_errno;
! 339: return -1;
! 340: }
! 341: }
! 342:
! 343: #ifdef SUPPORT_PREALLOCATION
! 344: if (preallocate_files) {
! 345: STRUCT_STAT srcst;
! 346:
! 347: /* Try to preallocate enough space for file's eventual length. Can
! 348: * reduce fragmentation on filesystems like ext4, xfs, and NTFS. */
! 349: if (do_fstat(ifd, &srcst) < 0)
! 350: rsyserr(FWARNING, errno, "fstat %s", full_fname(source));
! 351: else if (srcst.st_size > 0) {
! 352: if (do_fallocate(ofd, 0, srcst.st_size) == 0) {
! 353: #ifdef PREALLOCATE_NEEDS_TRUNCATE
! 354: preallocated_len = srcst.st_size;
! 355: #endif
! 356: } else
! 357: rsyserr(FWARNING, errno, "do_fallocate %s", full_fname(dest));
1.1 misho 358: }
359: }
1.1.1.2 ! misho 360: #endif
1.1 misho 361:
362: while ((len = safe_read(ifd, buf, sizeof buf)) > 0) {
363: if (full_write(ofd, buf, len) < 0) {
364: int save_errno = errno;
365: rsyserr(FERROR_XFER, errno, "write %s", full_fname(dest));
366: close(ifd);
367: close(ofd);
368: errno = save_errno;
369: return -1;
370: }
1.1.1.2 ! misho 371: #ifdef PREALLOCATE_NEEDS_TRUNCATE
! 372: offset += len;
! 373: #endif
1.1 misho 374: }
375:
376: if (len < 0) {
377: int save_errno = errno;
378: rsyserr(FERROR_XFER, errno, "read %s", full_fname(source));
379: close(ifd);
380: close(ofd);
381: errno = save_errno;
382: return -1;
383: }
384:
385: if (close(ifd) < 0) {
386: rsyserr(FWARNING, errno, "close failed on %s",
387: full_fname(source));
388: }
389:
1.1.1.2 ! misho 390: #ifdef PREALLOCATE_NEEDS_TRUNCATE
! 391: /* Source file might have shrunk since we fstatted it.
! 392: * Cut off any extra preallocated zeros from dest file. */
! 393: if (offset < preallocated_len && do_ftruncate(ofd, offset) < 0) {
! 394: /* If we fail to truncate, the dest file may be wrong, so we
! 395: * must trigger the "partial transfer" error. */
! 396: rsyserr(FERROR_XFER, errno, "ftruncate %s", full_fname(dest));
! 397: }
! 398: #endif
! 399:
1.1 misho 400: if (close(ofd) < 0) {
401: int save_errno = errno;
402: rsyserr(FERROR_XFER, errno, "close failed on %s",
403: full_fname(dest));
404: errno = save_errno;
405: return -1;
406: }
407:
408: #ifdef SUPPORT_XATTRS
409: if (preserve_xattrs)
410: copy_xattrs(source, dest);
411: #endif
412:
413: return 0;
414: }
415:
416: /* MAX_RENAMES should be 10**MAX_RENAMES_DIGITS */
417: #define MAX_RENAMES_DIGITS 3
418: #define MAX_RENAMES 1000
419:
420: /**
421: * Robust unlink: some OS'es (HPUX) refuse to unlink busy files, so
422: * rename to <path>/.rsyncNNN instead.
423: *
424: * Note that successive rsync runs will shuffle the filenames around a
425: * bit as long as the file is still busy; this is because this function
426: * does not know if the unlink call is due to a new file coming in, or
427: * --delete trying to remove old .rsyncNNN files, hence it renames it
428: * each time.
429: **/
430: int robust_unlink(const char *fname)
431: {
432: #ifndef ETXTBSY
433: return do_unlink(fname);
434: #else
435: static int counter = 1;
436: int rc, pos, start;
437: char path[MAXPATHLEN];
438:
439: rc = do_unlink(fname);
440: if (rc == 0 || errno != ETXTBSY)
441: return rc;
442:
443: if ((pos = strlcpy(path, fname, MAXPATHLEN)) >= MAXPATHLEN)
444: pos = MAXPATHLEN - 1;
445:
446: while (pos > 0 && path[pos-1] != '/')
447: pos--;
448: pos += strlcpy(path+pos, ".rsync", MAXPATHLEN-pos);
449:
450: if (pos > (MAXPATHLEN-MAX_RENAMES_DIGITS-1)) {
451: errno = ETXTBSY;
452: return -1;
453: }
454:
455: /* start where the last one left off to reduce chance of clashes */
456: start = counter;
457: do {
458: snprintf(&path[pos], MAX_RENAMES_DIGITS+1, "%03d", counter);
459: if (++counter >= MAX_RENAMES)
460: counter = 1;
461: } while ((rc = access(path, 0)) == 0 && counter != start);
462:
1.1.1.2 ! misho 463: if (INFO_GTE(MISC, 1)) {
1.1 misho 464: rprintf(FWARNING, "renaming %s to %s because of text busy\n",
465: fname, path);
466: }
467:
468: /* maybe we should return rename()'s exit status? Nah. */
469: if (do_rename(fname, path) != 0) {
470: errno = ETXTBSY;
471: return -1;
472: }
473: return 0;
474: #endif
475: }
476:
477: /* Returns 0 on successful rename, 1 if we successfully copied the file
478: * across filesystems, -2 if copy_file() failed, and -1 on other errors.
479: * If partialptr is not NULL and we need to do a copy, copy the file into
480: * the active partial-dir instead of over the destination file. */
481: int robust_rename(const char *from, const char *to, const char *partialptr,
482: int mode)
483: {
484: int tries = 4;
485:
486: while (tries--) {
487: if (do_rename(from, to) == 0)
488: return 0;
489:
490: switch (errno) {
491: #ifdef ETXTBSY
492: case ETXTBSY:
493: if (robust_unlink(to) != 0) {
494: errno = ETXTBSY;
495: return -1;
496: }
497: errno = ETXTBSY;
498: break;
499: #endif
500: case EXDEV:
501: if (partialptr) {
502: if (!handle_partial_dir(partialptr,PDIR_CREATE))
503: return -2;
504: to = partialptr;
505: }
1.1.1.2 ! misho 506: if (copy_file(from, to, -1, mode) != 0)
1.1 misho 507: return -2;
508: do_unlink(from);
509: return 1;
510: default:
511: return -1;
512: }
513: }
514: return -1;
515: }
516:
517: static pid_t all_pids[10];
518: static int num_pids;
519:
520: /** Fork and record the pid of the child. **/
521: pid_t do_fork(void)
522: {
523: pid_t newpid = fork();
524:
525: if (newpid != 0 && newpid != -1) {
526: all_pids[num_pids++] = newpid;
527: }
528: return newpid;
529: }
530:
531: /**
532: * Kill all children.
533: *
534: * @todo It would be kind of nice to make sure that they are actually
535: * all our children before we kill them, because their pids may have
536: * been recycled by some other process. Perhaps when we wait for a
537: * child, we should remove it from this array. Alternatively we could
538: * perhaps use process groups, but I think that would not work on
539: * ancient Unix versions that don't support them.
540: **/
541: void kill_all(int sig)
542: {
543: int i;
544:
545: for (i = 0; i < num_pids; i++) {
546: /* Let's just be a little careful where we
547: * point that gun, hey? See kill(2) for the
548: * magic caused by negative values. */
549: pid_t p = all_pids[i];
550:
551: if (p == getpid())
552: continue;
553: if (p <= 0)
554: continue;
555:
556: kill(p, sig);
557: }
558: }
559:
560: /** Lock a byte range in a open file */
561: int lock_range(int fd, int offset, int len)
562: {
563: struct flock lock;
564:
565: lock.l_type = F_WRLCK;
566: lock.l_whence = SEEK_SET;
567: lock.l_start = offset;
568: lock.l_len = len;
569: lock.l_pid = 0;
570:
571: return fcntl(fd,F_SETLK,&lock) == 0;
572: }
573:
574: #define ENSURE_MEMSPACE(buf, type, sz, req) \
575: if ((req) > sz && !(buf = realloc_array(buf, type, sz = MAX(sz * 2, req)))) \
576: out_of_memory("glob_expand")
577:
578: static inline void call_glob_match(const char *name, int len, int from_glob,
579: char *arg, int abpos, int fbpos);
580:
581: static struct glob_data {
582: char *arg_buf, *filt_buf, **argv;
583: int absize, fbsize, maxargs, argc;
584: } glob;
585:
586: static void glob_match(char *arg, int abpos, int fbpos)
587: {
588: int len;
589: char *slash;
590:
591: while (*arg == '.' && arg[1] == '/') {
592: if (fbpos < 0) {
593: ENSURE_MEMSPACE(glob.filt_buf, char, glob.fbsize, glob.absize);
594: memcpy(glob.filt_buf, glob.arg_buf, abpos + 1);
595: fbpos = abpos;
596: }
597: ENSURE_MEMSPACE(glob.arg_buf, char, glob.absize, abpos + 3);
598: glob.arg_buf[abpos++] = *arg++;
599: glob.arg_buf[abpos++] = *arg++;
600: glob.arg_buf[abpos] = '\0';
601: }
602: if ((slash = strchr(arg, '/')) != NULL) {
603: *slash = '\0';
604: len = slash - arg;
605: } else
606: len = strlen(arg);
607: if (strpbrk(arg, "*?[")) {
608: struct dirent *di;
609: DIR *d;
610:
611: if (!(d = opendir(abpos ? glob.arg_buf : ".")))
612: return;
613: while ((di = readdir(d)) != NULL) {
614: char *dname = d_name(di);
615: if (dname[0] == '.' && (dname[1] == '\0'
616: || (dname[1] == '.' && dname[2] == '\0')))
617: continue;
618: if (!wildmatch(arg, dname))
619: continue;
620: call_glob_match(dname, strlen(dname), 1,
621: slash ? arg + len + 1 : NULL,
622: abpos, fbpos);
623: }
624: closedir(d);
625: } else {
626: call_glob_match(arg, len, 0,
627: slash ? arg + len + 1 : NULL,
628: abpos, fbpos);
629: }
630: if (slash)
631: *slash = '/';
632: }
633:
634: static inline void call_glob_match(const char *name, int len, int from_glob,
635: char *arg, int abpos, int fbpos)
636: {
637: char *use_buf;
638:
639: ENSURE_MEMSPACE(glob.arg_buf, char, glob.absize, abpos + len + 2);
640: memcpy(glob.arg_buf + abpos, name, len);
641: abpos += len;
642: glob.arg_buf[abpos] = '\0';
643:
644: if (fbpos >= 0) {
645: ENSURE_MEMSPACE(glob.filt_buf, char, glob.fbsize, fbpos + len + 2);
646: memcpy(glob.filt_buf + fbpos, name, len);
647: fbpos += len;
648: glob.filt_buf[fbpos] = '\0';
649: use_buf = glob.filt_buf;
650: } else
651: use_buf = glob.arg_buf;
652:
653: if (from_glob || (arg && len)) {
654: STRUCT_STAT st;
655: int is_dir;
656:
657: if (do_stat(glob.arg_buf, &st) != 0)
658: return;
659: is_dir = S_ISDIR(st.st_mode) != 0;
660: if (arg && !is_dir)
661: return;
662:
663: if (daemon_filter_list.head
664: && check_filter(&daemon_filter_list, FLOG, use_buf, is_dir) < 0)
665: return;
666: }
667:
668: if (arg) {
669: glob.arg_buf[abpos++] = '/';
670: glob.arg_buf[abpos] = '\0';
671: if (fbpos >= 0) {
672: glob.filt_buf[fbpos++] = '/';
673: glob.filt_buf[fbpos] = '\0';
674: }
675: glob_match(arg, abpos, fbpos);
676: } else {
677: ENSURE_MEMSPACE(glob.argv, char *, glob.maxargs, glob.argc + 1);
678: if (!(glob.argv[glob.argc++] = strdup(glob.arg_buf)))
679: out_of_memory("glob_match");
680: }
681: }
682:
683: /* This routine performs wild-card expansion of the pathname in "arg". Any
684: * daemon-excluded files/dirs will not be matched by the wildcards. Returns 0
685: * if a wild-card string is the only returned item (due to matching nothing). */
686: int glob_expand(const char *arg, char ***argv_p, int *argc_p, int *maxargs_p)
687: {
688: int ret, save_argc;
689: char *s;
690:
691: if (!arg) {
692: if (glob.filt_buf)
693: free(glob.filt_buf);
694: free(glob.arg_buf);
695: memset(&glob, 0, sizeof glob);
696: return -1;
697: }
698:
699: if (sanitize_paths)
700: s = sanitize_path(NULL, arg, "", 0, SP_KEEP_DOT_DIRS);
701: else {
702: s = strdup(arg);
703: if (!s)
704: out_of_memory("glob_expand");
705: clean_fname(s, CFN_KEEP_DOT_DIRS
706: | CFN_KEEP_TRAILING_SLASH
707: | CFN_COLLAPSE_DOT_DOT_DIRS);
708: }
709:
710: ENSURE_MEMSPACE(glob.arg_buf, char, glob.absize, MAXPATHLEN);
711: *glob.arg_buf = '\0';
712:
713: glob.argc = save_argc = *argc_p;
714: glob.argv = *argv_p;
715: glob.maxargs = *maxargs_p;
716:
717: ENSURE_MEMSPACE(glob.argv, char *, glob.maxargs, 100);
718:
719: glob_match(s, 0, -1);
720:
721: /* The arg didn't match anything, so add the failed arg to the list. */
722: if (glob.argc == save_argc) {
723: ENSURE_MEMSPACE(glob.argv, char *, glob.maxargs, glob.argc + 1);
724: glob.argv[glob.argc++] = s;
725: ret = 0;
726: } else {
727: free(s);
728: ret = 1;
729: }
730:
731: *maxargs_p = glob.maxargs;
732: *argv_p = glob.argv;
733: *argc_p = glob.argc;
734:
735: return ret;
736: }
737:
738: /* This routine is only used in daemon mode. */
739: void glob_expand_module(char *base1, char *arg, char ***argv_p, int *argc_p, int *maxargs_p)
740: {
741: char *p, *s;
742: char *base = base1;
743: int base_len = strlen(base);
744:
745: if (!arg || !*arg)
746: return;
747:
748: if (strncmp(arg, base, base_len) == 0)
749: arg += base_len;
750:
1.1.1.2 ! misho 751: if (protect_args) {
! 752: glob_expand(arg, argv_p, argc_p, maxargs_p);
! 753: return;
! 754: }
! 755:
1.1 misho 756: if (!(arg = strdup(arg)))
757: out_of_memory("glob_expand_module");
758:
1.1.1.2 ! misho 759: if (asprintf(&base," %s/", base1) < 0)
1.1 misho 760: out_of_memory("glob_expand_module");
761: base_len++;
762:
763: for (s = arg; *s; s = p + base_len) {
764: if ((p = strstr(s, base)) != NULL)
765: *p = '\0'; /* split it at this point */
766: glob_expand(s, argv_p, argc_p, maxargs_p);
767: if (!p)
768: break;
769: }
770:
771: free(arg);
772: free(base);
773: }
774:
775: /**
776: * Convert a string to lower case
777: **/
778: void strlower(char *s)
779: {
780: while (*s) {
781: if (isUpper(s))
782: *s = toLower(s);
783: s++;
784: }
785: }
786:
787: /* Join strings p1 & p2 into "dest" with a guaranteed '/' between them. (If
788: * p1 ends with a '/', no extra '/' is inserted.) Returns the length of both
789: * strings + 1 (if '/' was inserted), regardless of whether the null-terminated
790: * string fits into destsize. */
791: size_t pathjoin(char *dest, size_t destsize, const char *p1, const char *p2)
792: {
793: size_t len = strlcpy(dest, p1, destsize);
794: if (len < destsize - 1) {
795: if (!len || dest[len-1] != '/')
796: dest[len++] = '/';
797: if (len < destsize - 1)
798: len += strlcpy(dest + len, p2, destsize - len);
799: else {
800: dest[len] = '\0';
801: len += strlen(p2);
802: }
803: }
804: else
805: len += strlen(p2) + 1; /* Assume we'd insert a '/'. */
806: return len;
807: }
808:
809: /* Join any number of strings together, putting them in "dest". The return
810: * value is the length of all the strings, regardless of whether the null-
811: * terminated whole fits in destsize. Your list of string pointers must end
812: * with a NULL to indicate the end of the list. */
813: size_t stringjoin(char *dest, size_t destsize, ...)
814: {
815: va_list ap;
816: size_t len, ret = 0;
817: const char *src;
818:
819: va_start(ap, destsize);
820: while (1) {
821: if (!(src = va_arg(ap, const char *)))
822: break;
823: len = strlen(src);
824: ret += len;
825: if (destsize > 1) {
826: if (len >= destsize)
827: len = destsize - 1;
828: memcpy(dest, src, len);
829: destsize -= len;
830: dest += len;
831: }
832: }
833: *dest = '\0';
834: va_end(ap);
835:
836: return ret;
837: }
838:
839: int count_dir_elements(const char *p)
840: {
841: int cnt = 0, new_component = 1;
842: while (*p) {
843: if (*p++ == '/')
844: new_component = (*p != '.' || (p[1] != '/' && p[1] != '\0'));
845: else if (new_component) {
846: new_component = 0;
847: cnt++;
848: }
849: }
850: return cnt;
851: }
852:
853: /* Turns multiple adjacent slashes into a single slash (possible exception:
854: * the preserving of two leading slashes at the start), drops all leading or
855: * interior "." elements unless CFN_KEEP_DOT_DIRS is flagged. Will also drop
856: * a trailing '.' after a '/' if CFN_DROP_TRAILING_DOT_DIR is flagged, removes
857: * a trailing slash (perhaps after removing the aforementioned dot) unless
858: * CFN_KEEP_TRAILING_SLASH is flagged, and will also collapse ".." elements
859: * (except at the start) if CFN_COLLAPSE_DOT_DOT_DIRS is flagged. If the
860: * resulting name would be empty, returns ".". */
861: unsigned int clean_fname(char *name, int flags)
862: {
863: char *limit = name - 1, *t = name, *f = name;
864: int anchored;
865:
866: if (!name)
867: return 0;
868:
869: if ((anchored = *f == '/') != 0) {
870: *t++ = *f++;
871: #ifdef __CYGWIN__
872: /* If there are exactly 2 slashes at the start, preserve
873: * them. Would break daemon excludes unless the paths are
874: * really treated differently, so used this sparingly. */
875: if (*f == '/' && f[1] != '/')
876: *t++ = *f++;
877: #endif
878: } else if (flags & CFN_KEEP_DOT_DIRS && *f == '.' && f[1] == '/') {
879: *t++ = *f++;
880: *t++ = *f++;
881: }
882: while (*f) {
883: /* discard extra slashes */
884: if (*f == '/') {
885: f++;
886: continue;
887: }
888: if (*f == '.') {
889: /* discard interior "." dirs */
890: if (f[1] == '/' && !(flags & CFN_KEEP_DOT_DIRS)) {
891: f += 2;
892: continue;
893: }
894: if (f[1] == '\0' && flags & CFN_DROP_TRAILING_DOT_DIR)
895: break;
896: /* collapse ".." dirs */
897: if (flags & CFN_COLLAPSE_DOT_DOT_DIRS
898: && f[1] == '.' && (f[2] == '/' || !f[2])) {
899: char *s = t - 1;
900: if (s == name && anchored) {
901: f += 2;
902: continue;
903: }
904: while (s > limit && *--s != '/') {}
905: if (s != t - 1 && (s < name || *s == '/')) {
906: t = s + 1;
907: f += 2;
908: continue;
909: }
910: limit = t + 2;
911: }
912: }
913: while (*f && (*t++ = *f++) != '/') {}
914: }
915:
916: if (t > name+anchored && t[-1] == '/' && !(flags & CFN_KEEP_TRAILING_SLASH))
917: t--;
918: if (t == name)
919: *t++ = '.';
920: *t = '\0';
921:
922: return t - name;
923: }
924:
925: /* Make path appear as if a chroot had occurred. This handles a leading
926: * "/" (either removing it or expanding it) and any leading or embedded
927: * ".." components that attempt to escape past the module's top dir.
928: *
929: * If dest is NULL, a buffer is allocated to hold the result. It is legal
930: * to call with the dest and the path (p) pointing to the same buffer, but
931: * rootdir will be ignored to avoid expansion of the string.
932: *
933: * The rootdir string contains a value to use in place of a leading slash.
934: * Specify NULL to get the default of "module_dir".
935: *
936: * The depth var is a count of how many '..'s to allow at the start of the
937: * path.
938: *
939: * We also clean the path in a manner similar to clean_fname() but with a
940: * few differences:
941: *
942: * Turns multiple adjacent slashes into a single slash, gets rid of "." dir
943: * elements (INCLUDING a trailing dot dir), PRESERVES a trailing slash, and
944: * ALWAYS collapses ".." elements (except for those at the start of the
945: * string up to "depth" deep). If the resulting name would be empty,
946: * change it into a ".". */
947: char *sanitize_path(char *dest, const char *p, const char *rootdir, int depth,
948: int flags)
949: {
950: char *start, *sanp;
951: int rlen = 0, drop_dot_dirs = !relative_paths || !(flags & SP_KEEP_DOT_DIRS);
952:
953: if (dest != p) {
954: int plen = strlen(p);
955: if (*p == '/') {
956: if (!rootdir)
957: rootdir = module_dir;
958: rlen = strlen(rootdir);
959: depth = 0;
960: p++;
961: }
962: if (dest) {
963: if (rlen + plen + 1 >= MAXPATHLEN)
964: return NULL;
965: } else if (!(dest = new_array(char, rlen + plen + 1)))
966: out_of_memory("sanitize_path");
967: if (rlen) {
968: memcpy(dest, rootdir, rlen);
969: if (rlen > 1)
970: dest[rlen++] = '/';
971: }
972: }
973:
974: if (drop_dot_dirs) {
975: while (*p == '.' && p[1] == '/')
976: p += 2;
977: }
978:
979: start = sanp = dest + rlen;
980: /* This loop iterates once per filename component in p, pointing at
981: * the start of the name (past any prior slash) for each iteration. */
982: while (*p) {
983: /* discard leading or extra slashes */
984: if (*p == '/') {
985: p++;
986: continue;
987: }
988: if (drop_dot_dirs) {
989: if (*p == '.' && (p[1] == '/' || p[1] == '\0')) {
990: /* skip "." component */
991: p++;
992: continue;
993: }
994: }
995: if (*p == '.' && p[1] == '.' && (p[2] == '/' || p[2] == '\0')) {
996: /* ".." component followed by slash or end */
997: if (depth <= 0 || sanp != start) {
998: p += 2;
999: if (sanp != start) {
1000: /* back up sanp one level */
1001: --sanp; /* now pointing at slash */
1002: while (sanp > start && sanp[-1] != '/')
1003: sanp--;
1004: }
1005: continue;
1006: }
1007: /* allow depth levels of .. at the beginning */
1008: depth--;
1009: /* move the virtual beginning to leave the .. alone */
1010: start = sanp + 3;
1011: }
1012: /* copy one component through next slash */
1013: while (*p && (*sanp++ = *p++) != '/') {}
1014: }
1015: if (sanp == dest) {
1016: /* ended up with nothing, so put in "." component */
1017: *sanp++ = '.';
1018: }
1019: *sanp = '\0';
1020:
1021: return dest;
1022: }
1023:
1024: /* Like chdir(), but it keeps track of the current directory (in the
1025: * global "curr_dir"), and ensures that the path size doesn't overflow.
1026: * Also cleans the path using the clean_fname() function. */
1027: int change_dir(const char *dir, int set_path_only)
1028: {
1.1.1.2 ! misho 1029: static int initialised, skipped_chdir;
1.1 misho 1030: unsigned int len;
1031:
1032: if (!initialised) {
1033: initialised = 1;
1034: if (getcwd(curr_dir, sizeof curr_dir - 1) == NULL) {
1035: rsyserr(FERROR, errno, "getcwd()");
1036: exit_cleanup(RERR_FILESELECT);
1037: }
1038: curr_dir_len = strlen(curr_dir);
1039: }
1040:
1041: if (!dir) /* this call was probably just to initialize */
1042: return 0;
1043:
1044: len = strlen(dir);
1.1.1.2 ! misho 1045: if (len == 1 && *dir == '.' && (!skipped_chdir || set_path_only))
1.1 misho 1046: return 1;
1047:
1048: if (*dir == '/') {
1049: if (len >= sizeof curr_dir) {
1050: errno = ENAMETOOLONG;
1051: return 0;
1052: }
1053: if (!set_path_only && chdir(dir))
1054: return 0;
1.1.1.2 ! misho 1055: skipped_chdir = set_path_only;
1.1 misho 1056: memcpy(curr_dir, dir, len + 1);
1057: } else {
1058: if (curr_dir_len + 1 + len >= sizeof curr_dir) {
1059: errno = ENAMETOOLONG;
1060: return 0;
1061: }
1062: if (!(curr_dir_len && curr_dir[curr_dir_len-1] == '/'))
1063: curr_dir[curr_dir_len++] = '/';
1064: memcpy(curr_dir + curr_dir_len, dir, len + 1);
1065:
1066: if (!set_path_only && chdir(curr_dir)) {
1067: curr_dir[curr_dir_len] = '\0';
1068: return 0;
1069: }
1.1.1.2 ! misho 1070: skipped_chdir = set_path_only;
1.1 misho 1071: }
1072:
1.1.1.2 ! misho 1073: curr_dir_len = clean_fname(curr_dir, CFN_COLLAPSE_DOT_DOT_DIRS | CFN_DROP_TRAILING_DOT_DIR);
1.1 misho 1074: if (sanitize_paths) {
1075: if (module_dirlen > curr_dir_len)
1076: module_dirlen = curr_dir_len;
1077: curr_dir_depth = count_dir_elements(curr_dir + module_dirlen);
1078: }
1079:
1.1.1.2 ! misho 1080: if (DEBUG_GTE(CHDIR, 1) && !set_path_only)
1.1 misho 1081: rprintf(FINFO, "[%s] change_dir(%s)\n", who_am_i(), curr_dir);
1082:
1083: return 1;
1084: }
1085:
1086: /* This will make a relative path absolute and clean it up via clean_fname().
1087: * Returns the string, which might be newly allocated, or NULL on error. */
1088: char *normalize_path(char *path, BOOL force_newbuf, unsigned int *len_ptr)
1089: {
1090: unsigned int len;
1091:
1092: if (*path != '/') { /* Make path absolute. */
1093: int len = strlen(path);
1094: if (curr_dir_len + 1 + len >= sizeof curr_dir)
1095: return NULL;
1096: curr_dir[curr_dir_len] = '/';
1097: memcpy(curr_dir + curr_dir_len + 1, path, len + 1);
1098: if (!(path = strdup(curr_dir)))
1099: out_of_memory("normalize_path");
1100: curr_dir[curr_dir_len] = '\0';
1101: } else if (force_newbuf) {
1102: if (!(path = strdup(path)))
1103: out_of_memory("normalize_path");
1104: }
1105:
1106: len = clean_fname(path, CFN_COLLAPSE_DOT_DOT_DIRS | CFN_DROP_TRAILING_DOT_DIR);
1107:
1108: if (len_ptr)
1109: *len_ptr = len;
1110:
1111: return path;
1112: }
1113:
1114: /**
1115: * Return a quoted string with the full pathname of the indicated filename.
1116: * The string " (in MODNAME)" may also be appended. The returned pointer
1117: * remains valid until the next time full_fname() is called.
1118: **/
1119: char *full_fname(const char *fn)
1120: {
1121: static char *result = NULL;
1122: char *m1, *m2, *m3;
1123: char *p1, *p2;
1124:
1125: if (result)
1126: free(result);
1127:
1128: if (*fn == '/')
1129: p1 = p2 = "";
1130: else {
1131: p1 = curr_dir + module_dirlen;
1132: for (p2 = p1; *p2 == '/'; p2++) {}
1133: if (*p2)
1134: p2 = "/";
1135: }
1136: if (module_id >= 0) {
1137: m1 = " (in ";
1138: m2 = lp_name(module_id);
1139: m3 = ")";
1140: } else
1141: m1 = m2 = m3 = "";
1142:
1.1.1.2 ! misho 1143: if (asprintf(&result, "\"%s%s%s\"%s%s%s", p1, p2, fn, m1, m2, m3) < 0)
1.1 misho 1144: out_of_memory("full_fname");
1145:
1146: return result;
1147: }
1148:
1149: static char partial_fname[MAXPATHLEN];
1150:
1151: char *partial_dir_fname(const char *fname)
1152: {
1153: char *t = partial_fname;
1154: int sz = sizeof partial_fname;
1155: const char *fn;
1156:
1157: if ((fn = strrchr(fname, '/')) != NULL) {
1158: fn++;
1159: if (*partial_dir != '/') {
1160: int len = fn - fname;
1161: strncpy(t, fname, len); /* safe */
1162: t += len;
1163: sz -= len;
1164: }
1165: } else
1166: fn = fname;
1167: if ((int)pathjoin(t, sz, partial_dir, fn) >= sz)
1168: return NULL;
1169: if (daemon_filter_list.head) {
1170: t = strrchr(partial_fname, '/');
1171: *t = '\0';
1172: if (check_filter(&daemon_filter_list, FLOG, partial_fname, 1) < 0)
1173: return NULL;
1174: *t = '/';
1175: if (check_filter(&daemon_filter_list, FLOG, partial_fname, 0) < 0)
1176: return NULL;
1177: }
1178:
1179: return partial_fname;
1180: }
1181:
1182: /* If no --partial-dir option was specified, we don't need to do anything
1183: * (the partial-dir is essentially '.'), so just return success. */
1184: int handle_partial_dir(const char *fname, int create)
1185: {
1186: char *fn, *dir;
1187:
1188: if (fname != partial_fname)
1189: return 1;
1190: if (!create && *partial_dir == '/')
1191: return 1;
1192: if (!(fn = strrchr(partial_fname, '/')))
1193: return 1;
1194:
1195: *fn = '\0';
1196: dir = partial_fname;
1197: if (create) {
1198: STRUCT_STAT st;
1199: int statret = do_lstat(dir, &st);
1200: if (statret == 0 && !S_ISDIR(st.st_mode)) {
1201: if (do_unlink(dir) < 0) {
1202: *fn = '/';
1203: return 0;
1204: }
1205: statret = -1;
1206: }
1207: if (statret < 0 && do_mkdir(dir, 0700) < 0) {
1208: *fn = '/';
1209: return 0;
1210: }
1211: } else
1212: do_rmdir(dir);
1213: *fn = '/';
1214:
1215: return 1;
1216: }
1217:
1218: /* Determine if a symlink points outside the current directory tree.
1219: * This is considered "unsafe" because e.g. when mirroring somebody
1220: * else's machine it might allow them to establish a symlink to
1221: * /etc/passwd, and then read it through a web server.
1222: *
1223: * Returns 1 if unsafe, 0 if safe.
1224: *
1225: * Null symlinks and absolute symlinks are always unsafe.
1226: *
1227: * Basically here we are concerned with symlinks whose target contains
1228: * "..", because this might cause us to walk back up out of the
1229: * transferred directory. We are not allowed to go back up and
1230: * reenter.
1231: *
1232: * "dest" is the target of the symlink in question.
1233: *
1234: * "src" is the top source directory currently applicable at the level
1235: * of the referenced symlink. This is usually the symlink's full path
1236: * (including its name), as referenced from the root of the transfer. */
1237: int unsafe_symlink(const char *dest, const char *src)
1238: {
1239: const char *name, *slash;
1240: int depth = 0;
1241:
1242: /* all absolute and null symlinks are unsafe */
1243: if (!dest || !*dest || *dest == '/')
1244: return 1;
1245:
1246: /* find out what our safety margin is */
1247: for (name = src; (slash = strchr(name, '/')) != 0; name = slash+1) {
1248: /* ".." segment starts the count over. "." segment is ignored. */
1249: if (*name == '.' && (name[1] == '/' || (name[1] == '.' && name[2] == '/'))) {
1250: if (name[1] == '.')
1251: depth = 0;
1252: } else
1253: depth++;
1254: while (slash[1] == '/') slash++; /* just in case src isn't clean */
1255: }
1256: if (*name == '.' && name[1] == '.' && name[2] == '\0')
1257: depth = 0;
1258:
1259: for (name = dest; (slash = strchr(name, '/')) != 0; name = slash+1) {
1260: if (*name == '.' && (name[1] == '/' || (name[1] == '.' && name[2] == '/'))) {
1261: if (name[1] == '.') {
1262: /* if at any point we go outside the current directory
1263: then stop - it is unsafe */
1264: if (--depth < 0)
1265: return 1;
1266: }
1267: } else
1268: depth++;
1269: while (slash[1] == '/') slash++;
1270: }
1271: if (*name == '.' && name[1] == '.' && name[2] == '\0')
1272: depth--;
1273:
1274: return depth < 0;
1275: }
1276:
1277: /* Return the date and time as a string. Some callers tweak returned buf. */
1278: char *timestring(time_t t)
1279: {
1280: static char TimeBuf[200];
1281: struct tm *tm = localtime(&t);
1282: char *p;
1283:
1284: #ifdef HAVE_STRFTIME
1285: strftime(TimeBuf, sizeof TimeBuf - 1, "%Y/%m/%d %H:%M:%S", tm);
1286: #else
1287: strlcpy(TimeBuf, asctime(tm), sizeof TimeBuf);
1288: #endif
1289:
1290: if ((p = strchr(TimeBuf, '\n')) != NULL)
1291: *p = '\0';
1292:
1293: return TimeBuf;
1294: }
1295:
1296: /* Determine if two time_t values are equivalent (either exact, or in
1297: * the modification timestamp window established by --modify-window).
1298: *
1299: * @retval 0 if the times should be treated as the same
1300: *
1301: * @retval +1 if the first is later
1302: *
1303: * @retval -1 if the 2nd is later
1304: **/
1305: int cmp_time(time_t file1, time_t file2)
1306: {
1307: if (file2 > file1) {
1308: if (file2 - file1 <= modify_window)
1309: return 0;
1310: return -1;
1311: }
1312: if (file1 - file2 <= modify_window)
1313: return 0;
1314: return 1;
1315: }
1316:
1317:
1318: #ifdef __INSURE__XX
1319: #include <dlfcn.h>
1320:
1321: /**
1322: This routine is a trick to immediately catch errors when debugging
1323: with insure. A xterm with a gdb is popped up when insure catches
1324: a error. It is Linux specific.
1325: **/
1326: int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6)
1327: {
1328: static int (*fn)();
1.1.1.2 ! misho 1329: int ret, pid_int = getpid();
1.1 misho 1330: char *cmd;
1331:
1.1.1.2 ! misho 1332: if (asprintf(&cmd,
! 1333: "/usr/X11R6/bin/xterm -display :0 -T Panic -n Panic -e /bin/sh -c 'cat /tmp/ierrs.*.%d ; "
! 1334: "gdb /proc/%d/exe %d'", pid_int, pid_int, pid_int) < 0)
! 1335: return -1;
1.1 misho 1336:
1337: if (!fn) {
1338: static void *h;
1339: h = dlopen("/usr/local/parasoft/insure++lite/lib.linux2/libinsure.so", RTLD_LAZY);
1340: fn = dlsym(h, "_Insure_trap_error");
1341: }
1342:
1343: ret = fn(a1, a2, a3, a4, a5, a6);
1344:
1345: system(cmd);
1346:
1347: free(cmd);
1348:
1349: return ret;
1350: }
1351: #endif
1352:
1353: /* Take a filename and filename length and return the most significant
1354: * filename suffix we can find. This ignores suffixes such as "~",
1355: * ".bak", ".orig", ".~1~", etc. */
1356: const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr)
1357: {
1358: const char *suf, *s;
1359: BOOL had_tilde;
1360: int s_len;
1361:
1362: /* One or more dots at the start aren't a suffix. */
1363: while (fn_len && *fn == '.') fn++, fn_len--;
1364:
1365: /* Ignore the ~ in a "foo~" filename. */
1366: if (fn_len > 1 && fn[fn_len-1] == '~')
1367: fn_len--, had_tilde = True;
1368: else
1369: had_tilde = False;
1370:
1371: /* Assume we don't find an suffix. */
1372: suf = "";
1373: *len_ptr = 0;
1374:
1375: /* Find the last significant suffix. */
1376: for (s = fn + fn_len; fn_len > 1; ) {
1377: while (*--s != '.' && s != fn) {}
1378: if (s == fn)
1379: break;
1380: s_len = fn_len - (s - fn);
1381: fn_len = s - fn;
1382: if (s_len == 4) {
1383: if (strcmp(s+1, "bak") == 0
1384: || strcmp(s+1, "old") == 0)
1385: continue;
1386: } else if (s_len == 5) {
1387: if (strcmp(s+1, "orig") == 0)
1388: continue;
1389: } else if (s_len > 2 && had_tilde
1390: && s[1] == '~' && isDigit(s + 2))
1391: continue;
1392: *len_ptr = s_len;
1393: suf = s;
1394: if (s_len == 1)
1395: break;
1396: /* Determine if the suffix is all digits. */
1397: for (s++, s_len--; s_len > 0; s++, s_len--) {
1398: if (!isDigit(s))
1399: return suf;
1400: }
1401: /* An all-digit suffix may not be that signficant. */
1402: s = suf;
1403: }
1404:
1405: return suf;
1406: }
1407:
1408: /* This is an implementation of the Levenshtein distance algorithm. It
1409: * was implemented to avoid needing a two-dimensional matrix (to save
1410: * memory). It was also tweaked to try to factor in the ASCII distance
1411: * between changed characters as a minor distance quantity. The normal
1412: * Levenshtein units of distance (each signifying a single change between
1413: * the two strings) are defined as a "UNIT". */
1414:
1415: #define UNIT (1 << 16)
1416:
1.1.1.2 ! misho 1417: uint32 fuzzy_distance(const char *s1, unsigned len1, const char *s2, unsigned len2)
1.1 misho 1418: {
1419: uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc;
1420: int32 cost;
1.1.1.2 ! misho 1421: unsigned i1, i2;
1.1 misho 1422:
1423: if (!len1 || !len2) {
1424: if (!len1) {
1425: s1 = s2;
1426: len1 = len2;
1427: }
1428: for (i1 = 0, cost = 0; i1 < len1; i1++)
1429: cost += s1[i1];
1430: return (int32)len1 * UNIT + cost;
1431: }
1432:
1433: for (i2 = 0; i2 < len2; i2++)
1434: a[i2] = (i2+1) * UNIT;
1435:
1436: for (i1 = 0; i1 < len1; i1++) {
1437: diag = i1 * UNIT;
1438: above = (i1+1) * UNIT;
1439: for (i2 = 0; i2 < len2; i2++) {
1440: left = a[i2];
1441: if ((cost = *((uchar*)s1+i1) - *((uchar*)s2+i2)) != 0) {
1442: if (cost < 0)
1443: cost = UNIT - cost;
1444: else
1445: cost = UNIT + cost;
1446: }
1447: diag_inc = diag + cost;
1448: left_inc = left + UNIT + *((uchar*)s1+i1);
1449: above_inc = above + UNIT + *((uchar*)s2+i2);
1450: a[i2] = above = left < above
1451: ? (left_inc < diag_inc ? left_inc : diag_inc)
1452: : (above_inc < diag_inc ? above_inc : diag_inc);
1453: diag = left;
1454: }
1455: }
1456:
1457: return a[len2-1];
1458: }
1459:
1460: #define BB_SLOT_SIZE (16*1024) /* Desired size in bytes */
1461: #define BB_PER_SLOT_BITS (BB_SLOT_SIZE * 8) /* Number of bits per slot */
1462: #define BB_PER_SLOT_INTS (BB_SLOT_SIZE / 4) /* Number of int32s per slot */
1463:
1464: struct bitbag {
1465: uint32 **bits;
1466: int slot_cnt;
1467: };
1468:
1469: struct bitbag *bitbag_create(int max_ndx)
1470: {
1471: struct bitbag *bb = new(struct bitbag);
1472: bb->slot_cnt = (max_ndx + BB_PER_SLOT_BITS - 1) / BB_PER_SLOT_BITS;
1473:
1474: if (!(bb->bits = (uint32**)calloc(bb->slot_cnt, sizeof (uint32*))))
1475: out_of_memory("bitbag_create");
1476:
1477: return bb;
1478: }
1479:
1480: void bitbag_set_bit(struct bitbag *bb, int ndx)
1481: {
1482: int slot = ndx / BB_PER_SLOT_BITS;
1483: ndx %= BB_PER_SLOT_BITS;
1484:
1485: if (!bb->bits[slot]) {
1486: if (!(bb->bits[slot] = (uint32*)calloc(BB_PER_SLOT_INTS, 4)))
1487: out_of_memory("bitbag_set_bit");
1488: }
1489:
1490: bb->bits[slot][ndx/32] |= 1u << (ndx % 32);
1491: }
1492:
1493: #if 0 /* not needed yet */
1494: void bitbag_clear_bit(struct bitbag *bb, int ndx)
1495: {
1496: int slot = ndx / BB_PER_SLOT_BITS;
1497: ndx %= BB_PER_SLOT_BITS;
1498:
1499: if (!bb->bits[slot])
1500: return;
1501:
1502: bb->bits[slot][ndx/32] &= ~(1u << (ndx % 32));
1503: }
1504:
1505: int bitbag_check_bit(struct bitbag *bb, int ndx)
1506: {
1507: int slot = ndx / BB_PER_SLOT_BITS;
1508: ndx %= BB_PER_SLOT_BITS;
1509:
1510: if (!bb->bits[slot])
1511: return 0;
1512:
1513: return bb->bits[slot][ndx/32] & (1u << (ndx % 32)) ? 1 : 0;
1514: }
1515: #endif
1516:
1517: /* Call this with -1 to start checking from 0. Returns -1 at the end. */
1518: int bitbag_next_bit(struct bitbag *bb, int after)
1519: {
1520: uint32 bits, mask;
1521: int i, ndx = after + 1;
1522: int slot = ndx / BB_PER_SLOT_BITS;
1523: ndx %= BB_PER_SLOT_BITS;
1524:
1525: mask = (1u << (ndx % 32)) - 1;
1526: for (i = ndx / 32; slot < bb->slot_cnt; slot++, i = mask = 0) {
1527: if (!bb->bits[slot])
1528: continue;
1529: for ( ; i < BB_PER_SLOT_INTS; i++, mask = 0) {
1530: if (!(bits = bb->bits[slot][i] & ~mask))
1531: continue;
1532: /* The xor magic figures out the lowest enabled bit in
1533: * bits, and the switch quickly computes log2(bit). */
1534: switch (bits ^ (bits & (bits-1))) {
1535: #define LOG2(n) case 1u << n: return slot*BB_PER_SLOT_BITS + i*32 + n
1536: LOG2(0); LOG2(1); LOG2(2); LOG2(3);
1537: LOG2(4); LOG2(5); LOG2(6); LOG2(7);
1538: LOG2(8); LOG2(9); LOG2(10); LOG2(11);
1539: LOG2(12); LOG2(13); LOG2(14); LOG2(15);
1540: LOG2(16); LOG2(17); LOG2(18); LOG2(19);
1541: LOG2(20); LOG2(21); LOG2(22); LOG2(23);
1542: LOG2(24); LOG2(25); LOG2(26); LOG2(27);
1543: LOG2(28); LOG2(29); LOG2(30); LOG2(31);
1544: }
1545: return -1; /* impossible... */
1546: }
1547: }
1548:
1549: return -1;
1550: }
1551:
1552: void flist_ndx_push(flist_ndx_list *lp, int ndx)
1553: {
1554: struct flist_ndx_item *item;
1555:
1556: if (!(item = new(struct flist_ndx_item)))
1557: out_of_memory("flist_ndx_push");
1558: item->next = NULL;
1559: item->ndx = ndx;
1560: if (lp->tail)
1561: lp->tail->next = item;
1562: else
1563: lp->head = item;
1564: lp->tail = item;
1565: }
1566:
1567: int flist_ndx_pop(flist_ndx_list *lp)
1568: {
1569: struct flist_ndx_item *next;
1570: int ndx;
1571:
1572: if (!lp->head)
1573: return -1;
1574:
1575: ndx = lp->head->ndx;
1576: next = lp->head->next;
1577: free(lp->head);
1578: lp->head = next;
1579: if (!next)
1580: lp->tail = NULL;
1581:
1582: return ndx;
1583: }
1584:
1585: void *expand_item_list(item_list *lp, size_t item_size,
1586: const char *desc, int incr)
1587: {
1588: /* First time through, 0 <= 0, so list is expanded. */
1589: if (lp->malloced <= lp->count) {
1590: void *new_ptr;
1591: size_t new_size = lp->malloced;
1592: if (incr < 0)
1593: new_size += -incr; /* increase slowly */
1594: else if (new_size < (size_t)incr)
1595: new_size += incr;
1596: else
1597: new_size *= 2;
1598: if (new_size < lp->malloced)
1599: overflow_exit("expand_item_list");
1600: /* Using _realloc_array() lets us pass the size, not a type. */
1601: new_ptr = _realloc_array(lp->items, item_size, new_size);
1.1.1.2 ! misho 1602: if (DEBUG_GTE(FLIST, 3)) {
! 1603: rprintf(FINFO, "[%s] expand %s to %s bytes, did%s move\n",
! 1604: who_am_i(), desc, big_num(new_size * item_size),
1.1 misho 1605: new_ptr == lp->items ? " not" : "");
1606: }
1607: if (!new_ptr)
1608: out_of_memory("expand_item_list");
1609:
1610: lp->items = new_ptr;
1611: lp->malloced = new_size;
1612: }
1613: return (char*)lp->items + (lp->count++ * item_size);
1614: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>