Annotation of embedaddon/rsync/util.c, revision 1.1.1.1
1.1 misho 1: /*
2: * Utility routines used in rsync.
3: *
4: * Copyright (C) 1996-2000 Andrew Tridgell
5: * Copyright (C) 1996 Paul Mackerras
6: * Copyright (C) 2001, 2002 Martin Pool <mbp@samba.org>
7: * Copyright (C) 2003-2009 Wayne Davison
8: *
9: * This program is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU General Public License as published by
11: * the Free Software Foundation; either version 3 of the License, or
12: * (at your option) any later version.
13: *
14: * This program is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17: * GNU General Public License for more details.
18: *
19: * You should have received a copy of the GNU General Public License along
20: * with this program; if not, visit the http://fsf.org website.
21: */
22:
23: #include "rsync.h"
24: #include "ifuncs.h"
25:
26: extern int verbose;
27: extern int module_id;
28: extern int modify_window;
29: extern int relative_paths;
30: extern int preserve_times;
31: extern int human_readable;
32: extern int preserve_xattrs;
33: extern char *module_dir;
34: extern unsigned int module_dirlen;
35: extern mode_t orig_umask;
36: extern char *partial_dir;
37: extern struct filter_list_struct daemon_filter_list;
38:
39: int sanitize_paths = 0;
40:
41: char curr_dir[MAXPATHLEN];
42: unsigned int curr_dir_len;
43: int curr_dir_depth; /* This is only set for a sanitizing daemon. */
44:
45: /* Set a fd into nonblocking mode. */
46: void set_nonblocking(int fd)
47: {
48: int val;
49:
50: if ((val = fcntl(fd, F_GETFL)) == -1)
51: return;
52: if (!(val & NONBLOCK_FLAG)) {
53: val |= NONBLOCK_FLAG;
54: fcntl(fd, F_SETFL, val);
55: }
56: }
57:
58: /* Set a fd into blocking mode. */
59: void set_blocking(int fd)
60: {
61: int val;
62:
63: if ((val = fcntl(fd, F_GETFL)) == -1)
64: return;
65: if (val & NONBLOCK_FLAG) {
66: val &= ~NONBLOCK_FLAG;
67: fcntl(fd, F_SETFL, val);
68: }
69: }
70:
71: /**
72: * Create a file descriptor pair - like pipe() but use socketpair if
73: * possible (because of blocking issues on pipes).
74: *
75: * Always set non-blocking.
76: */
77: int fd_pair(int fd[2])
78: {
79: int ret;
80:
81: #ifdef HAVE_SOCKETPAIR
82: ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
83: #else
84: ret = pipe(fd);
85: #endif
86:
87: if (ret == 0) {
88: set_nonblocking(fd[0]);
89: set_nonblocking(fd[1]);
90: }
91:
92: return ret;
93: }
94:
95: void print_child_argv(const char *prefix, char **cmd)
96: {
97: rprintf(FCLIENT, "%s ", prefix);
98: for (; *cmd; cmd++) {
99: /* Look for characters that ought to be quoted. This
100: * is not a great quoting algorithm, but it's
101: * sufficient for a log message. */
102: if (strspn(*cmd, "abcdefghijklmnopqrstuvwxyz"
103: "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
104: "0123456789"
105: ",.-_=+@/") != strlen(*cmd)) {
106: rprintf(FCLIENT, "\"%s\" ", *cmd);
107: } else {
108: rprintf(FCLIENT, "%s ", *cmd);
109: }
110: }
111: rprintf(FCLIENT, "\n");
112: }
113:
114: NORETURN void out_of_memory(const char *str)
115: {
116: rprintf(FERROR, "ERROR: out of memory in %s [%s]\n", str, who_am_i());
117: exit_cleanup(RERR_MALLOC);
118: }
119:
120: NORETURN void overflow_exit(const char *str)
121: {
122: rprintf(FERROR, "ERROR: buffer overflow in %s [%s]\n", str, who_am_i());
123: exit_cleanup(RERR_MALLOC);
124: }
125:
126: /* This returns 0 for success, 1 for a symlink if symlink time-setting
127: * is not possible, or -1 for any other error. */
128: int set_modtime(const char *fname, time_t modtime, mode_t mode)
129: {
130: static int switch_step = 0;
131:
132: if (verbose > 2) {
133: rprintf(FINFO, "set modtime of %s to (%ld) %s",
134: fname, (long)modtime,
135: asctime(localtime(&modtime)));
136: }
137:
138: switch (switch_step) {
139: #ifdef HAVE_UTIMENSAT
140: #include "case_N.h"
141: if (do_utimensat(fname, modtime, 0) == 0)
142: break;
143: if (errno != ENOSYS)
144: return -1;
145: switch_step++;
146: /* FALLTHROUGH */
147: #endif
148:
149: #ifdef HAVE_LUTIMES
150: #include "case_N.h"
151: if (do_lutimes(fname, modtime, 0) == 0)
152: break;
153: if (errno != ENOSYS)
154: return -1;
155: switch_step++;
156: /* FALLTHROUGH */
157: #endif
158:
159: #include "case_N.h"
160: switch_step++;
161: if (preserve_times & PRESERVE_LINK_TIMES) {
162: preserve_times &= ~PRESERVE_LINK_TIMES;
163: if (S_ISLNK(mode))
164: return 1;
165: }
166: /* FALLTHROUGH */
167:
168: #include "case_N.h"
169: #ifdef HAVE_UTIMES
170: if (do_utimes(fname, modtime, 0) == 0)
171: break;
172: #else
173: if (do_utime(fname, modtime, 0) == 0)
174: break;
175: #endif
176:
177: return -1;
178: }
179:
180: return 0;
181: }
182:
183: /* This creates a new directory with default permissions. Since there
184: * might be some directory-default permissions affecting this, we can't
185: * force the permissions directly using the original umask and mkdir(). */
186: int mkdir_defmode(char *fname)
187: {
188: int ret;
189:
190: umask(orig_umask);
191: ret = do_mkdir(fname, ACCESSPERMS);
192: umask(0);
193:
194: return ret;
195: }
196:
197: /* Create any necessary directories in fname. Any missing directories are
198: * created with default permissions. */
199: int create_directory_path(char *fname)
200: {
201: char *p;
202: int ret = 0;
203:
204: while (*fname == '/')
205: fname++;
206: while (strncmp(fname, "./", 2) == 0)
207: fname += 2;
208:
209: umask(orig_umask);
210: p = fname;
211: while ((p = strchr(p,'/')) != NULL) {
212: *p = '\0';
213: if (do_mkdir(fname, ACCESSPERMS) < 0 && errno != EEXIST)
214: ret = -1;
215: *p++ = '/';
216: }
217: umask(0);
218:
219: return ret;
220: }
221:
222: /**
223: * Write @p len bytes at @p ptr to descriptor @p desc, retrying if
224: * interrupted.
225: *
226: * @retval len upon success
227: *
228: * @retval <0 write's (negative) error code
229: *
230: * Derived from GNU C's cccp.c.
231: */
232: int full_write(int desc, const char *ptr, size_t len)
233: {
234: int total_written;
235:
236: total_written = 0;
237: while (len > 0) {
238: int written = write(desc, ptr, len);
239: if (written < 0) {
240: if (errno == EINTR)
241: continue;
242: return written;
243: }
244: total_written += written;
245: ptr += written;
246: len -= written;
247: }
248: return total_written;
249: }
250:
251: /**
252: * Read @p len bytes at @p ptr from descriptor @p desc, retrying if
253: * interrupted.
254: *
255: * @retval >0 the actual number of bytes read
256: *
257: * @retval 0 for EOF
258: *
259: * @retval <0 for an error.
260: *
261: * Derived from GNU C's cccp.c. */
262: static int safe_read(int desc, char *ptr, size_t len)
263: {
264: int n_chars;
265:
266: if (len == 0)
267: return len;
268:
269: do {
270: n_chars = read(desc, ptr, len);
271: } while (n_chars < 0 && errno == EINTR);
272:
273: return n_chars;
274: }
275:
276: /* Copy a file. If ofd < 0, copy_file unlinks and opens the "dest" file.
277: * Otherwise, it just writes to and closes the provided file descriptor.
278: * In either case, if --xattrs are being preserved, the dest file will
279: * have its xattrs set from the source file.
280: *
281: * This is used in conjunction with the --temp-dir, --backup, and
282: * --copy-dest options. */
283: int copy_file(const char *source, const char *dest, int ofd,
284: mode_t mode, int create_bak_dir)
285: {
286: int ifd;
287: char buf[1024 * 8];
288: int len; /* Number of bytes read into `buf'. */
289:
290: if ((ifd = do_open(source, O_RDONLY, 0)) < 0) {
291: int save_errno = errno;
292: rsyserr(FERROR_XFER, errno, "open %s", full_fname(source));
293: errno = save_errno;
294: return -1;
295: }
296:
297: if (ofd < 0) {
298: if (robust_unlink(dest) && errno != ENOENT) {
299: int save_errno = errno;
300: rsyserr(FERROR_XFER, errno, "unlink %s", full_fname(dest));
301: errno = save_errno;
302: return -1;
303: }
304:
305: if ((ofd = do_open(dest, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, mode)) < 0) {
306: int save_errno = errno ? errno : EINVAL; /* 0 paranoia */
307: if (create_bak_dir && errno == ENOENT && make_bak_dir(dest) == 0) {
308: if ((ofd = do_open(dest, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, mode)) < 0)
309: save_errno = errno ? errno : save_errno;
310: else
311: save_errno = 0;
312: }
313: if (save_errno) {
314: rsyserr(FERROR_XFER, save_errno, "open %s", full_fname(dest));
315: close(ifd);
316: errno = save_errno;
317: return -1;
318: }
319: }
320: }
321:
322: while ((len = safe_read(ifd, buf, sizeof buf)) > 0) {
323: if (full_write(ofd, buf, len) < 0) {
324: int save_errno = errno;
325: rsyserr(FERROR_XFER, errno, "write %s", full_fname(dest));
326: close(ifd);
327: close(ofd);
328: errno = save_errno;
329: return -1;
330: }
331: }
332:
333: if (len < 0) {
334: int save_errno = errno;
335: rsyserr(FERROR_XFER, errno, "read %s", full_fname(source));
336: close(ifd);
337: close(ofd);
338: errno = save_errno;
339: return -1;
340: }
341:
342: if (close(ifd) < 0) {
343: rsyserr(FWARNING, errno, "close failed on %s",
344: full_fname(source));
345: }
346:
347: if (close(ofd) < 0) {
348: int save_errno = errno;
349: rsyserr(FERROR_XFER, errno, "close failed on %s",
350: full_fname(dest));
351: errno = save_errno;
352: return -1;
353: }
354:
355: #ifdef SUPPORT_XATTRS
356: if (preserve_xattrs)
357: copy_xattrs(source, dest);
358: #endif
359:
360: return 0;
361: }
362:
363: /* MAX_RENAMES should be 10**MAX_RENAMES_DIGITS */
364: #define MAX_RENAMES_DIGITS 3
365: #define MAX_RENAMES 1000
366:
367: /**
368: * Robust unlink: some OS'es (HPUX) refuse to unlink busy files, so
369: * rename to <path>/.rsyncNNN instead.
370: *
371: * Note that successive rsync runs will shuffle the filenames around a
372: * bit as long as the file is still busy; this is because this function
373: * does not know if the unlink call is due to a new file coming in, or
374: * --delete trying to remove old .rsyncNNN files, hence it renames it
375: * each time.
376: **/
377: int robust_unlink(const char *fname)
378: {
379: #ifndef ETXTBSY
380: return do_unlink(fname);
381: #else
382: static int counter = 1;
383: int rc, pos, start;
384: char path[MAXPATHLEN];
385:
386: rc = do_unlink(fname);
387: if (rc == 0 || errno != ETXTBSY)
388: return rc;
389:
390: if ((pos = strlcpy(path, fname, MAXPATHLEN)) >= MAXPATHLEN)
391: pos = MAXPATHLEN - 1;
392:
393: while (pos > 0 && path[pos-1] != '/')
394: pos--;
395: pos += strlcpy(path+pos, ".rsync", MAXPATHLEN-pos);
396:
397: if (pos > (MAXPATHLEN-MAX_RENAMES_DIGITS-1)) {
398: errno = ETXTBSY;
399: return -1;
400: }
401:
402: /* start where the last one left off to reduce chance of clashes */
403: start = counter;
404: do {
405: snprintf(&path[pos], MAX_RENAMES_DIGITS+1, "%03d", counter);
406: if (++counter >= MAX_RENAMES)
407: counter = 1;
408: } while ((rc = access(path, 0)) == 0 && counter != start);
409:
410: if (verbose > 0) {
411: rprintf(FWARNING, "renaming %s to %s because of text busy\n",
412: fname, path);
413: }
414:
415: /* maybe we should return rename()'s exit status? Nah. */
416: if (do_rename(fname, path) != 0) {
417: errno = ETXTBSY;
418: return -1;
419: }
420: return 0;
421: #endif
422: }
423:
424: /* Returns 0 on successful rename, 1 if we successfully copied the file
425: * across filesystems, -2 if copy_file() failed, and -1 on other errors.
426: * If partialptr is not NULL and we need to do a copy, copy the file into
427: * the active partial-dir instead of over the destination file. */
428: int robust_rename(const char *from, const char *to, const char *partialptr,
429: int mode)
430: {
431: int tries = 4;
432:
433: while (tries--) {
434: if (do_rename(from, to) == 0)
435: return 0;
436:
437: switch (errno) {
438: #ifdef ETXTBSY
439: case ETXTBSY:
440: if (robust_unlink(to) != 0) {
441: errno = ETXTBSY;
442: return -1;
443: }
444: errno = ETXTBSY;
445: break;
446: #endif
447: case EXDEV:
448: if (partialptr) {
449: if (!handle_partial_dir(partialptr,PDIR_CREATE))
450: return -2;
451: to = partialptr;
452: }
453: if (copy_file(from, to, -1, mode, 0) != 0)
454: return -2;
455: do_unlink(from);
456: return 1;
457: default:
458: return -1;
459: }
460: }
461: return -1;
462: }
463:
464: static pid_t all_pids[10];
465: static int num_pids;
466:
467: /** Fork and record the pid of the child. **/
468: pid_t do_fork(void)
469: {
470: pid_t newpid = fork();
471:
472: if (newpid != 0 && newpid != -1) {
473: all_pids[num_pids++] = newpid;
474: }
475: return newpid;
476: }
477:
478: /**
479: * Kill all children.
480: *
481: * @todo It would be kind of nice to make sure that they are actually
482: * all our children before we kill them, because their pids may have
483: * been recycled by some other process. Perhaps when we wait for a
484: * child, we should remove it from this array. Alternatively we could
485: * perhaps use process groups, but I think that would not work on
486: * ancient Unix versions that don't support them.
487: **/
488: void kill_all(int sig)
489: {
490: int i;
491:
492: for (i = 0; i < num_pids; i++) {
493: /* Let's just be a little careful where we
494: * point that gun, hey? See kill(2) for the
495: * magic caused by negative values. */
496: pid_t p = all_pids[i];
497:
498: if (p == getpid())
499: continue;
500: if (p <= 0)
501: continue;
502:
503: kill(p, sig);
504: }
505: }
506:
507: /** Turn a user name into a uid */
508: int name_to_uid(const char *name, uid_t *uid_p)
509: {
510: struct passwd *pass;
511: if (!name || !*name)
512: return 0;
513: if (!(pass = getpwnam(name)))
514: return 0;
515: *uid_p = pass->pw_uid;
516: return 1;
517: }
518:
519: /** Turn a group name into a gid */
520: int name_to_gid(const char *name, gid_t *gid_p)
521: {
522: struct group *grp;
523: if (!name || !*name)
524: return 0;
525: if (!(grp = getgrnam(name)))
526: return 0;
527: *gid_p = grp->gr_gid;
528: return 1;
529: }
530:
531: /** Lock a byte range in a open file */
532: int lock_range(int fd, int offset, int len)
533: {
534: struct flock lock;
535:
536: lock.l_type = F_WRLCK;
537: lock.l_whence = SEEK_SET;
538: lock.l_start = offset;
539: lock.l_len = len;
540: lock.l_pid = 0;
541:
542: return fcntl(fd,F_SETLK,&lock) == 0;
543: }
544:
545: #define ENSURE_MEMSPACE(buf, type, sz, req) \
546: if ((req) > sz && !(buf = realloc_array(buf, type, sz = MAX(sz * 2, req)))) \
547: out_of_memory("glob_expand")
548:
549: static inline void call_glob_match(const char *name, int len, int from_glob,
550: char *arg, int abpos, int fbpos);
551:
552: static struct glob_data {
553: char *arg_buf, *filt_buf, **argv;
554: int absize, fbsize, maxargs, argc;
555: } glob;
556:
557: static void glob_match(char *arg, int abpos, int fbpos)
558: {
559: int len;
560: char *slash;
561:
562: while (*arg == '.' && arg[1] == '/') {
563: if (fbpos < 0) {
564: ENSURE_MEMSPACE(glob.filt_buf, char, glob.fbsize, glob.absize);
565: memcpy(glob.filt_buf, glob.arg_buf, abpos + 1);
566: fbpos = abpos;
567: }
568: ENSURE_MEMSPACE(glob.arg_buf, char, glob.absize, abpos + 3);
569: glob.arg_buf[abpos++] = *arg++;
570: glob.arg_buf[abpos++] = *arg++;
571: glob.arg_buf[abpos] = '\0';
572: }
573: if ((slash = strchr(arg, '/')) != NULL) {
574: *slash = '\0';
575: len = slash - arg;
576: } else
577: len = strlen(arg);
578: if (strpbrk(arg, "*?[")) {
579: struct dirent *di;
580: DIR *d;
581:
582: if (!(d = opendir(abpos ? glob.arg_buf : ".")))
583: return;
584: while ((di = readdir(d)) != NULL) {
585: char *dname = d_name(di);
586: if (dname[0] == '.' && (dname[1] == '\0'
587: || (dname[1] == '.' && dname[2] == '\0')))
588: continue;
589: if (!wildmatch(arg, dname))
590: continue;
591: call_glob_match(dname, strlen(dname), 1,
592: slash ? arg + len + 1 : NULL,
593: abpos, fbpos);
594: }
595: closedir(d);
596: } else {
597: call_glob_match(arg, len, 0,
598: slash ? arg + len + 1 : NULL,
599: abpos, fbpos);
600: }
601: if (slash)
602: *slash = '/';
603: }
604:
605: static inline void call_glob_match(const char *name, int len, int from_glob,
606: char *arg, int abpos, int fbpos)
607: {
608: char *use_buf;
609:
610: ENSURE_MEMSPACE(glob.arg_buf, char, glob.absize, abpos + len + 2);
611: memcpy(glob.arg_buf + abpos, name, len);
612: abpos += len;
613: glob.arg_buf[abpos] = '\0';
614:
615: if (fbpos >= 0) {
616: ENSURE_MEMSPACE(glob.filt_buf, char, glob.fbsize, fbpos + len + 2);
617: memcpy(glob.filt_buf + fbpos, name, len);
618: fbpos += len;
619: glob.filt_buf[fbpos] = '\0';
620: use_buf = glob.filt_buf;
621: } else
622: use_buf = glob.arg_buf;
623:
624: if (from_glob || (arg && len)) {
625: STRUCT_STAT st;
626: int is_dir;
627:
628: if (do_stat(glob.arg_buf, &st) != 0)
629: return;
630: is_dir = S_ISDIR(st.st_mode) != 0;
631: if (arg && !is_dir)
632: return;
633:
634: if (daemon_filter_list.head
635: && check_filter(&daemon_filter_list, FLOG, use_buf, is_dir) < 0)
636: return;
637: }
638:
639: if (arg) {
640: glob.arg_buf[abpos++] = '/';
641: glob.arg_buf[abpos] = '\0';
642: if (fbpos >= 0) {
643: glob.filt_buf[fbpos++] = '/';
644: glob.filt_buf[fbpos] = '\0';
645: }
646: glob_match(arg, abpos, fbpos);
647: } else {
648: ENSURE_MEMSPACE(glob.argv, char *, glob.maxargs, glob.argc + 1);
649: if (!(glob.argv[glob.argc++] = strdup(glob.arg_buf)))
650: out_of_memory("glob_match");
651: }
652: }
653:
654: /* This routine performs wild-card expansion of the pathname in "arg". Any
655: * daemon-excluded files/dirs will not be matched by the wildcards. Returns 0
656: * if a wild-card string is the only returned item (due to matching nothing). */
657: int glob_expand(const char *arg, char ***argv_p, int *argc_p, int *maxargs_p)
658: {
659: int ret, save_argc;
660: char *s;
661:
662: if (!arg) {
663: if (glob.filt_buf)
664: free(glob.filt_buf);
665: free(glob.arg_buf);
666: memset(&glob, 0, sizeof glob);
667: return -1;
668: }
669:
670: if (sanitize_paths)
671: s = sanitize_path(NULL, arg, "", 0, SP_KEEP_DOT_DIRS);
672: else {
673: s = strdup(arg);
674: if (!s)
675: out_of_memory("glob_expand");
676: clean_fname(s, CFN_KEEP_DOT_DIRS
677: | CFN_KEEP_TRAILING_SLASH
678: | CFN_COLLAPSE_DOT_DOT_DIRS);
679: }
680:
681: ENSURE_MEMSPACE(glob.arg_buf, char, glob.absize, MAXPATHLEN);
682: *glob.arg_buf = '\0';
683:
684: glob.argc = save_argc = *argc_p;
685: glob.argv = *argv_p;
686: glob.maxargs = *maxargs_p;
687:
688: ENSURE_MEMSPACE(glob.argv, char *, glob.maxargs, 100);
689:
690: glob_match(s, 0, -1);
691:
692: /* The arg didn't match anything, so add the failed arg to the list. */
693: if (glob.argc == save_argc) {
694: ENSURE_MEMSPACE(glob.argv, char *, glob.maxargs, glob.argc + 1);
695: glob.argv[glob.argc++] = s;
696: ret = 0;
697: } else {
698: free(s);
699: ret = 1;
700: }
701:
702: *maxargs_p = glob.maxargs;
703: *argv_p = glob.argv;
704: *argc_p = glob.argc;
705:
706: return ret;
707: }
708:
709: /* This routine is only used in daemon mode. */
710: void glob_expand_module(char *base1, char *arg, char ***argv_p, int *argc_p, int *maxargs_p)
711: {
712: char *p, *s;
713: char *base = base1;
714: int base_len = strlen(base);
715:
716: if (!arg || !*arg)
717: return;
718:
719: if (strncmp(arg, base, base_len) == 0)
720: arg += base_len;
721:
722: if (!(arg = strdup(arg)))
723: out_of_memory("glob_expand_module");
724:
725: if (asprintf(&base," %s/", base1) <= 0)
726: out_of_memory("glob_expand_module");
727: base_len++;
728:
729: for (s = arg; *s; s = p + base_len) {
730: if ((p = strstr(s, base)) != NULL)
731: *p = '\0'; /* split it at this point */
732: glob_expand(s, argv_p, argc_p, maxargs_p);
733: if (!p)
734: break;
735: }
736:
737: free(arg);
738: free(base);
739: }
740:
741: /**
742: * Convert a string to lower case
743: **/
744: void strlower(char *s)
745: {
746: while (*s) {
747: if (isUpper(s))
748: *s = toLower(s);
749: s++;
750: }
751: }
752:
753: /* Join strings p1 & p2 into "dest" with a guaranteed '/' between them. (If
754: * p1 ends with a '/', no extra '/' is inserted.) Returns the length of both
755: * strings + 1 (if '/' was inserted), regardless of whether the null-terminated
756: * string fits into destsize. */
757: size_t pathjoin(char *dest, size_t destsize, const char *p1, const char *p2)
758: {
759: size_t len = strlcpy(dest, p1, destsize);
760: if (len < destsize - 1) {
761: if (!len || dest[len-1] != '/')
762: dest[len++] = '/';
763: if (len < destsize - 1)
764: len += strlcpy(dest + len, p2, destsize - len);
765: else {
766: dest[len] = '\0';
767: len += strlen(p2);
768: }
769: }
770: else
771: len += strlen(p2) + 1; /* Assume we'd insert a '/'. */
772: return len;
773: }
774:
775: /* Join any number of strings together, putting them in "dest". The return
776: * value is the length of all the strings, regardless of whether the null-
777: * terminated whole fits in destsize. Your list of string pointers must end
778: * with a NULL to indicate the end of the list. */
779: size_t stringjoin(char *dest, size_t destsize, ...)
780: {
781: va_list ap;
782: size_t len, ret = 0;
783: const char *src;
784:
785: va_start(ap, destsize);
786: while (1) {
787: if (!(src = va_arg(ap, const char *)))
788: break;
789: len = strlen(src);
790: ret += len;
791: if (destsize > 1) {
792: if (len >= destsize)
793: len = destsize - 1;
794: memcpy(dest, src, len);
795: destsize -= len;
796: dest += len;
797: }
798: }
799: *dest = '\0';
800: va_end(ap);
801:
802: return ret;
803: }
804:
805: int count_dir_elements(const char *p)
806: {
807: int cnt = 0, new_component = 1;
808: while (*p) {
809: if (*p++ == '/')
810: new_component = (*p != '.' || (p[1] != '/' && p[1] != '\0'));
811: else if (new_component) {
812: new_component = 0;
813: cnt++;
814: }
815: }
816: return cnt;
817: }
818:
819: /* Turns multiple adjacent slashes into a single slash (possible exception:
820: * the preserving of two leading slashes at the start), drops all leading or
821: * interior "." elements unless CFN_KEEP_DOT_DIRS is flagged. Will also drop
822: * a trailing '.' after a '/' if CFN_DROP_TRAILING_DOT_DIR is flagged, removes
823: * a trailing slash (perhaps after removing the aforementioned dot) unless
824: * CFN_KEEP_TRAILING_SLASH is flagged, and will also collapse ".." elements
825: * (except at the start) if CFN_COLLAPSE_DOT_DOT_DIRS is flagged. If the
826: * resulting name would be empty, returns ".". */
827: unsigned int clean_fname(char *name, int flags)
828: {
829: char *limit = name - 1, *t = name, *f = name;
830: int anchored;
831:
832: if (!name)
833: return 0;
834:
835: if ((anchored = *f == '/') != 0) {
836: *t++ = *f++;
837: #ifdef __CYGWIN__
838: /* If there are exactly 2 slashes at the start, preserve
839: * them. Would break daemon excludes unless the paths are
840: * really treated differently, so used this sparingly. */
841: if (*f == '/' && f[1] != '/')
842: *t++ = *f++;
843: #endif
844: } else if (flags & CFN_KEEP_DOT_DIRS && *f == '.' && f[1] == '/') {
845: *t++ = *f++;
846: *t++ = *f++;
847: }
848: while (*f) {
849: /* discard extra slashes */
850: if (*f == '/') {
851: f++;
852: continue;
853: }
854: if (*f == '.') {
855: /* discard interior "." dirs */
856: if (f[1] == '/' && !(flags & CFN_KEEP_DOT_DIRS)) {
857: f += 2;
858: continue;
859: }
860: if (f[1] == '\0' && flags & CFN_DROP_TRAILING_DOT_DIR)
861: break;
862: /* collapse ".." dirs */
863: if (flags & CFN_COLLAPSE_DOT_DOT_DIRS
864: && f[1] == '.' && (f[2] == '/' || !f[2])) {
865: char *s = t - 1;
866: if (s == name && anchored) {
867: f += 2;
868: continue;
869: }
870: while (s > limit && *--s != '/') {}
871: if (s != t - 1 && (s < name || *s == '/')) {
872: t = s + 1;
873: f += 2;
874: continue;
875: }
876: limit = t + 2;
877: }
878: }
879: while (*f && (*t++ = *f++) != '/') {}
880: }
881:
882: if (t > name+anchored && t[-1] == '/' && !(flags & CFN_KEEP_TRAILING_SLASH))
883: t--;
884: if (t == name)
885: *t++ = '.';
886: *t = '\0';
887:
888: return t - name;
889: }
890:
891: /* Make path appear as if a chroot had occurred. This handles a leading
892: * "/" (either removing it or expanding it) and any leading or embedded
893: * ".." components that attempt to escape past the module's top dir.
894: *
895: * If dest is NULL, a buffer is allocated to hold the result. It is legal
896: * to call with the dest and the path (p) pointing to the same buffer, but
897: * rootdir will be ignored to avoid expansion of the string.
898: *
899: * The rootdir string contains a value to use in place of a leading slash.
900: * Specify NULL to get the default of "module_dir".
901: *
902: * The depth var is a count of how many '..'s to allow at the start of the
903: * path.
904: *
905: * We also clean the path in a manner similar to clean_fname() but with a
906: * few differences:
907: *
908: * Turns multiple adjacent slashes into a single slash, gets rid of "." dir
909: * elements (INCLUDING a trailing dot dir), PRESERVES a trailing slash, and
910: * ALWAYS collapses ".." elements (except for those at the start of the
911: * string up to "depth" deep). If the resulting name would be empty,
912: * change it into a ".". */
913: char *sanitize_path(char *dest, const char *p, const char *rootdir, int depth,
914: int flags)
915: {
916: char *start, *sanp;
917: int rlen = 0, drop_dot_dirs = !relative_paths || !(flags & SP_KEEP_DOT_DIRS);
918:
919: if (dest != p) {
920: int plen = strlen(p);
921: if (*p == '/') {
922: if (!rootdir)
923: rootdir = module_dir;
924: rlen = strlen(rootdir);
925: depth = 0;
926: p++;
927: }
928: if (dest) {
929: if (rlen + plen + 1 >= MAXPATHLEN)
930: return NULL;
931: } else if (!(dest = new_array(char, rlen + plen + 1)))
932: out_of_memory("sanitize_path");
933: if (rlen) {
934: memcpy(dest, rootdir, rlen);
935: if (rlen > 1)
936: dest[rlen++] = '/';
937: }
938: }
939:
940: if (drop_dot_dirs) {
941: while (*p == '.' && p[1] == '/')
942: p += 2;
943: }
944:
945: start = sanp = dest + rlen;
946: /* This loop iterates once per filename component in p, pointing at
947: * the start of the name (past any prior slash) for each iteration. */
948: while (*p) {
949: /* discard leading or extra slashes */
950: if (*p == '/') {
951: p++;
952: continue;
953: }
954: if (drop_dot_dirs) {
955: if (*p == '.' && (p[1] == '/' || p[1] == '\0')) {
956: /* skip "." component */
957: p++;
958: continue;
959: }
960: }
961: if (*p == '.' && p[1] == '.' && (p[2] == '/' || p[2] == '\0')) {
962: /* ".." component followed by slash or end */
963: if (depth <= 0 || sanp != start) {
964: p += 2;
965: if (sanp != start) {
966: /* back up sanp one level */
967: --sanp; /* now pointing at slash */
968: while (sanp > start && sanp[-1] != '/')
969: sanp--;
970: }
971: continue;
972: }
973: /* allow depth levels of .. at the beginning */
974: depth--;
975: /* move the virtual beginning to leave the .. alone */
976: start = sanp + 3;
977: }
978: /* copy one component through next slash */
979: while (*p && (*sanp++ = *p++) != '/') {}
980: }
981: if (sanp == dest) {
982: /* ended up with nothing, so put in "." component */
983: *sanp++ = '.';
984: }
985: *sanp = '\0';
986:
987: return dest;
988: }
989:
990: /* Like chdir(), but it keeps track of the current directory (in the
991: * global "curr_dir"), and ensures that the path size doesn't overflow.
992: * Also cleans the path using the clean_fname() function. */
993: int change_dir(const char *dir, int set_path_only)
994: {
995: static int initialised;
996: unsigned int len;
997:
998: if (!initialised) {
999: initialised = 1;
1000: if (getcwd(curr_dir, sizeof curr_dir - 1) == NULL) {
1001: rsyserr(FERROR, errno, "getcwd()");
1002: exit_cleanup(RERR_FILESELECT);
1003: }
1004: curr_dir_len = strlen(curr_dir);
1005: }
1006:
1007: if (!dir) /* this call was probably just to initialize */
1008: return 0;
1009:
1010: len = strlen(dir);
1011: if (len == 1 && *dir == '.')
1012: return 1;
1013:
1014: if (*dir == '/') {
1015: if (len >= sizeof curr_dir) {
1016: errno = ENAMETOOLONG;
1017: return 0;
1018: }
1019: if (!set_path_only && chdir(dir))
1020: return 0;
1021: memcpy(curr_dir, dir, len + 1);
1022: } else {
1023: if (curr_dir_len + 1 + len >= sizeof curr_dir) {
1024: errno = ENAMETOOLONG;
1025: return 0;
1026: }
1027: if (!(curr_dir_len && curr_dir[curr_dir_len-1] == '/'))
1028: curr_dir[curr_dir_len++] = '/';
1029: memcpy(curr_dir + curr_dir_len, dir, len + 1);
1030:
1031: if (!set_path_only && chdir(curr_dir)) {
1032: curr_dir[curr_dir_len] = '\0';
1033: return 0;
1034: }
1035: }
1036:
1037: curr_dir_len = clean_fname(curr_dir, CFN_COLLAPSE_DOT_DOT_DIRS);
1038: if (sanitize_paths) {
1039: if (module_dirlen > curr_dir_len)
1040: module_dirlen = curr_dir_len;
1041: curr_dir_depth = count_dir_elements(curr_dir + module_dirlen);
1042: }
1043:
1044: if (verbose >= 5 && !set_path_only)
1045: rprintf(FINFO, "[%s] change_dir(%s)\n", who_am_i(), curr_dir);
1046:
1047: return 1;
1048: }
1049:
1050: /* This will make a relative path absolute and clean it up via clean_fname().
1051: * Returns the string, which might be newly allocated, or NULL on error. */
1052: char *normalize_path(char *path, BOOL force_newbuf, unsigned int *len_ptr)
1053: {
1054: unsigned int len;
1055:
1056: if (*path != '/') { /* Make path absolute. */
1057: int len = strlen(path);
1058: if (curr_dir_len + 1 + len >= sizeof curr_dir)
1059: return NULL;
1060: curr_dir[curr_dir_len] = '/';
1061: memcpy(curr_dir + curr_dir_len + 1, path, len + 1);
1062: if (!(path = strdup(curr_dir)))
1063: out_of_memory("normalize_path");
1064: curr_dir[curr_dir_len] = '\0';
1065: } else if (force_newbuf) {
1066: if (!(path = strdup(path)))
1067: out_of_memory("normalize_path");
1068: }
1069:
1070: len = clean_fname(path, CFN_COLLAPSE_DOT_DOT_DIRS | CFN_DROP_TRAILING_DOT_DIR);
1071:
1072: if (len_ptr)
1073: *len_ptr = len;
1074:
1075: return path;
1076: }
1077:
1078: /**
1079: * Return a quoted string with the full pathname of the indicated filename.
1080: * The string " (in MODNAME)" may also be appended. The returned pointer
1081: * remains valid until the next time full_fname() is called.
1082: **/
1083: char *full_fname(const char *fn)
1084: {
1085: static char *result = NULL;
1086: char *m1, *m2, *m3;
1087: char *p1, *p2;
1088:
1089: if (result)
1090: free(result);
1091:
1092: if (*fn == '/')
1093: p1 = p2 = "";
1094: else {
1095: p1 = curr_dir + module_dirlen;
1096: for (p2 = p1; *p2 == '/'; p2++) {}
1097: if (*p2)
1098: p2 = "/";
1099: }
1100: if (module_id >= 0) {
1101: m1 = " (in ";
1102: m2 = lp_name(module_id);
1103: m3 = ")";
1104: } else
1105: m1 = m2 = m3 = "";
1106:
1107: if (asprintf(&result, "\"%s%s%s\"%s%s%s", p1, p2, fn, m1, m2, m3) <= 0)
1108: out_of_memory("full_fname");
1109:
1110: return result;
1111: }
1112:
1113: static char partial_fname[MAXPATHLEN];
1114:
1115: char *partial_dir_fname(const char *fname)
1116: {
1117: char *t = partial_fname;
1118: int sz = sizeof partial_fname;
1119: const char *fn;
1120:
1121: if ((fn = strrchr(fname, '/')) != NULL) {
1122: fn++;
1123: if (*partial_dir != '/') {
1124: int len = fn - fname;
1125: strncpy(t, fname, len); /* safe */
1126: t += len;
1127: sz -= len;
1128: }
1129: } else
1130: fn = fname;
1131: if ((int)pathjoin(t, sz, partial_dir, fn) >= sz)
1132: return NULL;
1133: if (daemon_filter_list.head) {
1134: t = strrchr(partial_fname, '/');
1135: *t = '\0';
1136: if (check_filter(&daemon_filter_list, FLOG, partial_fname, 1) < 0)
1137: return NULL;
1138: *t = '/';
1139: if (check_filter(&daemon_filter_list, FLOG, partial_fname, 0) < 0)
1140: return NULL;
1141: }
1142:
1143: return partial_fname;
1144: }
1145:
1146: /* If no --partial-dir option was specified, we don't need to do anything
1147: * (the partial-dir is essentially '.'), so just return success. */
1148: int handle_partial_dir(const char *fname, int create)
1149: {
1150: char *fn, *dir;
1151:
1152: if (fname != partial_fname)
1153: return 1;
1154: if (!create && *partial_dir == '/')
1155: return 1;
1156: if (!(fn = strrchr(partial_fname, '/')))
1157: return 1;
1158:
1159: *fn = '\0';
1160: dir = partial_fname;
1161: if (create) {
1162: STRUCT_STAT st;
1163: int statret = do_lstat(dir, &st);
1164: if (statret == 0 && !S_ISDIR(st.st_mode)) {
1165: if (do_unlink(dir) < 0) {
1166: *fn = '/';
1167: return 0;
1168: }
1169: statret = -1;
1170: }
1171: if (statret < 0 && do_mkdir(dir, 0700) < 0) {
1172: *fn = '/';
1173: return 0;
1174: }
1175: } else
1176: do_rmdir(dir);
1177: *fn = '/';
1178:
1179: return 1;
1180: }
1181:
1182: /* Determine if a symlink points outside the current directory tree.
1183: * This is considered "unsafe" because e.g. when mirroring somebody
1184: * else's machine it might allow them to establish a symlink to
1185: * /etc/passwd, and then read it through a web server.
1186: *
1187: * Returns 1 if unsafe, 0 if safe.
1188: *
1189: * Null symlinks and absolute symlinks are always unsafe.
1190: *
1191: * Basically here we are concerned with symlinks whose target contains
1192: * "..", because this might cause us to walk back up out of the
1193: * transferred directory. We are not allowed to go back up and
1194: * reenter.
1195: *
1196: * "dest" is the target of the symlink in question.
1197: *
1198: * "src" is the top source directory currently applicable at the level
1199: * of the referenced symlink. This is usually the symlink's full path
1200: * (including its name), as referenced from the root of the transfer. */
1201: int unsafe_symlink(const char *dest, const char *src)
1202: {
1203: const char *name, *slash;
1204: int depth = 0;
1205:
1206: /* all absolute and null symlinks are unsafe */
1207: if (!dest || !*dest || *dest == '/')
1208: return 1;
1209:
1210: /* find out what our safety margin is */
1211: for (name = src; (slash = strchr(name, '/')) != 0; name = slash+1) {
1212: /* ".." segment starts the count over. "." segment is ignored. */
1213: if (*name == '.' && (name[1] == '/' || (name[1] == '.' && name[2] == '/'))) {
1214: if (name[1] == '.')
1215: depth = 0;
1216: } else
1217: depth++;
1218: while (slash[1] == '/') slash++; /* just in case src isn't clean */
1219: }
1220: if (*name == '.' && name[1] == '.' && name[2] == '\0')
1221: depth = 0;
1222:
1223: for (name = dest; (slash = strchr(name, '/')) != 0; name = slash+1) {
1224: if (*name == '.' && (name[1] == '/' || (name[1] == '.' && name[2] == '/'))) {
1225: if (name[1] == '.') {
1226: /* if at any point we go outside the current directory
1227: then stop - it is unsafe */
1228: if (--depth < 0)
1229: return 1;
1230: }
1231: } else
1232: depth++;
1233: while (slash[1] == '/') slash++;
1234: }
1235: if (*name == '.' && name[1] == '.' && name[2] == '\0')
1236: depth--;
1237:
1238: return depth < 0;
1239: }
1240:
1241: #define HUMANIFY(mult) \
1242: do { \
1243: if (num >= mult || num <= -mult) { \
1244: double dnum = (double)num / mult; \
1245: char units; \
1246: if (num < 0) \
1247: dnum = -dnum; \
1248: if (dnum < mult) \
1249: units = 'K'; \
1250: else if ((dnum /= mult) < mult) \
1251: units = 'M'; \
1252: else { \
1253: dnum /= mult; \
1254: units = 'G'; \
1255: } \
1256: if (num < 0) \
1257: dnum = -dnum; \
1258: snprintf(bufs[n], sizeof bufs[0], "%.2f%c", dnum, units); \
1259: return bufs[n]; \
1260: } \
1261: } while (0)
1262:
1263: /* Return the int64 number as a string. If the --human-readable option was
1264: * specified, we may output the number in K, M, or G units. We can return
1265: * up to 4 buffers at a time. */
1266: char *human_num(int64 num)
1267: {
1268: static char bufs[4][128]; /* more than enough room */
1269: static unsigned int n;
1270: char *s;
1271: int negated;
1272:
1273: n = (n + 1) % (sizeof bufs / sizeof bufs[0]);
1274:
1275: if (human_readable) {
1276: if (human_readable == 1)
1277: HUMANIFY(1000);
1278: else
1279: HUMANIFY(1024);
1280: }
1281:
1282: s = bufs[n] + sizeof bufs[0] - 1;
1283: *s = '\0';
1284:
1285: if (!num)
1286: *--s = '0';
1287: if (num < 0) {
1288: /* A maximum-size negated number can't fit as a positive,
1289: * so do one digit in negated form to start us off. */
1290: *--s = (char)(-(num % 10)) + '0';
1291: num = -(num / 10);
1292: negated = 1;
1293: } else
1294: negated = 0;
1295:
1296: while (num) {
1297: *--s = (char)(num % 10) + '0';
1298: num /= 10;
1299: }
1300:
1301: if (negated)
1302: *--s = '-';
1303:
1304: return s;
1305: }
1306:
1307: /* Return the double number as a string. If the --human-readable option was
1308: * specified, we may output the number in K, M, or G units. We use a buffer
1309: * from human_num() to return our result. */
1310: char *human_dnum(double dnum, int decimal_digits)
1311: {
1312: char *buf = human_num(dnum);
1313: int len = strlen(buf);
1314: if (isDigit(buf + len - 1)) {
1315: /* There's extra room in buf prior to the start of the num. */
1316: buf -= decimal_digits + 2;
1317: snprintf(buf, len + decimal_digits + 3, "%.*f", decimal_digits, dnum);
1318: }
1319: return buf;
1320: }
1321:
1322: /* Return the date and time as a string. Some callers tweak returned buf. */
1323: char *timestring(time_t t)
1324: {
1325: static char TimeBuf[200];
1326: struct tm *tm = localtime(&t);
1327: char *p;
1328:
1329: #ifdef HAVE_STRFTIME
1330: strftime(TimeBuf, sizeof TimeBuf - 1, "%Y/%m/%d %H:%M:%S", tm);
1331: #else
1332: strlcpy(TimeBuf, asctime(tm), sizeof TimeBuf);
1333: #endif
1334:
1335: if ((p = strchr(TimeBuf, '\n')) != NULL)
1336: *p = '\0';
1337:
1338: return TimeBuf;
1339: }
1340:
1341: /**
1342: * Sleep for a specified number of milliseconds.
1343: *
1344: * Always returns TRUE. (In the future it might return FALSE if
1345: * interrupted.)
1346: **/
1347: int msleep(int t)
1348: {
1349: int tdiff = 0;
1350: struct timeval tval, t1, t2;
1351:
1352: gettimeofday(&t1, NULL);
1353:
1354: while (tdiff < t) {
1355: tval.tv_sec = (t-tdiff)/1000;
1356: tval.tv_usec = 1000*((t-tdiff)%1000);
1357:
1358: errno = 0;
1359: select(0,NULL,NULL, NULL, &tval);
1360:
1361: gettimeofday(&t2, NULL);
1362: tdiff = (t2.tv_sec - t1.tv_sec)*1000 +
1363: (t2.tv_usec - t1.tv_usec)/1000;
1364: }
1365:
1366: return True;
1367: }
1368:
1369: /* Determine if two time_t values are equivalent (either exact, or in
1370: * the modification timestamp window established by --modify-window).
1371: *
1372: * @retval 0 if the times should be treated as the same
1373: *
1374: * @retval +1 if the first is later
1375: *
1376: * @retval -1 if the 2nd is later
1377: **/
1378: int cmp_time(time_t file1, time_t file2)
1379: {
1380: if (file2 > file1) {
1381: if (file2 - file1 <= modify_window)
1382: return 0;
1383: return -1;
1384: }
1385: if (file1 - file2 <= modify_window)
1386: return 0;
1387: return 1;
1388: }
1389:
1390:
1391: #ifdef __INSURE__XX
1392: #include <dlfcn.h>
1393:
1394: /**
1395: This routine is a trick to immediately catch errors when debugging
1396: with insure. A xterm with a gdb is popped up when insure catches
1397: a error. It is Linux specific.
1398: **/
1399: int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6)
1400: {
1401: static int (*fn)();
1402: int ret;
1403: char *cmd;
1404:
1405: asprintf(&cmd, "/usr/X11R6/bin/xterm -display :0 -T Panic -n Panic -e /bin/sh -c 'cat /tmp/ierrs.*.%d ; gdb /proc/%d/exe %d'",
1406: getpid(), getpid(), getpid());
1407:
1408: if (!fn) {
1409: static void *h;
1410: h = dlopen("/usr/local/parasoft/insure++lite/lib.linux2/libinsure.so", RTLD_LAZY);
1411: fn = dlsym(h, "_Insure_trap_error");
1412: }
1413:
1414: ret = fn(a1, a2, a3, a4, a5, a6);
1415:
1416: system(cmd);
1417:
1418: free(cmd);
1419:
1420: return ret;
1421: }
1422: #endif
1423:
1424: #define MALLOC_MAX 0x40000000
1425:
1426: void *_new_array(unsigned long num, unsigned int size, int use_calloc)
1427: {
1428: if (num >= MALLOC_MAX/size)
1429: return NULL;
1430: return use_calloc ? calloc(num, size) : malloc(num * size);
1431: }
1432:
1433: void *_realloc_array(void *ptr, unsigned int size, size_t num)
1434: {
1435: if (num >= MALLOC_MAX/size)
1436: return NULL;
1437: if (!ptr)
1438: return malloc(size * num);
1439: return realloc(ptr, size * num);
1440: }
1441:
1442: /* Take a filename and filename length and return the most significant
1443: * filename suffix we can find. This ignores suffixes such as "~",
1444: * ".bak", ".orig", ".~1~", etc. */
1445: const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr)
1446: {
1447: const char *suf, *s;
1448: BOOL had_tilde;
1449: int s_len;
1450:
1451: /* One or more dots at the start aren't a suffix. */
1452: while (fn_len && *fn == '.') fn++, fn_len--;
1453:
1454: /* Ignore the ~ in a "foo~" filename. */
1455: if (fn_len > 1 && fn[fn_len-1] == '~')
1456: fn_len--, had_tilde = True;
1457: else
1458: had_tilde = False;
1459:
1460: /* Assume we don't find an suffix. */
1461: suf = "";
1462: *len_ptr = 0;
1463:
1464: /* Find the last significant suffix. */
1465: for (s = fn + fn_len; fn_len > 1; ) {
1466: while (*--s != '.' && s != fn) {}
1467: if (s == fn)
1468: break;
1469: s_len = fn_len - (s - fn);
1470: fn_len = s - fn;
1471: if (s_len == 4) {
1472: if (strcmp(s+1, "bak") == 0
1473: || strcmp(s+1, "old") == 0)
1474: continue;
1475: } else if (s_len == 5) {
1476: if (strcmp(s+1, "orig") == 0)
1477: continue;
1478: } else if (s_len > 2 && had_tilde
1479: && s[1] == '~' && isDigit(s + 2))
1480: continue;
1481: *len_ptr = s_len;
1482: suf = s;
1483: if (s_len == 1)
1484: break;
1485: /* Determine if the suffix is all digits. */
1486: for (s++, s_len--; s_len > 0; s++, s_len--) {
1487: if (!isDigit(s))
1488: return suf;
1489: }
1490: /* An all-digit suffix may not be that signficant. */
1491: s = suf;
1492: }
1493:
1494: return suf;
1495: }
1496:
1497: /* This is an implementation of the Levenshtein distance algorithm. It
1498: * was implemented to avoid needing a two-dimensional matrix (to save
1499: * memory). It was also tweaked to try to factor in the ASCII distance
1500: * between changed characters as a minor distance quantity. The normal
1501: * Levenshtein units of distance (each signifying a single change between
1502: * the two strings) are defined as a "UNIT". */
1503:
1504: #define UNIT (1 << 16)
1505:
1506: uint32 fuzzy_distance(const char *s1, int len1, const char *s2, int len2)
1507: {
1508: uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc;
1509: int32 cost;
1510: int i1, i2;
1511:
1512: if (!len1 || !len2) {
1513: if (!len1) {
1514: s1 = s2;
1515: len1 = len2;
1516: }
1517: for (i1 = 0, cost = 0; i1 < len1; i1++)
1518: cost += s1[i1];
1519: return (int32)len1 * UNIT + cost;
1520: }
1521:
1522: for (i2 = 0; i2 < len2; i2++)
1523: a[i2] = (i2+1) * UNIT;
1524:
1525: for (i1 = 0; i1 < len1; i1++) {
1526: diag = i1 * UNIT;
1527: above = (i1+1) * UNIT;
1528: for (i2 = 0; i2 < len2; i2++) {
1529: left = a[i2];
1530: if ((cost = *((uchar*)s1+i1) - *((uchar*)s2+i2)) != 0) {
1531: if (cost < 0)
1532: cost = UNIT - cost;
1533: else
1534: cost = UNIT + cost;
1535: }
1536: diag_inc = diag + cost;
1537: left_inc = left + UNIT + *((uchar*)s1+i1);
1538: above_inc = above + UNIT + *((uchar*)s2+i2);
1539: a[i2] = above = left < above
1540: ? (left_inc < diag_inc ? left_inc : diag_inc)
1541: : (above_inc < diag_inc ? above_inc : diag_inc);
1542: diag = left;
1543: }
1544: }
1545:
1546: return a[len2-1];
1547: }
1548:
1549: #define BB_SLOT_SIZE (16*1024) /* Desired size in bytes */
1550: #define BB_PER_SLOT_BITS (BB_SLOT_SIZE * 8) /* Number of bits per slot */
1551: #define BB_PER_SLOT_INTS (BB_SLOT_SIZE / 4) /* Number of int32s per slot */
1552:
1553: struct bitbag {
1554: uint32 **bits;
1555: int slot_cnt;
1556: };
1557:
1558: struct bitbag *bitbag_create(int max_ndx)
1559: {
1560: struct bitbag *bb = new(struct bitbag);
1561: bb->slot_cnt = (max_ndx + BB_PER_SLOT_BITS - 1) / BB_PER_SLOT_BITS;
1562:
1563: if (!(bb->bits = (uint32**)calloc(bb->slot_cnt, sizeof (uint32*))))
1564: out_of_memory("bitbag_create");
1565:
1566: return bb;
1567: }
1568:
1569: void bitbag_set_bit(struct bitbag *bb, int ndx)
1570: {
1571: int slot = ndx / BB_PER_SLOT_BITS;
1572: ndx %= BB_PER_SLOT_BITS;
1573:
1574: if (!bb->bits[slot]) {
1575: if (!(bb->bits[slot] = (uint32*)calloc(BB_PER_SLOT_INTS, 4)))
1576: out_of_memory("bitbag_set_bit");
1577: }
1578:
1579: bb->bits[slot][ndx/32] |= 1u << (ndx % 32);
1580: }
1581:
1582: #if 0 /* not needed yet */
1583: void bitbag_clear_bit(struct bitbag *bb, int ndx)
1584: {
1585: int slot = ndx / BB_PER_SLOT_BITS;
1586: ndx %= BB_PER_SLOT_BITS;
1587:
1588: if (!bb->bits[slot])
1589: return;
1590:
1591: bb->bits[slot][ndx/32] &= ~(1u << (ndx % 32));
1592: }
1593:
1594: int bitbag_check_bit(struct bitbag *bb, int ndx)
1595: {
1596: int slot = ndx / BB_PER_SLOT_BITS;
1597: ndx %= BB_PER_SLOT_BITS;
1598:
1599: if (!bb->bits[slot])
1600: return 0;
1601:
1602: return bb->bits[slot][ndx/32] & (1u << (ndx % 32)) ? 1 : 0;
1603: }
1604: #endif
1605:
1606: /* Call this with -1 to start checking from 0. Returns -1 at the end. */
1607: int bitbag_next_bit(struct bitbag *bb, int after)
1608: {
1609: uint32 bits, mask;
1610: int i, ndx = after + 1;
1611: int slot = ndx / BB_PER_SLOT_BITS;
1612: ndx %= BB_PER_SLOT_BITS;
1613:
1614: mask = (1u << (ndx % 32)) - 1;
1615: for (i = ndx / 32; slot < bb->slot_cnt; slot++, i = mask = 0) {
1616: if (!bb->bits[slot])
1617: continue;
1618: for ( ; i < BB_PER_SLOT_INTS; i++, mask = 0) {
1619: if (!(bits = bb->bits[slot][i] & ~mask))
1620: continue;
1621: /* The xor magic figures out the lowest enabled bit in
1622: * bits, and the switch quickly computes log2(bit). */
1623: switch (bits ^ (bits & (bits-1))) {
1624: #define LOG2(n) case 1u << n: return slot*BB_PER_SLOT_BITS + i*32 + n
1625: LOG2(0); LOG2(1); LOG2(2); LOG2(3);
1626: LOG2(4); LOG2(5); LOG2(6); LOG2(7);
1627: LOG2(8); LOG2(9); LOG2(10); LOG2(11);
1628: LOG2(12); LOG2(13); LOG2(14); LOG2(15);
1629: LOG2(16); LOG2(17); LOG2(18); LOG2(19);
1630: LOG2(20); LOG2(21); LOG2(22); LOG2(23);
1631: LOG2(24); LOG2(25); LOG2(26); LOG2(27);
1632: LOG2(28); LOG2(29); LOG2(30); LOG2(31);
1633: }
1634: return -1; /* impossible... */
1635: }
1636: }
1637:
1638: return -1;
1639: }
1640:
1641: void flist_ndx_push(flist_ndx_list *lp, int ndx)
1642: {
1643: struct flist_ndx_item *item;
1644:
1645: if (!(item = new(struct flist_ndx_item)))
1646: out_of_memory("flist_ndx_push");
1647: item->next = NULL;
1648: item->ndx = ndx;
1649: if (lp->tail)
1650: lp->tail->next = item;
1651: else
1652: lp->head = item;
1653: lp->tail = item;
1654: }
1655:
1656: int flist_ndx_pop(flist_ndx_list *lp)
1657: {
1658: struct flist_ndx_item *next;
1659: int ndx;
1660:
1661: if (!lp->head)
1662: return -1;
1663:
1664: ndx = lp->head->ndx;
1665: next = lp->head->next;
1666: free(lp->head);
1667: lp->head = next;
1668: if (!next)
1669: lp->tail = NULL;
1670:
1671: return ndx;
1672: }
1673:
1674: void *expand_item_list(item_list *lp, size_t item_size,
1675: const char *desc, int incr)
1676: {
1677: /* First time through, 0 <= 0, so list is expanded. */
1678: if (lp->malloced <= lp->count) {
1679: void *new_ptr;
1680: size_t new_size = lp->malloced;
1681: if (incr < 0)
1682: new_size += -incr; /* increase slowly */
1683: else if (new_size < (size_t)incr)
1684: new_size += incr;
1685: else
1686: new_size *= 2;
1687: if (new_size < lp->malloced)
1688: overflow_exit("expand_item_list");
1689: /* Using _realloc_array() lets us pass the size, not a type. */
1690: new_ptr = _realloc_array(lp->items, item_size, new_size);
1691: if (verbose >= 4) {
1692: rprintf(FINFO, "[%s] expand %s to %.0f bytes, did%s move\n",
1693: who_am_i(), desc, (double)new_size * item_size,
1694: new_ptr == lp->items ? " not" : "");
1695: }
1696: if (!new_ptr)
1697: out_of_memory("expand_item_list");
1698:
1699: lp->items = new_ptr;
1700: lp->malloced = new_size;
1701: }
1702: return (char*)lp->items + (lp->count++ * item_size);
1703: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>