Annotation of embedaddon/php/ext/fileinfo/libmagic/apprentice.c, revision 1.1.1.2
1.1 misho 1: /*
2: * Copyright (c) Ian F. Darwin 1986-1995.
3: * Software written by Ian F. Darwin and others;
4: * maintained 1995-present by Christos Zoulas and others.
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice immediately at the beginning of the file, without modification,
11: * this list of conditions, and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26: * SUCH DAMAGE.
27: */
28: /*
29: * apprentice - make one pass through /etc/magic, learning its secrets.
30: */
31:
32: #include "php.h"
33:
34: #include "file.h"
35:
36: #ifndef lint
1.1.1.2 ! misho 37: FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $")
1.1 misho 38: #endif /* lint */
39:
40: #include "magic.h"
41: #include "patchlevel.h"
42: #include <stdlib.h>
43:
44: #if defined(__hpux) && !defined(HAVE_STRTOULL)
45: #if SIZEOF_LONG == 8
46: # define strtoull strtoul
47: #else
48: # define strtoull __strtoull
49: #endif
50: #endif
51:
52: #ifdef PHP_WIN32
53: #include "win32/unistd.h"
54: #if _MSC_VER <= 1300
55: # include "win32/php_strtoi64.h"
56: #endif
57: #define strtoull _strtoui64
58: #else
59: #include <unistd.h>
60: #endif
61:
62: #include <string.h>
63: #include <assert.h>
64: #include <ctype.h>
65: #include <fcntl.h>
66: #ifndef PHP_WIN32
67: #include <dirent.h>
68: #endif
69:
/* Advance the in-scope pointer `l' past any run of ASCII whitespace. */
70: #define EATAB {while (isascii((unsigned char) *l) && \
71: isspace((unsigned char) *l)) ++l;}
/* Yield the lower-case form of an upper-case character; anything else is returned unchanged. */
72: #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
73: tolower((unsigned char) (l)) : (l))
74: /*
75: * Work around a bug in headers on Digital Unix.
76: * At least confirmed for: OSF1 V4.0 878
77: */
78: #if defined(__osf__) && defined(__DECC)
79: #ifdef MAP_FAILED
80: #undef MAP_FAILED
81: #endif
82: #endif
83:
84: #ifndef MAP_FAILED
85: #define MAP_FAILED (void *) -1
86: #endif
87:
88: #ifndef MAP_FILE
89: #define MAP_FILE 0
90: #endif
91:
/* One level-0 magic test together with the continuation lines that follow it. */
92: struct magic_entry {
93: struct magic *mp; /* array: slot 0 is the level-0 test, later slots its continuations */
94: uint32_t cont_count; /* number of slots of mp currently in use */
95: uint32_t max_count; /* number of slots allocated for mp */
96: };
97:
/*
 * Per-type lookup tables indexed by the FILE_* type code; both are
 * populated from type_tbl by init_file_tables().
 */
98: int file_formats[FILE_NAMES_SIZE]; /* FILE_FMT_* class for each type */
99: const size_t file_nformats = FILE_NAMES_SIZE;
100: const char *file_names[FILE_NAMES_SIZE]; /* keyword for each type */
101: const size_t file_nnames = FILE_NAMES_SIZE;
102:
103: private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
104: private int hextoint(int);
105: private const char *getstr(struct magic_set *, struct magic *, const char *,
106: int);
107: private int parse(struct magic_set *, struct magic_entry **, uint32_t *,
108: const char *, size_t, int);
109: private void eatsize(const char **);
110: private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
111: private size_t apprentice_magic_strength(const struct magic *);
112: private int apprentice_sort(const void *, const void *);
1.1.1.2 ! misho 113: private void apprentice_list(struct mlist *, int );
1.1 misho 114: private int apprentice_load(struct magic_set *, struct magic **, uint32_t *,
115: const char *, int);
116: private void byteswap(struct magic *, uint32_t);
117: private void bs1(struct magic *);
118: private uint16_t swap2(uint16_t);
119: private uint32_t swap4(uint32_t);
120: private uint64_t swap8(uint64_t);
121: private char *mkdbname(struct magic_set *, const char *, int);
122: private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
123: const char *);
124: private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
125: const char *);
126: private int check_format_type(const char *, int);
127: private int check_format(struct magic_set *, struct magic *);
128: private int get_op(char);
129: private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
130: private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
131: private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
132:
/* Current capacity of the magic_entry array; reset by apprentice_load() and grown by parse(). */
133: private size_t maxmagic = 0;
/* Size of struct magic as compiled; apprentice_1() refuses to run unless it equals FILE_MAGICSIZE. */
134: private size_t magicsize = sizeof(struct magic);
135:
/* Column header written to stderr by apprentice_load() when action is FILE_CHECK. */
136: private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
/*
 * Dispatch table for "!:" directive lines.  load_1() compares the text
 * after "!:" against each name and calls the matching handler; the table
 * ends with an all-NULL entry.
 */
137: private struct {
138: const char *name; /* directive keyword */
139: size_t len; /* length of name, without the terminating NUL */
140: int (*fun)(struct magic_set *, struct magic_entry *, const char *); /* handler for the directive */
141: } bang[] = {
/* DECLARE_FIELD(x) expands to { "x", strlen("x"), parse_x }. */
142: #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
143: DECLARE_FIELD(mime),
144: DECLARE_FIELD(apple),
145: DECLARE_FIELD(strength),
146: #undef DECLARE_FIELD
147: { NULL, 0, NULL }
148: };
149:
150: #include "../data_file.c"
151:
/*
 * Table mapping each magic type keyword to its FILE_* type code and its
 * FILE_FMT_* format class.  get_type() does a prefix match on name and
 * init_file_tables() copies name/format into file_names/file_formats.
 * The table is terminated by an entry whose len is 0.
 */
152: static const struct type_tbl_s {
153: const char name[16]; /* type keyword */
154: const size_t len; /* length of name, without the terminating NUL */
155: const int type; /* FILE_* type code */
156: const int format; /* FILE_FMT_* format class */
157: } type_tbl[] = {
/* XX(s) supplies both the name and its length; XX_NULL is the terminator's name/len pair. */
158: # define XX(s) s, (sizeof(s) - 1)
159: # define XX_NULL "", 0
160: { XX("byte"), FILE_BYTE, FILE_FMT_NUM },
161: { XX("short"), FILE_SHORT, FILE_FMT_NUM },
162: { XX("default"), FILE_DEFAULT, FILE_FMT_STR },
163: { XX("long"), FILE_LONG, FILE_FMT_NUM },
164: { XX("string"), FILE_STRING, FILE_FMT_STR },
165: { XX("date"), FILE_DATE, FILE_FMT_STR },
166: { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM },
167: { XX("belong"), FILE_BELONG, FILE_FMT_NUM },
168: { XX("bedate"), FILE_BEDATE, FILE_FMT_STR },
169: { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM },
170: { XX("lelong"), FILE_LELONG, FILE_FMT_NUM },
171: { XX("ledate"), FILE_LEDATE, FILE_FMT_STR },
172: { XX("pstring"), FILE_PSTRING, FILE_FMT_STR },
173: { XX("ldate"), FILE_LDATE, FILE_FMT_STR },
174: { XX("beldate"), FILE_BELDATE, FILE_FMT_STR },
175: { XX("leldate"), FILE_LELDATE, FILE_FMT_STR },
176: { XX("regex"), FILE_REGEX, FILE_FMT_STR },
177: { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR },
178: { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR },
179: { XX("search"), FILE_SEARCH, FILE_FMT_STR },
180: { XX("medate"), FILE_MEDATE, FILE_FMT_STR },
181: { XX("meldate"), FILE_MELDATE, FILE_FMT_STR },
182: { XX("melong"), FILE_MELONG, FILE_FMT_NUM },
183: { XX("quad"), FILE_QUAD, FILE_FMT_QUAD },
184: { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD },
185: { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD },
186: { XX("qdate"), FILE_QDATE, FILE_FMT_STR },
187: { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR },
188: { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR },
189: { XX("qldate"), FILE_QLDATE, FILE_FMT_STR },
190: { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR },
191: { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR },
192: { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT },
193: { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT },
194: { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT },
195: { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE },
196: { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE },
197: { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE },
198: { XX("leid3"), FILE_LEID3, FILE_FMT_NUM },
199: { XX("beid3"), FILE_BEID3, FILE_FMT_NUM },
200: { XX("indirect"), FILE_INDIRECT, FILE_FMT_NONE },
/* Terminator: len == 0 stops the table walks; its type is FILE_INVALID. */
201: { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
202: # undef XX
203: # undef XX_NULL
204: };
205:
206: #ifndef S_ISDIR
207: #define S_ISDIR(mode) ((mode) & _S_IFDIR)
208: #endif
209:
210: private int
211: get_type(const char *l, const char **t)
212: {
213: const struct type_tbl_s *p;
214:
215: for (p = type_tbl; p->len; p++) {
216: if (strncmp(l, p->name, p->len) == 0) {
217: if (t)
218: *t = l + p->len;
219: break;
220: }
221: }
222: return p->type;
223: }
224:
225: private void
226: init_file_tables(void)
227: {
228: static int done = 0;
229: const struct type_tbl_s *p;
230:
231: if (done)
232: return;
233: done++;
234:
235: for (p = type_tbl; p->len; p++) {
236: assert(p->type < FILE_NAMES_SIZE);
237: file_names[p->type] = p->name;
238: file_formats[p->type] = p->format;
239: }
240: }
241:
242: /*
243: * Handle one file or directory.
244: */
245: private int
246: apprentice_1(struct magic_set *ms, const char *fn, int action,
247: struct mlist *mlist)
248: {
249: struct magic *magic = NULL;
250: uint32_t nmagic = 0;
251: struct mlist *ml;
252: int rv = -1;
253: int mapped;
254:
255: if (magicsize != FILE_MAGICSIZE) {
256: file_error(ms, 0, "magic element size %lu != %lu",
257: (unsigned long)sizeof(*magic),
258: (unsigned long)FILE_MAGICSIZE);
259: return -1;
260: }
261:
262: if (action == FILE_COMPILE) {
263: rv = apprentice_load(ms, &magic, &nmagic, fn, action);
264: if (rv != 0)
265: return -1;
266: rv = apprentice_compile(ms, &magic, &nmagic, fn);
267: efree(magic);
268: return rv;
269: }
270:
271: if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
272: if (fn) {
273: if (ms->flags & MAGIC_CHECK)
274: file_magwarn(ms, "using regular magic file `%s'", fn);
275: rv = apprentice_load(ms, &magic, &nmagic, fn, action);
276: }
277:
278: if (rv != 0)
279: return -1;
280: }
281:
282: mapped = rv;
283:
284: if (magic == NULL) {
285: file_delmagic(magic, mapped, nmagic);
286: return -1;
287: }
288:
289: ml = emalloc(sizeof(*ml));
290:
291: ml->magic = magic;
292: ml->nmagic = nmagic;
293: ml->mapped = mapped;
294:
295: mlist->prev->next = ml;
296: ml->prev = mlist->prev;
297: ml->next = mlist;
298: mlist->prev = ml;
299:
1.1.1.2 ! misho 300: if (action == FILE_LIST) {
! 301: printf("Binary patterns:\n");
! 302: apprentice_list(mlist, BINTEST);
! 303: printf("Text patterns:\n");
! 304: apprentice_list(mlist, TEXTTEST);
! 305: }
! 306:
1.1 misho 307: return 0;
308: }
309:
310: protected void
311: file_delmagic(struct magic *p, int type, size_t entries)
312: {
313: if (p == NULL)
314: return;
315: switch (type) {
316: case 3:
317: /* Do nothing, it's part of the code segment */
318: break;
319:
320: case 1:
321: p--;
322: /*FALLTHROUGH*/
323:
324: case 0:
325: efree(p);
326: break;
327:
328: default:
329: abort();
330: }
331: }
332:
333: /* const char *fn: list of magic files and directories */
334: protected struct mlist *
335: file_apprentice(struct magic_set *ms, const char *fn, int action)
336: {
337: char *p, *mfn;
338: int file_err, errs = -1;
339: struct mlist *mlist;
1.1.1.2 ! misho 340: /* XXX disabling default magic loading so the compiled in data is used */
! 341: #if 0
! 342: if ((fn = magic_getpath(fn, action)) == NULL)
! 343: return NULL;
! 344: #endif
1.1 misho 345:
346: init_file_tables();
347:
348: if (fn == NULL)
349: fn = getenv("MAGIC");
350: if (fn == NULL) {
351: mlist = emalloc(sizeof(*mlist));
352: mlist->next = mlist->prev = mlist;
353: apprentice_1(ms, fn, action, mlist);
354: return mlist;
355: }
356:
357: mfn = estrdup(fn);
358: fn = mfn;
359:
360: mlist = emalloc(sizeof(*mlist));
361: mlist->next = mlist->prev = mlist;
362:
363: while (fn) {
364: p = strchr(fn, PATHSEP);
365: if (p)
366: *p++ = '\0';
367: if (*fn == '\0')
368: break;
369: file_err = apprentice_1(ms, fn, action, mlist);
370: errs = MAX(errs, file_err);
371: fn = p;
372: }
373: if (errs == -1) {
374: efree(mfn);
375: efree(mlist);
376: mlist = NULL;
377: file_error(ms, 0, "could not find any magic files!");
378: return NULL;
379: }
380: efree(mfn);
381: return mlist;
382: }
383:
384: /*
385: * Get weight of this magic entry, for sorting purposes.
386: */
387: private size_t
388: apprentice_magic_strength(const struct magic *m)
389: {
390: #define MULT 10
391: size_t val = 2 * MULT; /* baseline strength */
392:
393: switch (m->type) {
394: case FILE_DEFAULT: /* make sure this sorts last */
395: if (m->factor_op != FILE_FACTOR_OP_NONE)
396: abort();
397: return 0;
398:
399: case FILE_BYTE:
400: val += 1 * MULT;
401: break;
402:
403: case FILE_SHORT:
404: case FILE_LESHORT:
405: case FILE_BESHORT:
406: val += 2 * MULT;
407: break;
408:
409: case FILE_LONG:
410: case FILE_LELONG:
411: case FILE_BELONG:
412: case FILE_MELONG:
413: val += 4 * MULT;
414: break;
415:
416: case FILE_PSTRING:
417: case FILE_STRING:
418: val += m->vallen * MULT;
419: break;
420:
421: case FILE_BESTRING16:
422: case FILE_LESTRING16:
423: val += m->vallen * MULT / 2;
424: break;
425:
426: case FILE_SEARCH:
427: case FILE_REGEX:
428: val += m->vallen * MAX(MULT / m->vallen, 1);
429: break;
430:
431: case FILE_DATE:
432: case FILE_LEDATE:
433: case FILE_BEDATE:
434: case FILE_MEDATE:
435: case FILE_LDATE:
436: case FILE_LELDATE:
437: case FILE_BELDATE:
438: case FILE_MELDATE:
439: case FILE_FLOAT:
440: case FILE_BEFLOAT:
441: case FILE_LEFLOAT:
442: val += 4 * MULT;
443: break;
444:
445: case FILE_QUAD:
446: case FILE_BEQUAD:
447: case FILE_LEQUAD:
448: case FILE_QDATE:
449: case FILE_LEQDATE:
450: case FILE_BEQDATE:
451: case FILE_QLDATE:
452: case FILE_LEQLDATE:
453: case FILE_BEQLDATE:
454: case FILE_DOUBLE:
455: case FILE_BEDOUBLE:
456: case FILE_LEDOUBLE:
457: val += 8 * MULT;
458: break;
459:
460: default:
461: val = 0;
462: (void)fprintf(stderr, "Bad type %d\n", m->type);
463: abort();
464: }
465:
466: switch (m->reln) {
467: case 'x': /* matches anything penalize */
468: case '!': /* matches almost anything penalize */
469: val = 0;
470: break;
471:
472: case '=': /* Exact match, prefer */
473: val += MULT;
474: break;
475:
476: case '>':
477: case '<': /* comparison match reduce strength */
478: val -= 2 * MULT;
479: break;
480:
481: case '^':
482: case '&': /* masking bits, we could count them too */
483: val -= MULT;
484: break;
485:
486: default:
487: (void)fprintf(stderr, "Bad relation %c\n", m->reln);
488: abort();
489: }
490:
491: if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */
492: val = 1;
493:
494: switch (m->factor_op) {
495: case FILE_FACTOR_OP_NONE:
496: break;
497: case FILE_FACTOR_OP_PLUS:
498: val += m->factor;
499: break;
500: case FILE_FACTOR_OP_MINUS:
501: val -= m->factor;
502: break;
503: case FILE_FACTOR_OP_TIMES:
504: val *= m->factor;
505: break;
506: case FILE_FACTOR_OP_DIV:
507: val /= m->factor;
508: break;
509: default:
510: abort();
511: }
512:
513:
514: /*
515: * Magic entries with no description get a bonus because they depend
516: * on subsequent magic entries to print something.
517: */
518: if (m->desc[0] == '\0')
519: val++;
520: return val;
521: }
522:
523: /*
524: * Sort callback for sorting entries by "strength" (basically length)
525: */
526: private int
527: apprentice_sort(const void *a, const void *b)
528: {
529: const struct magic_entry *ma = a;
530: const struct magic_entry *mb = b;
531: size_t sa = apprentice_magic_strength(ma->mp);
532: size_t sb = apprentice_magic_strength(mb->mp);
533: if (sa == sb)
534: return 0;
535: else if (sa > sb)
536: return -1;
537: else
538: return 1;
539: }
540:
1.1.1.2 ! misho 541: /*
! 542: * Shows sorted patterns list in the order which is used for the matching
! 543: */
! 544: private void
! 545: apprentice_list(struct mlist *mlist, int mode)
! 546: {
! 547: uint32_t magindex = 0;
! 548: struct mlist *ml;
! 549: for (ml = mlist->next; ml != mlist; ml = ml->next) {
! 550: for (magindex = 0; magindex < ml->nmagic; magindex++) {
! 551: struct magic *m = &ml->magic[magindex];
! 552: if ((m->flag & mode) != mode) {
! 553: /* Skip sub-tests */
! 554: while (magindex + 1 < ml->nmagic &&
! 555: ml->magic[magindex + 1].cont_level != 0)
! 556: ++magindex;
! 557: continue; /* Skip to next top-level test*/
! 558: }
! 559:
! 560: /*
! 561: * Try to iterate over the tree until we find item with
! 562: * description/mimetype.
! 563: */
! 564: while (magindex + 1 < ml->nmagic &&
! 565: ml->magic[magindex + 1].cont_level != 0 &&
! 566: *ml->magic[magindex].desc == '\0' &&
! 567: *ml->magic[magindex].mimetype == '\0')
! 568: magindex++;
! 569:
! 570: printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
! 571: apprentice_magic_strength(m),
! 572: ml->magic[magindex].desc,
! 573: ml->magic[magindex].mimetype);
! 574: }
! 575: }
! 576: }
! 577:
1.1 misho 578: private void
579: set_test_type(struct magic *mstart, struct magic *m)
580: {
581: switch (m->type) {
582: case FILE_BYTE:
583: case FILE_SHORT:
584: case FILE_LONG:
585: case FILE_DATE:
586: case FILE_BESHORT:
587: case FILE_BELONG:
588: case FILE_BEDATE:
589: case FILE_LESHORT:
590: case FILE_LELONG:
591: case FILE_LEDATE:
592: case FILE_LDATE:
593: case FILE_BELDATE:
594: case FILE_LELDATE:
595: case FILE_MEDATE:
596: case FILE_MELDATE:
597: case FILE_MELONG:
598: case FILE_QUAD:
599: case FILE_LEQUAD:
600: case FILE_BEQUAD:
601: case FILE_QDATE:
602: case FILE_LEQDATE:
603: case FILE_BEQDATE:
604: case FILE_QLDATE:
605: case FILE_LEQLDATE:
606: case FILE_BEQLDATE:
607: case FILE_FLOAT:
608: case FILE_BEFLOAT:
609: case FILE_LEFLOAT:
610: case FILE_DOUBLE:
611: case FILE_BEDOUBLE:
612: case FILE_LEDOUBLE:
1.1.1.2 ! misho 613: mstart->flag |= BINTEST;
! 614: break;
1.1 misho 615: case FILE_STRING:
616: case FILE_PSTRING:
617: case FILE_BESTRING16:
618: case FILE_LESTRING16:
1.1.1.2 ! misho 619: /* Allow text overrides */
! 620: if (mstart->str_flags & STRING_TEXTTEST)
! 621: mstart->flag |= TEXTTEST;
! 622: else
! 623: mstart->flag |= BINTEST;
1.1 misho 624: break;
625: case FILE_REGEX:
626: case FILE_SEARCH:
1.1.1.2 ! misho 627: /* Check for override */
! 628: if (mstart->str_flags & STRING_BINTEST)
! 629: mstart->flag |= BINTEST;
! 630: if (mstart->str_flags & STRING_TEXTTEST)
! 631: mstart->flag |= TEXTTEST;
! 632:
! 633: if (mstart->flag & (TEXTTEST|BINTEST))
! 634: break;
! 635:
1.1 misho 636: /* binary test if pattern is not text */
637: if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
638: NULL) <= 0)
639: mstart->flag |= BINTEST;
1.1.1.2 ! misho 640: else
! 641: mstart->flag |= TEXTTEST;
1.1 misho 642: break;
643: case FILE_DEFAULT:
644: /* can't deduce anything; we shouldn't see this at the
645: top level anyway */
646: break;
647: case FILE_INVALID:
648: default:
649: /* invalid search type, but no need to complain here */
650: break;
651: }
652: }
653:
654: /*
655: * Load and parse one file.
656: */
657: private void
658: load_1(struct magic_set *ms, int action, const char *fn, int *errs,
659: struct magic_entry **marray, uint32_t *marraycount)
660: {
661: char buffer[BUFSIZ + 1];
1.1.1.2 ! misho 662: char *line = NULL;
! 663: size_t len;
1.1 misho 664: size_t lineno = 0;
665:
666: php_stream *stream;
667:
668: TSRMLS_FETCH();
669:
670: #if PHP_API_VERSION < 20100412
671: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
672: #else
673: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
674: #endif
675:
676: if (stream == NULL) {
677: if (errno != ENOENT)
678: file_error(ms, errno, "cannot read magic file `%s'",
679: fn);
680: (*errs)++;
1.1.1.2 ! misho 681: return;
! 682: }
1.1 misho 683:
684: /* read and parse this file */
685: #if (PHP_MAJOR_VERSION < 6)
1.1.1.2 ! misho 686: for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
1.1 misho 687: #else
1.1.1.2 ! misho 688: for (ms->line = 1; (line = php_stream_get_line(stream, ZSTR(buffer), BUFSIZ, &len)) != NULL; ms->line++) {
1.1 misho 689: #endif
1.1.1.2 ! misho 690: if (len == 0) /* null line, garbage, etc */
! 691: continue;
! 692: if (line[len - 1] == '\n') {
! 693: lineno++;
! 694: line[len - 1] = '\0'; /* delete newline */
! 695: }
! 696: switch (line[0]) {
! 697: case '\0': /* empty, do not parse */
! 698: case '#': /* comment, do not parse */
! 699: continue;
! 700: case '!':
! 701: if (line[1] == ':') {
1.1 misho 702: size_t i;
703:
704: for (i = 0; bang[i].name != NULL; i++) {
1.1.1.2 ! misho 705: if ((size_t)(len - 2) > bang[i].len &&
1.1 misho 706: memcmp(bang[i].name, line + 2,
707: bang[i].len) == 0)
708: break;
709: }
710: if (bang[i].name == NULL) {
711: file_error(ms, 0,
712: "Unknown !: entry `%s'", line);
713: (*errs)++;
714: continue;
715: }
716: if (*marraycount == 0) {
717: file_error(ms, 0,
718: "No current entry for :!%s type",
719: bang[i].name);
720: (*errs)++;
721: continue;
722: }
723: if ((*bang[i].fun)(ms,
724: &(*marray)[*marraycount - 1],
725: line + bang[i].len + 2) != 0) {
726: (*errs)++;
727: continue;
728: }
729: continue;
730: }
1.1.1.2 ! misho 731: /*FALLTHROUGH*/
! 732: default:
! 733: if (parse(ms, marray, marraycount, line, lineno,
! 734: action) != 0)
1.1 misho 735: (*errs)++;
1.1.1.2 ! misho 736: break;
1.1 misho 737: }
738: }
1.1.1.2 ! misho 739: php_stream_close(stream);
1.1 misho 740: }
741:
/*
 * apprentice_load() (defined below, after the cmpstrp() helper):
 * parse a file or directory of files
 * const char *fn: name of magic file or directory
 */
746: private int
1.1.1.2 ! misho 747: cmpstrp(const void *p1, const void *p2)
! 748: {
! 749: return strcmp(*(char *const *)p1, *(char *const *)p2);
! 750: }
! 751:
! 752: private int
1.1 misho 753: apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
754: const char *fn, int action)
755: {
756: int errs = 0;
757: struct magic_entry *marray;
758: uint32_t marraycount, i, mentrycount = 0, starttest;
1.1.1.2 ! misho 759: size_t files = 0, maxfiles = 0;
! 760: char **filearr = NULL, mfn[MAXPATHLEN];
1.1 misho 761: struct stat st;
762: DIR *dir;
763: struct dirent *d;
764:
765: ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */
766:
767: maxmagic = MAXMAGIS;
768: marray = ecalloc(maxmagic, sizeof(*marray));
769: marraycount = 0;
770:
771: /* print silly verbose header for USG compat. */
772: if (action == FILE_CHECK)
773: (void)fprintf(stderr, "%s\n", usg_hdr);
774:
775: /* load directory or file */
776: /* FIXME: Read file names and sort them to prevent
777: non-determinism. See Debian bug #488562. */
778: if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
779: dir = opendir(fn);
1.1.1.2 ! misho 780: if (!dir) {
! 781: errs++;
! 782: goto out;
! 783: }
! 784: while ((d = readdir(dir)) != NULL) {
! 785: if (snprintf(mfn, sizeof(mfn), "%s/%s", fn, d->d_name) < 0) {
! 786: file_oomem(ms,
! 787: strlen(fn) + strlen(d->d_name) + 2);
! 788: errs++;
! 789: closedir(dir);
! 790: goto out;
! 791: }
! 792: if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
! 793: continue;
! 794: }
! 795: if (files >= maxfiles) {
! 796: size_t mlen;
! 797: maxfiles = (maxfiles + 1) * 2;
! 798: mlen = maxfiles * sizeof(*filearr);
! 799: if ((filearr = CAST(char **,
! 800: realloc(filearr, mlen))) == NULL) {
! 801: file_oomem(ms, mlen);
! 802: closedir(dir);
! 803: errs++;
! 804: goto out;
1.1 misho 805: }
806: }
1.1.1.2 ! misho 807: filearr[files++] = mfn;
! 808: }
! 809: closedir(dir);
! 810: qsort(filearr, files, sizeof(*filearr), cmpstrp);
! 811: for (i = 0; i < files; i++) {
! 812: load_1(ms, action, filearr[i], &errs, &marray,
! 813: &marraycount);
! 814: free(filearr[i]);
! 815: }
! 816: free(filearr);
1.1 misho 817: } else
818: load_1(ms, action, fn, &errs, &marray, &marraycount);
819: if (errs)
820: goto out;
821:
822: /* Set types of tests */
823: for (i = 0; i < marraycount; ) {
824: if (marray[i].mp->cont_level != 0) {
825: i++;
826: continue;
827: }
828:
829: starttest = i;
830: do {
831: static const char text[] = "text";
832: static const char binary[] = "binary";
833: static const size_t len = sizeof(text);
834: set_test_type(marray[starttest].mp, marray[i].mp);
835: if ((ms->flags & MAGIC_DEBUG) == 0)
836: continue;
837: (void)fprintf(stderr, "%s%s%s: %s\n",
838: marray[i].mp->mimetype,
839: marray[i].mp->mimetype[0] == '\0' ? "" : "; ",
840: marray[i].mp->desc[0] ? marray[i].mp->desc :
841: "(no description)",
842: marray[i].mp->flag & BINTEST ? binary : text);
843: if (marray[i].mp->flag & BINTEST) {
844: char *p = strstr(marray[i].mp->desc, text);
845: if (p && (p == marray[i].mp->desc ||
846: isspace((unsigned char)p[-1])) &&
847: (p + len - marray[i].mp->desc ==
848: MAXstring || (p[len] == '\0' ||
849: isspace((unsigned char)p[len]))))
850: (void)fprintf(stderr, "*** Possible "
851: "binary test for text type\n");
852: }
853: } while (++i < marraycount && marray[i].mp->cont_level != 0);
854: }
855:
856: qsort(marray, marraycount, sizeof(*marray), apprentice_sort);
857:
858: /*
859: * Make sure that any level 0 "default" line is last (if one exists).
860: */
861: for (i = 0; i < marraycount; i++) {
862: if (marray[i].mp->cont_level == 0 &&
863: marray[i].mp->type == FILE_DEFAULT) {
864: while (++i < marraycount)
865: if (marray[i].mp->cont_level == 0)
866: break;
867: if (i != marraycount) {
1.1.1.2 ! misho 868: /* XXX - Ugh! */
! 869: ms->line = marray[i].mp->lineno;
1.1 misho 870: file_magwarn(ms,
871: "level 0 \"default\" did not sort last");
872: }
873: break;
874: }
875: }
876:
877: for (i = 0; i < marraycount; i++)
878: mentrycount += marray[i].cont_count;
879:
880: *magicp = emalloc(sizeof(**magicp) * mentrycount);
881:
882: mentrycount = 0;
883: for (i = 0; i < marraycount; i++) {
884: (void)memcpy(*magicp + mentrycount, marray[i].mp,
885: marray[i].cont_count * sizeof(**magicp));
886: mentrycount += marray[i].cont_count;
887: }
888: out:
889: for (i = 0; i < marraycount; i++)
890: efree(marray[i].mp);
891: efree(marray);
892: if (errs) {
893: *magicp = NULL;
894: *nmagicp = 0;
895: return errs;
896: } else {
897: *nmagicp = mentrycount;
898: return 0;
899: }
900:
901: }
902:
903: /*
904: * extend the sign bit if the comparison is to be signed
905: */
906: protected uint64_t
907: file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
908: {
909: if (!(m->flag & UNSIGNED)) {
910: switch(m->type) {
911: /*
912: * Do not remove the casts below. They are
913: * vital. When later compared with the data,
914: * the sign extension must have happened.
915: */
916: case FILE_BYTE:
917: v = (char) v;
918: break;
919: case FILE_SHORT:
920: case FILE_BESHORT:
921: case FILE_LESHORT:
922: v = (short) v;
923: break;
924: case FILE_DATE:
925: case FILE_BEDATE:
926: case FILE_LEDATE:
927: case FILE_MEDATE:
928: case FILE_LDATE:
929: case FILE_BELDATE:
930: case FILE_LELDATE:
931: case FILE_MELDATE:
932: case FILE_LONG:
933: case FILE_BELONG:
934: case FILE_LELONG:
935: case FILE_MELONG:
936: case FILE_FLOAT:
937: case FILE_BEFLOAT:
938: case FILE_LEFLOAT:
939: v = (int32_t) v;
940: break;
941: case FILE_QUAD:
942: case FILE_BEQUAD:
943: case FILE_LEQUAD:
944: case FILE_QDATE:
945: case FILE_QLDATE:
946: case FILE_BEQDATE:
947: case FILE_BEQLDATE:
948: case FILE_LEQDATE:
949: case FILE_LEQLDATE:
950: case FILE_DOUBLE:
951: case FILE_BEDOUBLE:
952: case FILE_LEDOUBLE:
953: v = (int64_t) v;
954: break;
955: case FILE_STRING:
956: case FILE_PSTRING:
957: case FILE_BESTRING16:
958: case FILE_LESTRING16:
959: case FILE_REGEX:
960: case FILE_SEARCH:
961: case FILE_DEFAULT:
962: case FILE_INDIRECT:
963: break;
964: default:
965: if (ms->flags & MAGIC_CHECK)
966: file_magwarn(ms, "cannot happen: m->type=%d\n",
967: m->type);
968: return ~0U;
969: }
970: }
971: return v;
972: }
973:
974: private int
975: string_modifier_check(struct magic_set *ms, struct magic *m)
976: {
977: if ((ms->flags & MAGIC_CHECK) == 0)
978: return 0;
979:
1.1.1.2 ! misho 980: if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
! 981: file_magwarn(ms,
! 982: "'/BHhLl' modifiers are only allowed for pascal strings\n");
! 983: return -1;
! 984: }
1.1 misho 985: switch (m->type) {
986: case FILE_BESTRING16:
987: case FILE_LESTRING16:
988: if (m->str_flags != 0) {
989: file_magwarn(ms,
990: "no modifiers allowed for 16-bit strings\n");
991: return -1;
992: }
993: break;
994: case FILE_STRING:
995: case FILE_PSTRING:
996: if ((m->str_flags & REGEX_OFFSET_START) != 0) {
997: file_magwarn(ms,
998: "'/%c' only allowed on regex and search\n",
999: CHAR_REGEX_OFFSET_START);
1000: return -1;
1001: }
1002: break;
1003: case FILE_SEARCH:
1004: if (m->str_range == 0) {
1005: file_magwarn(ms,
1006: "missing range; defaulting to %d\n",
1007: STRING_DEFAULT_RANGE);
1008: m->str_range = STRING_DEFAULT_RANGE;
1009: return -1;
1010: }
1011: break;
1012: case FILE_REGEX:
1.1.1.2 ! misho 1013: if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1.1 misho 1014: file_magwarn(ms, "'/%c' not allowed on regex\n",
1.1.1.2 ! misho 1015: CHAR_COMPACT_WHITESPACE);
1.1 misho 1016: return -1;
1017: }
1.1.1.2 ! misho 1018: if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1.1 misho 1019: file_magwarn(ms, "'/%c' not allowed on regex\n",
1.1.1.2 ! misho 1020: CHAR_COMPACT_OPTIONAL_WHITESPACE);
1.1 misho 1021: return -1;
1022: }
1023: break;
1024: default:
1025: file_magwarn(ms, "coding error: m->type=%d\n",
1026: m->type);
1027: return -1;
1028: }
1029: return 0;
1030: }
1031:
1032: private int
1033: get_op(char c)
1034: {
1035: switch (c) {
1036: case '&':
1037: return FILE_OPAND;
1038: case '|':
1039: return FILE_OPOR;
1040: case '^':
1041: return FILE_OPXOR;
1042: case '+':
1043: return FILE_OPADD;
1044: case '-':
1045: return FILE_OPMINUS;
1046: case '*':
1047: return FILE_OPMULTIPLY;
1048: case '/':
1049: return FILE_OPDIVIDE;
1050: case '%':
1051: return FILE_OPMODULO;
1052: default:
1053: return -1;
1054: }
1055: }
1056:
1057: #ifdef ENABLE_CONDITIONALS
1058: private int
1059: get_cond(const char *l, const char **t)
1060: {
1061: static const struct cond_tbl_s {
1062: char name[8];
1063: size_t len;
1064: int cond;
1065: } cond_tbl[] = {
1066: { "if", 2, COND_IF },
1067: { "elif", 4, COND_ELIF },
1068: { "else", 4, COND_ELSE },
1069: { "", 0, COND_NONE },
1070: };
1071: const struct cond_tbl_s *p;
1072:
1073: for (p = cond_tbl; p->len; p++) {
1074: if (strncmp(l, p->name, p->len) == 0 &&
1075: isspace((unsigned char)l[p->len])) {
1076: if (t)
1077: *t = l + p->len;
1078: break;
1079: }
1080: }
1081: return p->cond;
1082: }
1083:
1084: private int
1085: check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1086: {
1087: int last_cond;
1088: last_cond = ms->c.li[cont_level].last_cond;
1089:
1090: switch (cond) {
1091: case COND_IF:
1092: if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1093: if (ms->flags & MAGIC_CHECK)
1094: file_magwarn(ms, "syntax error: `if'");
1095: return -1;
1096: }
1097: last_cond = COND_IF;
1098: break;
1099:
1100: case COND_ELIF:
1101: if (last_cond != COND_IF && last_cond != COND_ELIF) {
1102: if (ms->flags & MAGIC_CHECK)
1103: file_magwarn(ms, "syntax error: `elif'");
1104: return -1;
1105: }
1106: last_cond = COND_ELIF;
1107: break;
1108:
1109: case COND_ELSE:
1110: if (last_cond != COND_IF && last_cond != COND_ELIF) {
1111: if (ms->flags & MAGIC_CHECK)
1112: file_magwarn(ms, "syntax error: `else'");
1113: return -1;
1114: }
1115: last_cond = COND_NONE;
1116: break;
1117:
1118: case COND_NONE:
1119: last_cond = COND_NONE;
1120: break;
1121: }
1122:
1123: ms->c.li[cont_level].last_cond = last_cond;
1124: return 0;
1125: }
1126: #endif /* ENABLE_CONDITIONALS */
1127:
1128: /*
1129: * parse one line from magic file, put into magic[index++] if valid
1130: */
1131: private int
1132: parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
1133: const char *line, size_t lineno, int action)
1134: {
1135: #ifdef ENABLE_CONDITIONALS
1136: static uint32_t last_cont_level = 0;
1137: #endif
1138: size_t i;
1139: struct magic_entry *me;
1140: struct magic *m;
1141: const char *l = line;
1142: char *t;
1143: int op;
1144: uint32_t cont_level;
1145:
1146: cont_level = 0;
1147:
1148: while (*l == '>') {
1149: ++l; /* step over */
1150: cont_level++;
1151: }
1152: #ifdef ENABLE_CONDITIONALS
1153: if (cont_level == 0 || cont_level > last_cont_level)
1154: if (file_check_mem(ms, cont_level) == -1)
1155: return -1;
1156: last_cont_level = cont_level;
1157: #endif
1158:
1159: #define ALLOC_CHUNK (size_t)10
1160: #define ALLOC_INCR (size_t)200
1161:
1162: if (cont_level != 0) {
1163: if (*nmentryp == 0) {
1164: file_error(ms, 0, "No current entry for continuation");
1165: return -1;
1166: }
1167: me = &(*mentryp)[*nmentryp - 1];
1168: if (me->cont_count == me->max_count) {
1169: struct magic *nm;
1170: size_t cnt = me->max_count + ALLOC_CHUNK;
1171: nm = erealloc(me->mp, sizeof(*nm) * cnt);
1172: me->mp = m = nm;
1.1.1.2 ! misho 1173: me->max_count = CAST(uint32_t, cnt);
1.1 misho 1174: }
1175: m = &me->mp[me->cont_count++];
1176: (void)memset(m, 0, sizeof(*m));
1177: m->cont_level = cont_level;
1178: } else {
1179: if (*nmentryp == maxmagic) {
1180: struct magic_entry *mp;
1181:
1182: maxmagic += ALLOC_INCR;
1183: mp = erealloc(*mentryp, sizeof(*mp) * maxmagic);
1184: (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * ALLOC_INCR);
1185: *mentryp = mp;
1186: }
1187: me = &(*mentryp)[*nmentryp];
1188: if (me->mp == NULL) {
1189: m = safe_emalloc(sizeof(*m), ALLOC_CHUNK, 0);
1190: me->mp = m;
1191: me->max_count = ALLOC_CHUNK;
1192: } else
1193: m = me->mp;
1194: (void)memset(m, 0, sizeof(*m));
1195: m->factor_op = FILE_FACTOR_OP_NONE;
1196: m->cont_level = 0;
1197: me->cont_count = 1;
1198: }
1.1.1.2 ! misho 1199: m->lineno = CAST(uint32_t, lineno);
1.1 misho 1200:
1201: if (*l == '&') { /* m->cont_level == 0 checked below. */
1202: ++l; /* step over */
1203: m->flag |= OFFADD;
1204: }
1205: if (*l == '(') {
1206: ++l; /* step over */
1207: m->flag |= INDIR;
1208: if (m->flag & OFFADD)
1209: m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1210:
1211: if (*l == '&') { /* m->cont_level == 0 checked below */
1212: ++l; /* step over */
1213: m->flag |= OFFADD;
1214: }
1215: }
1216: /* Indirect offsets are not valid at level 0. */
1217: if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1218: if (ms->flags & MAGIC_CHECK)
1219: file_magwarn(ms, "relative offset at level 0");
1220:
1221: /* get offset, then skip over it */
1222: m->offset = (uint32_t)strtoul(l, &t, 0);
1223: if (l == t)
1224: if (ms->flags & MAGIC_CHECK)
1225: file_magwarn(ms, "offset `%s' invalid", l);
1226: l = t;
1227:
1228: if (m->flag & INDIR) {
1229: m->in_type = FILE_LONG;
1230: m->in_offset = 0;
1231: /*
1232: * read [.lbs][+-]nnnnn)
1233: */
1234: if (*l == '.') {
1235: l++;
1236: switch (*l) {
1237: case 'l':
1238: m->in_type = FILE_LELONG;
1239: break;
1240: case 'L':
1241: m->in_type = FILE_BELONG;
1242: break;
1243: case 'm':
1244: m->in_type = FILE_MELONG;
1245: break;
1246: case 'h':
1247: case 's':
1248: m->in_type = FILE_LESHORT;
1249: break;
1250: case 'H':
1251: case 'S':
1252: m->in_type = FILE_BESHORT;
1253: break;
1254: case 'c':
1255: case 'b':
1256: case 'C':
1257: case 'B':
1258: m->in_type = FILE_BYTE;
1259: break;
1260: case 'e':
1261: case 'f':
1262: case 'g':
1263: m->in_type = FILE_LEDOUBLE;
1264: break;
1265: case 'E':
1266: case 'F':
1267: case 'G':
1268: m->in_type = FILE_BEDOUBLE;
1269: break;
1270: case 'i':
1271: m->in_type = FILE_LEID3;
1272: break;
1273: case 'I':
1274: m->in_type = FILE_BEID3;
1275: break;
1276: default:
1277: if (ms->flags & MAGIC_CHECK)
1278: file_magwarn(ms,
1279: "indirect offset type `%c' invalid",
1280: *l);
1281: break;
1282: }
1283: l++;
1284: }
1285:
1286: m->in_op = 0;
1287: if (*l == '~') {
1288: m->in_op |= FILE_OPINVERSE;
1289: l++;
1290: }
1291: if ((op = get_op(*l)) != -1) {
1292: m->in_op |= op;
1293: l++;
1294: }
1295: if (*l == '(') {
1296: m->in_op |= FILE_OPINDIRECT;
1297: l++;
1298: }
1299: if (isdigit((unsigned char)*l) || *l == '-') {
1300: m->in_offset = (int32_t)strtol(l, &t, 0);
1301: if (l == t)
1302: if (ms->flags & MAGIC_CHECK)
1303: file_magwarn(ms,
1304: "in_offset `%s' invalid", l);
1305: l = t;
1306: }
1307: if (*l++ != ')' ||
1308: ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1309: if (ms->flags & MAGIC_CHECK)
1310: file_magwarn(ms,
1311: "missing ')' in indirect offset");
1312: }
1313: EATAB;
1314:
1315: #ifdef ENABLE_CONDITIONALS
1316: m->cond = get_cond(l, &l);
1317: if (check_cond(ms, m->cond, cont_level) == -1)
1318: return -1;
1319:
1320: EATAB;
1321: #endif
1322:
1323: if (*l == 'u') {
1324: ++l;
1325: m->flag |= UNSIGNED;
1326: }
1327:
1328: m->type = get_type(l, &l);
1329: if (m->type == FILE_INVALID) {
1330: if (ms->flags & MAGIC_CHECK)
1331: file_magwarn(ms, "type `%s' invalid", l);
1332: return -1;
1333: }
1334:
1335: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1336: /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1337:
1338: m->mask_op = 0;
1339: if (*l == '~') {
1340: if (!IS_LIBMAGIC_STRING(m->type))
1341: m->mask_op |= FILE_OPINVERSE;
1342: else if (ms->flags & MAGIC_CHECK)
1343: file_magwarn(ms, "'~' invalid for string types");
1344: ++l;
1345: }
1346: m->str_range = 0;
1.1.1.2 ! misho 1347: m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1.1 misho 1348: if ((op = get_op(*l)) != -1) {
1349: if (!IS_LIBMAGIC_STRING(m->type)) {
1350: uint64_t val;
1351: ++l;
1352: m->mask_op |= op;
1353: val = (uint64_t)strtoull(l, &t, 0);
1354: l = t;
1355: m->num_mask = file_signextend(ms, m, val);
1356: eatsize(&l);
1357: }
1358: else if (op == FILE_OPDIVIDE) {
1359: int have_range = 0;
1360: while (!isspace((unsigned char)*++l)) {
1361: switch (*l) {
1362: case '0': case '1': case '2':
1363: case '3': case '4': case '5':
1364: case '6': case '7': case '8':
1365: case '9':
1366: if (have_range &&
1367: (ms->flags & MAGIC_CHECK))
1368: file_magwarn(ms,
1369: "multiple ranges");
1370: have_range = 1;
1.1.1.2 ! misho 1371: m->str_range = CAST(uint32_t,
! 1372: strtoul(l, &t, 0));
1.1 misho 1373: if (m->str_range == 0)
1374: file_magwarn(ms,
1375: "zero range");
1376: l = t - 1;
1377: break;
1.1.1.2 ! misho 1378: case CHAR_COMPACT_WHITESPACE:
! 1379: m->str_flags |=
! 1380: STRING_COMPACT_WHITESPACE;
1.1 misho 1381: break;
1.1.1.2 ! misho 1382: case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1.1 misho 1383: m->str_flags |=
1.1.1.2 ! misho 1384: STRING_COMPACT_OPTIONAL_WHITESPACE;
1.1 misho 1385: break;
1386: case CHAR_IGNORE_LOWERCASE:
1387: m->str_flags |= STRING_IGNORE_LOWERCASE;
1388: break;
1389: case CHAR_IGNORE_UPPERCASE:
1390: m->str_flags |= STRING_IGNORE_UPPERCASE;
1391: break;
1392: case CHAR_REGEX_OFFSET_START:
1393: m->str_flags |= REGEX_OFFSET_START;
1394: break;
1.1.1.2 ! misho 1395: case CHAR_BINTEST:
! 1396: m->str_flags |= STRING_BINTEST;
! 1397: break;
! 1398: case CHAR_TEXTTEST:
! 1399: m->str_flags |= STRING_TEXTTEST;
! 1400: break;
! 1401: case CHAR_PSTRING_1_LE:
! 1402: if (m->type != FILE_PSTRING)
! 1403: goto bad;
! 1404: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
! 1405: break;
! 1406: case CHAR_PSTRING_2_BE:
! 1407: if (m->type != FILE_PSTRING)
! 1408: goto bad;
! 1409: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
! 1410: break;
! 1411: case CHAR_PSTRING_2_LE:
! 1412: if (m->type != FILE_PSTRING)
! 1413: goto bad;
! 1414: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
! 1415: break;
! 1416: case CHAR_PSTRING_4_BE:
! 1417: if (m->type != FILE_PSTRING)
! 1418: goto bad;
! 1419: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
! 1420: break;
! 1421: case CHAR_PSTRING_4_LE:
! 1422: if (m->type != FILE_PSTRING)
! 1423: goto bad;
! 1424: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
! 1425: break;
! 1426: case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
! 1427: if (m->type != FILE_PSTRING)
! 1428: goto bad;
! 1429: m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
! 1430: break;
1.1 misho 1431: default:
1.1.1.2 ! misho 1432: bad:
1.1 misho 1433: if (ms->flags & MAGIC_CHECK)
1434: file_magwarn(ms,
1.1.1.2 ! misho 1435: "string extension `%c' "
! 1436: "invalid", *l);
1.1 misho 1437: return -1;
1438: }
1439: /* allow multiple '/' for readability */
1440: if (l[1] == '/' &&
1441: !isspace((unsigned char)l[2]))
1442: l++;
1443: }
1444: if (string_modifier_check(ms, m) == -1)
1445: return -1;
1446: }
1447: else {
1448: if (ms->flags & MAGIC_CHECK)
1449: file_magwarn(ms, "invalid string op: %c", *t);
1450: return -1;
1451: }
1452: }
1453: /*
1454: * We used to set mask to all 1's here, instead let's just not do
1455: * anything if mask = 0 (unless you have a better idea)
1456: */
1457: EATAB;
1458:
1459: switch (*l) {
1460: case '>':
1461: case '<':
1462: m->reln = *l;
1463: ++l;
1464: if (*l == '=') {
1465: if (ms->flags & MAGIC_CHECK) {
1466: file_magwarn(ms, "%c= not supported",
1467: m->reln);
1468: return -1;
1469: }
1470: ++l;
1471: }
1472: break;
1473: /* Old-style anding: "0 byte &0x80 dynamically linked" */
1474: case '&':
1475: case '^':
1476: case '=':
1477: m->reln = *l;
1478: ++l;
1479: if (*l == '=') {
1480: /* HP compat: ignore &= etc. */
1481: ++l;
1482: }
1483: break;
1484: case '!':
1485: m->reln = *l;
1486: ++l;
1487: break;
1488: default:
1489: m->reln = '='; /* the default relation */
1490: if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
1491: isspace((unsigned char)l[1])) || !l[1])) {
1492: m->reln = *l;
1493: ++l;
1494: }
1495: break;
1496: }
1497: /*
1498: * Grab the value part, except for an 'x' reln.
1499: */
1500: if (m->reln != 'x' && getvalue(ms, m, &l, action))
1501: return -1;
1502:
1503: /*
1504: * TODO finish this macro and start using it!
1505: * #define offsetcheck {if (offset > HOWMANY-1)
1506: * magwarn("offset too big"); }
1507: */
1508:
1509: /*
1510: * Now get last part - the description
1511: */
1512: EATAB;
1513: if (l[0] == '\b') {
1514: ++l;
1515: m->flag |= NOSPACE;
1516: } else if ((l[0] == '\\') && (l[1] == 'b')) {
1517: ++l;
1518: ++l;
1519: m->flag |= NOSPACE;
1520: }
1521: for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
1522: continue;
1523: if (i == sizeof(m->desc)) {
1524: m->desc[sizeof(m->desc) - 1] = '\0';
1525: if (ms->flags & MAGIC_CHECK)
1526: file_magwarn(ms, "description `%s' truncated", m->desc);
1527: }
1528:
1529: /*
1530: * We only do this check while compiling, or if any of the magic
1531: * files were not compiled.
1532: */
1533: if (ms->flags & MAGIC_CHECK) {
1534: if (check_format(ms, m) == -1)
1535: return -1;
1536: }
1537: m->mimetype[0] = '\0'; /* initialise MIME type to none */
1538: if (m->cont_level == 0)
1539: ++(*nmentryp); /* make room for next */
1540: return 0;
1541: }
1542:
1543: /*
1544: * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
1545: * if valid
1546: */
1547: private int
1548: parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
1549: {
1550: const char *l = line;
1551: char *el;
1552: unsigned long factor;
1553: struct magic *m = &me->mp[0];
1554:
1555: if (m->factor_op != FILE_FACTOR_OP_NONE) {
1556: file_magwarn(ms,
1557: "Current entry already has a strength type: %c %d",
1558: m->factor_op, m->factor);
1559: return -1;
1560: }
1561: EATAB;
1562: switch (*l) {
1563: case FILE_FACTOR_OP_NONE:
1564: case FILE_FACTOR_OP_PLUS:
1565: case FILE_FACTOR_OP_MINUS:
1566: case FILE_FACTOR_OP_TIMES:
1567: case FILE_FACTOR_OP_DIV:
1568: m->factor_op = *l++;
1569: break;
1570: default:
1571: file_magwarn(ms, "Unknown factor op `%c'", *l);
1572: return -1;
1573: }
1574: EATAB;
1575: factor = strtoul(l, &el, 0);
1576: if (factor > 255) {
1577: file_magwarn(ms, "Too large factor `%lu'", factor);
1578: goto out;
1579: }
1580: if (*el && !isspace((unsigned char)*el)) {
1581: file_magwarn(ms, "Bad factor `%s'", l);
1582: goto out;
1583: }
1584: m->factor = (uint8_t)factor;
1585: if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
1586: file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
1587: m->factor_op, m->factor);
1588: goto out;
1589: }
1590: return 0;
1591: out:
1592: m->factor_op = FILE_FACTOR_OP_NONE;
1593: m->factor = 0;
1594: return -1;
1595: }
1596:
1597: /*
1.1.1.2 ! misho 1598: * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
! 1599: * magic[index - 1]
1.1 misho 1600: */
1601: private int
1602: parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
1603: {
1604: size_t i;
1605: const char *l = line;
1606: struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
1607:
1608: if (m->apple[0] != '\0') {
1.1.1.2 ! misho 1609: file_magwarn(ms, "Current entry already has a APPLE type "
! 1610: "`%.8s', new type `%s'", m->mimetype, l);
1.1 misho 1611: return -1;
1612: }
1613:
1614: EATAB;
1.1.1.2 ! misho 1615: for (i = 0; *l && ((isascii((unsigned char)*l) &&
! 1616: isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
! 1617: i < sizeof(m->apple); m->apple[i++] = *l++)
1.1 misho 1618: continue;
1619: if (i == sizeof(m->apple) && *l) {
1.1.1.2 ! misho 1620: /* We don't need to NUL terminate here, printing handles it */
1.1 misho 1621: if (ms->flags & MAGIC_CHECK)
1.1.1.2 ! misho 1622: file_magwarn(ms, "APPLE type `%s' truncated %"
! 1623: SIZE_T_FORMAT "u", line, i);
1.1 misho 1624: }
1625:
1626: if (i > 0)
1627: return 0;
1628: else
1629: return -1;
1630: }
1631:
1632: /*
1633: * parse a MIME annotation line from magic file, put into magic[index - 1]
1634: * if valid
1635: */
1636: private int
1637: parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
1638: {
1639: size_t i;
1640: const char *l = line;
1641: struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
1642:
1643: if (m->mimetype[0] != '\0') {
1644: file_magwarn(ms, "Current entry already has a MIME type `%s',"
1645: " new type `%s'", m->mimetype, l);
1646: return -1;
1647: }
1648:
1649: EATAB;
1.1.1.2 ! misho 1650: for (i = 0; *l && ((isascii((unsigned char)*l) &&
! 1651: isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
! 1652: i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
1.1 misho 1653: continue;
1654: if (i == sizeof(m->mimetype)) {
1.1.1.2 ! misho 1655: m->mimetype[sizeof(m->mimetype) - 1] = '\0';
1.1 misho 1656: if (ms->flags & MAGIC_CHECK)
1.1.1.2 ! misho 1657: file_magwarn(ms, "MIME type `%s' truncated %"
! 1658: SIZE_T_FORMAT "u", m->mimetype, i);
1.1 misho 1659: } else
1660: m->mimetype[i] = '\0';
1661:
1662: if (i > 0)
1663: return 0;
1664: else
1665: return -1;
1666: }
1667:
1668: private int
1669: check_format_type(const char *ptr, int type)
1670: {
1671: int quad = 0;
1672: if (*ptr == '\0') {
1673: /* Missing format string; bad */
1674: return -1;
1675: }
1676:
1677: switch (type) {
1678: case FILE_FMT_QUAD:
1679: quad = 1;
1680: /*FALLTHROUGH*/
1681: case FILE_FMT_NUM:
1682: if (*ptr == '-')
1683: ptr++;
1684: if (*ptr == '.')
1685: ptr++;
1686: while (isdigit((unsigned char)*ptr)) ptr++;
1687: if (*ptr == '.')
1688: ptr++;
1689: while (isdigit((unsigned char)*ptr)) ptr++;
1690: if (quad) {
1691: if (*ptr++ != 'l')
1692: return -1;
1693: if (*ptr++ != 'l')
1694: return -1;
1695: }
1696:
1697: switch (*ptr++) {
1698: case 'l':
1699: switch (*ptr++) {
1700: case 'i':
1701: case 'd':
1702: case 'u':
1703: case 'x':
1704: case 'X':
1705: return 0;
1706: default:
1707: return -1;
1708: }
1709:
1710: case 'h':
1711: switch (*ptr++) {
1712: case 'h':
1713: switch (*ptr++) {
1714: case 'i':
1715: case 'd':
1716: case 'u':
1717: case 'x':
1718: case 'X':
1719: return 0;
1720: default:
1721: return -1;
1722: }
1723: case 'd':
1724: return 0;
1725: default:
1726: return -1;
1727: }
1728:
1729: case 'i':
1730: case 'c':
1731: case 'd':
1732: case 'u':
1733: case 'x':
1734: case 'X':
1735: return 0;
1736:
1737: default:
1738: return -1;
1739: }
1740:
1741: case FILE_FMT_FLOAT:
1742: case FILE_FMT_DOUBLE:
1743: if (*ptr == '-')
1744: ptr++;
1745: if (*ptr == '.')
1746: ptr++;
1747: while (isdigit((unsigned char)*ptr)) ptr++;
1748: if (*ptr == '.')
1749: ptr++;
1750: while (isdigit((unsigned char)*ptr)) ptr++;
1751:
1752: switch (*ptr++) {
1753: case 'e':
1754: case 'E':
1755: case 'f':
1756: case 'F':
1757: case 'g':
1758: case 'G':
1759: return 0;
1760:
1761: default:
1762: return -1;
1763: }
1764:
1765:
1766: case FILE_FMT_STR:
1767: if (*ptr == '-')
1768: ptr++;
1769: while (isdigit((unsigned char )*ptr))
1770: ptr++;
1771: if (*ptr == '.') {
1772: ptr++;
1773: while (isdigit((unsigned char )*ptr))
1774: ptr++;
1775: }
1776:
1777: switch (*ptr++) {
1778: case 's':
1779: return 0;
1780: default:
1781: return -1;
1782: }
1783:
1784: default:
1785: /* internal error */
1786: abort();
1787: }
1788: /*NOTREACHED*/
1789: return -1;
1790: }
1791:
1792: /*
1793: * Check that the optional printf format in description matches
1794: * the type of the magic.
1795: */
1796: private int
1797: check_format(struct magic_set *ms, struct magic *m)
1798: {
1799: char *ptr;
1800:
1801: for (ptr = m->desc; *ptr; ptr++)
1802: if (*ptr == '%')
1803: break;
1804: if (*ptr == '\0') {
1805: /* No format string; ok */
1806: return 1;
1807: }
1808:
1809: assert(file_nformats == file_nnames);
1810:
1811: if (m->type >= file_nformats) {
1812: file_magwarn(ms, "Internal error inconsistency between "
1813: "m->type and format strings");
1814: return -1;
1815: }
1816: if (file_formats[m->type] == FILE_FMT_NONE) {
1817: file_magwarn(ms, "No format string for `%s' with description "
1818: "`%s'", m->desc, file_names[m->type]);
1819: return -1;
1820: }
1821:
1822: ptr++;
1823: if (check_format_type(ptr, file_formats[m->type]) == -1) {
1824: /*
1825: * TODO: this error message is unhelpful if the format
1826: * string is not one character long
1827: */
1828: file_magwarn(ms, "Printf format `%c' is not valid for type "
1829: "`%s' in description `%s'", *ptr ? *ptr : '?',
1830: file_names[m->type], m->desc);
1831: return -1;
1832: }
1833:
1834: for (; *ptr; ptr++) {
1835: if (*ptr == '%') {
1836: file_magwarn(ms,
1837: "Too many format strings (should have at most one) "
1838: "for `%s' with description `%s'",
1839: file_names[m->type], m->desc);
1840: return -1;
1841: }
1842: }
1843: return 0;
1844: }
1845:
1846: /*
1847: * Read a numeric value from a pointer, into the value union of a magic
1848: * pointer, according to the magic type. Update the string pointer to point
1849: * just after the number read. Return 0 for success, non-zero for failure.
1850: */
1851: private int
1852: getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
1853: {
1854: switch (m->type) {
1855: case FILE_BESTRING16:
1856: case FILE_LESTRING16:
1857: case FILE_STRING:
1858: case FILE_PSTRING:
1859: case FILE_REGEX:
1860: case FILE_SEARCH:
1861: *p = getstr(ms, m, *p, action == FILE_COMPILE);
1862: if (*p == NULL) {
1863: if (ms->flags & MAGIC_CHECK)
1864: file_magwarn(ms, "cannot get string from `%s'",
1865: m->value.s);
1866: return -1;
1867: }
1868: return 0;
1869: case FILE_FLOAT:
1870: case FILE_BEFLOAT:
1871: case FILE_LEFLOAT:
1872: if (m->reln != 'x') {
1873: char *ep;
1874: #ifdef HAVE_STRTOF
1875: m->value.f = strtof(*p, &ep);
1876: #else
1877: m->value.f = (float)strtod(*p, &ep);
1878: #endif
1879: *p = ep;
1880: }
1881: return 0;
1882: case FILE_DOUBLE:
1883: case FILE_BEDOUBLE:
1884: case FILE_LEDOUBLE:
1885: if (m->reln != 'x') {
1886: char *ep;
1887: m->value.d = strtod(*p, &ep);
1888: *p = ep;
1889: }
1890: return 0;
1891: default:
1892: if (m->reln != 'x') {
1893: char *ep;
1894: m->value.q = file_signextend(ms, m,
1895: (uint64_t)strtoull(*p, &ep, 0));
1896: *p = ep;
1897: eatsize(p);
1898: }
1899: return 0;
1900: }
1901: }
1902:
1903: /*
1904: * Convert a string containing C character escapes. Stop at an unescaped
1905: * space or tab.
1906: * Copy the converted version to "m->value.s", and the length in m->vallen.
1907: * Return updated scan pointer as function result. Warn if set.
1908: */
1909: private const char *
1910: getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
1911: {
1912: const char *origs = s;
1913: char *p = m->value.s;
1914: size_t plen = sizeof(m->value.s);
1915: char *origp = p;
1916: char *pmax = p + plen - 1;
1917: int c;
1918: int val;
1919:
1920: while ((c = *s++) != '\0') {
1921: if (isspace((unsigned char) c))
1922: break;
1923: if (p >= pmax) {
1924: file_error(ms, 0, "string too long: `%s'", origs);
1925: return NULL;
1926: }
1927: if (c == '\\') {
1928: switch(c = *s++) {
1929:
1930: case '\0':
1931: if (warn)
1932: file_magwarn(ms, "incomplete escape");
1933: goto out;
1934:
1935: case '\t':
1936: if (warn) {
1937: file_magwarn(ms,
1938: "escaped tab found, use \\t instead");
1939: warn = 0; /* already did */
1940: }
1941: /*FALLTHROUGH*/
1942: default:
1943: if (warn) {
1944: if (isprint((unsigned char)c)) {
1945: /* Allow escaping of
1946: * ``relations'' */
1.1.1.2 ! misho 1947: if (strchr("<>&^=!", c) == NULL
! 1948: && (m->type != FILE_REGEX ||
! 1949: strchr("[]().*?^$|{}", c)
! 1950: == NULL)) {
1.1 misho 1951: file_magwarn(ms, "no "
1952: "need to escape "
1953: "`%c'", c);
1954: }
1955: } else {
1956: file_magwarn(ms,
1957: "unknown escape sequence: "
1958: "\\%03o", c);
1959: }
1960: }
1961: /*FALLTHROUGH*/
1962: /* space, perhaps force people to use \040? */
1963: case ' ':
1964: #if 0
1965: /*
1966: * Other things people escape, but shouldn't need to,
1967: * so we disallow them
1968: */
1969: case '\'':
1970: case '"':
1971: case '?':
1972: #endif
1973: /* Relations */
1974: case '>':
1975: case '<':
1976: case '&':
1977: case '^':
1978: case '=':
1979: case '!':
1980: /* and baskslash itself */
1981: case '\\':
1982: *p++ = (char) c;
1983: break;
1984:
1985: case 'a':
1986: *p++ = '\a';
1987: break;
1988:
1989: case 'b':
1990: *p++ = '\b';
1991: break;
1992:
1993: case 'f':
1994: *p++ = '\f';
1995: break;
1996:
1997: case 'n':
1998: *p++ = '\n';
1999: break;
2000:
2001: case 'r':
2002: *p++ = '\r';
2003: break;
2004:
2005: case 't':
2006: *p++ = '\t';
2007: break;
2008:
2009: case 'v':
2010: *p++ = '\v';
2011: break;
2012:
2013: /* \ and up to 3 octal digits */
2014: case '0':
2015: case '1':
2016: case '2':
2017: case '3':
2018: case '4':
2019: case '5':
2020: case '6':
2021: case '7':
2022: val = c - '0';
2023: c = *s++; /* try for 2 */
2024: if (c >= '0' && c <= '7') {
2025: val = (val << 3) | (c - '0');
2026: c = *s++; /* try for 3 */
2027: if (c >= '0' && c <= '7')
2028: val = (val << 3) | (c-'0');
2029: else
2030: --s;
2031: }
2032: else
2033: --s;
2034: *p++ = (char)val;
2035: break;
2036:
2037: /* \x and up to 2 hex digits */
2038: case 'x':
2039: val = 'x'; /* Default if no digits */
2040: c = hextoint(*s++); /* Get next char */
2041: if (c >= 0) {
2042: val = c;
2043: c = hextoint(*s++);
2044: if (c >= 0)
2045: val = (val << 4) + c;
2046: else
2047: --s;
2048: } else
2049: --s;
2050: *p++ = (char)val;
2051: break;
2052: }
2053: } else
2054: *p++ = (char)c;
2055: }
2056: out:
2057: *p = '\0';
1.1.1.2 ! misho 2058: m->vallen = CAST(unsigned char, (p - origp));
1.1 misho 2059: if (m->type == FILE_PSTRING)
1.1.1.2 ! misho 2060: m->vallen += (unsigned char)file_pstring_length_size(m);
1.1 misho 2061: return s;
2062: }
2063:
2064:
2065: /* Single hex char to int; -1 if not a hex char. */
2066: private int
2067: hextoint(int c)
2068: {
2069: if (!isascii((unsigned char) c))
2070: return -1;
2071: if (isdigit((unsigned char) c))
2072: return c - '0';
2073: if ((c >= 'a') && (c <= 'f'))
2074: return c + 10 - 'a';
2075: if (( c>= 'A') && (c <= 'F'))
2076: return c + 10 - 'A';
2077: return -1;
2078: }
2079:
2080:
2081: /*
2082: * Print a string containing C character escapes.
2083: */
2084: protected void
2085: file_showstr(FILE *fp, const char *s, size_t len)
2086: {
2087: char c;
2088:
2089: for (;;) {
2090: if (len == ~0U) {
1.1.1.2 ! misho 2091: c = *s++;
1.1 misho 2092: if (c == '\0')
2093: break;
2094: }
2095: else {
2096: if (len-- == 0)
2097: break;
1.1.1.2 ! misho 2098: c = *s++;
1.1 misho 2099: }
2100: if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
2101: (void) fputc(c, fp);
2102: else {
2103: (void) fputc('\\', fp);
2104: switch (c) {
2105: case '\a':
2106: (void) fputc('a', fp);
2107: break;
2108:
2109: case '\b':
2110: (void) fputc('b', fp);
2111: break;
2112:
2113: case '\f':
2114: (void) fputc('f', fp);
2115: break;
2116:
2117: case '\n':
2118: (void) fputc('n', fp);
2119: break;
2120:
2121: case '\r':
2122: (void) fputc('r', fp);
2123: break;
2124:
2125: case '\t':
2126: (void) fputc('t', fp);
2127: break;
2128:
2129: case '\v':
2130: (void) fputc('v', fp);
2131: break;
2132:
2133: default:
2134: (void) fprintf(fp, "%.3o", c & 0377);
2135: break;
2136: }
2137: }
2138: }
2139: }
2140:
2141: /*
2142: * eatsize(): Eat the size spec from a number [eg. 10UL]
2143: */
2144: private void
2145: eatsize(const char **p)
2146: {
2147: const char *l = *p;
2148:
2149: if (LOWCASE(*l) == 'u')
2150: l++;
2151:
2152: switch (LOWCASE(*l)) {
2153: case 'l': /* long */
2154: case 's': /* short */
2155: case 'h': /* short */
2156: case 'b': /* char/byte */
2157: case 'c': /* char/byte */
2158: l++;
2159: /*FALLTHROUGH*/
2160: default:
2161: break;
2162: }
2163:
2164: *p = l;
2165: }
2166:
2167: /*
2168: * handle a compiled file.
2169: * return -1 = error
2170: * return 1 = memory structure you can free
2171: * return 3 = bundled library from PHP
2172: */
2173: private int
2174: apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
2175: const char *fn)
2176: {
2177: uint32_t *ptr;
2178: uint32_t version;
2179: int needsbyteswap;
2180: char *dbname = NULL;
2181: void *mm = NULL;
2182: int ret = 0;
2183: php_stream *stream = NULL;
2184: php_stream_statbuf st;
2185:
2186:
2187: TSRMLS_FETCH();
2188:
2189: if (fn == NULL) {
2190: mm = (void *)&php_magic_database;
2191: ret = 3;
2192: goto internal_loaded;
2193: }
2194:
2195: dbname = mkdbname(ms, fn, 0);
2196: if (dbname == NULL)
2197: goto error2;
2198:
2199: #if PHP_API_VERSION < 20100412
2200: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
2201: #else
2202: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2203: #endif
2204:
2205: if (!stream) {
2206: goto error2;
2207: }
2208:
2209: if (php_stream_stat(stream, &st) < 0) {
2210: file_error(ms, errno, "cannot stat `%s'", dbname);
2211: goto error1;
2212: }
2213:
2214: if (st.sb.st_size < 8) {
2215: file_error(ms, 0, "file `%s' is too small", dbname);
2216: goto error1;
2217: }
2218:
2219: mm = emalloc((size_t)st.sb.st_size);
2220: if (php_stream_read(stream, mm, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
2221: file_badread(ms);
2222: goto error1;
2223: }
1.1.1.2 ! misho 2224: ret = 1;
1.1 misho 2225:
2226: php_stream_close(stream);
2227: stream = NULL;
2228:
2229: internal_loaded:
2230: *magicp = mm;
2231: ptr = (uint32_t *)(void *)*magicp;
2232: if (*ptr != MAGICNO) {
2233: if (swap4(*ptr) != MAGICNO) {
2234: file_error(ms, 0, "bad magic in `%s'", dbname);
2235: goto error1;
2236: }
2237: needsbyteswap = 1;
1.1.1.2 ! misho 2238: } else
1.1 misho 2239: needsbyteswap = 0;
2240: if (needsbyteswap)
2241: version = swap4(ptr[1]);
2242: else
2243: version = ptr[1];
2244: if (version != VERSIONNO) {
2245: file_error(ms, 0, "File %d.%d supports only version %d magic "
2246: "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2247: VERSIONNO, dbname, version);
2248: goto error1;
2249: }
2250:
2251: /* php_magic_database is a const, performing writes will segfault. This is for big-endian
2252: machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2253: future. */
2254: if (needsbyteswap && fn == NULL) {
2255: mm = emalloc(sizeof(php_magic_database));
2256: mm = memcpy(mm, php_magic_database, sizeof(php_magic_database));
2257: *magicp = mm;
2258: ret = 1;
2259: }
2260:
2261: if (fn == NULL) {
2262: *nmagicp = (sizeof(php_magic_database) / sizeof(struct magic));
2263: } else {
2264: *nmagicp = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2265: }
2266: if (*nmagicp > 0) {
2267: (*nmagicp)--;
2268: }
2269: (*magicp)++;
2270: if (needsbyteswap) {
2271: byteswap(*magicp, *nmagicp);
2272: }
2273:
2274: if (dbname) {
2275: efree(dbname);
2276: }
2277: return ret;
2278:
2279: error1:
2280: if (stream) {
2281: php_stream_close(stream);
2282: }
2283:
2284: if (mm && ret == 1) {
2285: efree(mm);
2286: } else {
2287: *magicp = NULL;
2288: *nmagicp = 0;
2289: }
2290: error2:
2291: if (dbname) {
2292: efree(dbname);
2293: }
2294: return -1;
2295: }
2296:
2297: private const uint32_t ar[] = {
2298: MAGICNO, VERSIONNO
2299: };
2300: /*
2301: * handle an mmaped file.
2302: */
2303: private int
2304: apprentice_compile(struct magic_set *ms, struct magic **magicp,
2305: uint32_t *nmagicp, const char *fn)
2306: {
2307: char *dbname;
2308: int rv = -1;
2309: php_stream *stream;
2310:
2311: TSRMLS_FETCH();
2312:
2313: dbname = mkdbname(ms, fn, 0);
2314:
1.1.1.2 ! misho 2315: if (dbname == NULL)
1.1 misho 2316: goto out;
2317:
2318: /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
2319: #if PHP_API_VERSION < 20100412
2320: stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
2321: #else
2322: stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
2323: #endif
2324:
2325: if (!stream) {
2326: file_error(ms, errno, "cannot open `%s'", dbname);
2327: goto out;
2328: }
2329:
2330: if (php_stream_write(stream, (char *)ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
2331: file_error(ms, errno, "error writing `%s'", dbname);
2332: goto out;
2333: }
2334:
2335: if (php_stream_seek(stream,(off_t)sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
2336: file_error(ms, errno, "error seeking `%s'", dbname);
2337: goto out;
2338: }
2339:
2340: if (php_stream_write(stream, (char *)*magicp, (sizeof(struct magic) * *nmagicp) != (ssize_t)(sizeof(struct magic) * *nmagicp))) {
2341: file_error(ms, errno, "error writing `%s'", dbname);
2342: goto out;
2343: }
2344:
2345: php_stream_close(stream);
2346:
2347: rv = 0;
2348: out:
2349: efree(dbname);
2350: return rv;
2351: }
2352:
2353: private const char ext[] = ".mgc";
2354: /*
2355: * make a dbname
2356: */
2357: private char *
2358: mkdbname(struct magic_set *ms, const char *fn, int strip)
2359: {
2360: const char *p, *q;
2361: char *buf;
2362: TSRMLS_FETCH();
2363:
2364: if (strip) {
2365: if ((p = strrchr(fn, '/')) != NULL)
2366: fn = ++p;
2367: }
2368:
2369: for (q = fn; *q; q++)
2370: continue;
2371: /* Look for .mgc */
2372: for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
2373: if (*p != *q)
2374: break;
2375:
2376: /* Did not find .mgc, restore q */
2377: if (p >= ext)
2378: while (*q)
2379: q++;
2380:
2381: q++;
2382: /* Compatibility with old code that looked in .mime */
2383: if (ms->flags & MAGIC_MIME) {
2384: spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
2385: if (VCWD_ACCESS(buf, R_OK) != -1) {
2386: ms->flags &= MAGIC_MIME_TYPE;
2387: return buf;
2388: }
2389: efree(buf);
2390: }
2391: spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
2392:
2393: /* Compatibility with old code that looked in .mime */
2394: if (strstr(p, ".mime") != NULL)
2395: ms->flags &= MAGIC_MIME_TYPE;
2396: return buf;
2397: }
2398:
2399: /*
2400: * Byteswap an mmap'ed file if needed
2401: */
2402: private void
2403: byteswap(struct magic *magic, uint32_t nmagic)
2404: {
2405: uint32_t i;
2406: for (i = 0; i < nmagic; i++)
2407: bs1(&magic[i]);
2408: }
2409:
2410: /*
2411: * swap a short
2412: */
2413: private uint16_t
2414: swap2(uint16_t sv)
2415: {
2416: uint16_t rv;
2417: uint8_t *s = (uint8_t *)(void *)&sv;
2418: uint8_t *d = (uint8_t *)(void *)&rv;
2419: d[0] = s[1];
2420: d[1] = s[0];
2421: return rv;
2422: }
2423:
2424: /*
2425: * swap an int
2426: */
2427: private uint32_t
2428: swap4(uint32_t sv)
2429: {
2430: uint32_t rv;
2431: uint8_t *s = (uint8_t *)(void *)&sv;
2432: uint8_t *d = (uint8_t *)(void *)&rv;
2433: d[0] = s[3];
2434: d[1] = s[2];
2435: d[2] = s[1];
2436: d[3] = s[0];
2437: return rv;
2438: }
2439:
/*
 * Return sv with the order of all eight bytes reversed.
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	unsigned int i;

	/* Peel bytes off the low end of sv and push them onto rv. */
	for (i = 0; i < sizeof(sv); i++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
2470:
2471: /*
2472: * byteswap a single magic entry
2473: */
2474: private void
2475: bs1(struct magic *m)
2476: {
2477: m->cont_level = swap2(m->cont_level);
2478: m->offset = swap4((uint32_t)m->offset);
2479: m->in_offset = swap4((uint32_t)m->in_offset);
2480: m->lineno = swap4((uint32_t)m->lineno);
2481: if (IS_LIBMAGIC_STRING(m->type)) {
2482: m->str_range = swap4(m->str_range);
2483: m->str_flags = swap4(m->str_flags);
2484: }
2485: else {
2486: m->value.q = swap8(m->value.q);
2487: m->num_mask = swap8(m->num_mask);
2488: }
2489: }
1.1.1.2 ! misho 2490:
! 2491: protected size_t
! 2492: file_pstring_length_size(const struct magic *m)
! 2493: {
! 2494: switch (m->str_flags & PSTRING_LEN) {
! 2495: case PSTRING_1_LE:
! 2496: return 1;
! 2497: case PSTRING_2_LE:
! 2498: case PSTRING_2_BE:
! 2499: return 2;
! 2500: case PSTRING_4_LE:
! 2501: case PSTRING_4_BE:
! 2502: return 4;
! 2503: default:
! 2504: abort(); /* Impossible */
! 2505: return 1;
! 2506: }
! 2507: }
! 2508: protected size_t
! 2509: file_pstring_get_length(const struct magic *m, const char *s)
! 2510: {
! 2511: size_t len = 0;
! 2512:
! 2513: switch (m->str_flags & PSTRING_LEN) {
! 2514: case PSTRING_1_LE:
! 2515: len = *s;
! 2516: break;
! 2517: case PSTRING_2_LE:
! 2518: len = (s[1] << 8) | s[0];
! 2519: break;
! 2520: case PSTRING_2_BE:
! 2521: len = (s[0] << 8) | s[1];
! 2522: break;
! 2523: case PSTRING_4_LE:
! 2524: len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
! 2525: break;
! 2526: case PSTRING_4_BE:
! 2527: len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
! 2528: break;
! 2529: default:
! 2530: abort(); /* Impossible */
! 2531: }
! 2532:
! 2533: if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
! 2534: len -= file_pstring_length_size(m);
! 2535:
! 2536: return len;
! 2537: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>