Annotation of embedaddon/php/ext/fileinfo/libmagic/apprentice.c, revision 1.1.1.4
1.1 misho 1: /*
2: * Copyright (c) Ian F. Darwin 1986-1995.
3: * Software written by Ian F. Darwin and others;
4: * maintained 1995-present by Christos Zoulas and others.
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice immediately at the beginning of the file, without modification,
11: * this list of conditions, and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26: * SUCH DAMAGE.
27: */
28: /*
29: * apprentice - make one pass through /etc/magic, learning its secrets.
30: */
31:
32: #include "php.h"
33:
34: #include "file.h"
35:
36: #ifndef lint
1.1.1.3 misho 37: FILE_RCSID("@(#)$File: apprentice.c,v 1.191 2013/02/26 21:02:48 christos Exp $")
1.1 misho 38: #endif /* lint */
39:
40: #include "magic.h"
41: #include "patchlevel.h"
42: #include <stdlib.h>
43:
44: #if defined(__hpux) && !defined(HAVE_STRTOULL)
45: #if SIZEOF_LONG == 8
46: # define strtoull strtoul
47: #else
48: # define strtoull __strtoull
49: #endif
50: #endif
51:
52: #ifdef PHP_WIN32
53: #include "win32/unistd.h"
54: #if _MSC_VER <= 1300
55: # include "win32/php_strtoi64.h"
56: #endif
57: #define strtoull _strtoui64
58: #else
59: #include <unistd.h>
60: #endif
61: #include <string.h>
62: #include <assert.h>
63: #include <ctype.h>
64: #include <fcntl.h>
65:
/*
 * EATAB advances a pointer named `l' past any run of ASCII whitespace.
 * The macro refers to `l' directly, so a variable of that name must be in
 * scope wherever it is used.
 */
66: #define EATAB {while (isascii((unsigned char) *l) && \
67: isspace((unsigned char) *l)) ++l;}
/*
 * LOWCASE(l) evaluates to the lower-case form of `l' when `l' is an
 * upper-case character, and to `l' itself otherwise.  The argument is
 * evaluated more than once.
 */
68: #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
69: tolower((unsigned char) (l)) : (l))
70: /*
71: * Work around a bug in headers on Digital Unix.
72: * At least confirmed for: OSF1 V4.0 878
73: */
74: #if defined(__osf__) && defined(__DECC)
75: #ifdef MAP_FAILED
76: #undef MAP_FAILED
77: #endif
78: #endif
79:
80: #ifndef MAP_FAILED
81: #define MAP_FAILED (void *) -1
82: #endif
83:
84: #ifndef MAP_FILE
85: #define MAP_FILE 0
86: #endif
87:
1.1.1.3 misho 88: #define ALLOC_CHUNK (size_t)10
89: #define ALLOC_INCR (size_t)200
90:
/*
 * One parsed entry as collected while reading a magic file: a block of
 * struct magic records plus bookkeeping counters.
 */
1.1 misho 91: struct magic_entry {
/* record array; coalesce_entries() copies cont_count records out of it and magic_entry_free() releases it */
92: struct magic *mp;
/* number of records in mp that are in use */
93: uint32_t cont_count;
/* NOTE(review): presumably the allocated capacity of mp - not used in the code visible here, confirm against parse() */
94: uint32_t max_count;
95: };
96:
/*
 * A loaded magic database, split into MAGIC_SETS record arrays.
 */
1.1.1.3 misho 97: struct magic_map {
/*
 * Backing storage.  apprentice_unmap() treats three cases: equal to
 * php_magic_database (nothing is freed), NULL (each magic[] array is
 * freed separately), anything else (p itself is freed).
 */
98: void *p;
/* NOTE(review): presumably the byte length of p - not referenced in the code visible here */
99: size_t len;
/* per-set record arrays and their element counts; consumed by add_mlist() */
100: struct magic *magic[MAGIC_SETS];
101: uint32_t nmagic[MAGIC_SETS];
102: };
103:
/*
 * Lookup tables indexed by magic type code.  init_file_tables() fills
 * file_formats[] with each type's format class and file_names[] with its
 * keyword, both taken from type_tbl.  The *_n* constants export the
 * array sizes.
 */
1.1 misho 104: int file_formats[FILE_NAMES_SIZE];
105: const size_t file_nformats = FILE_NAMES_SIZE;
106: const char *file_names[FILE_NAMES_SIZE];
107: const size_t file_nnames = FILE_NAMES_SIZE;
108:
109: private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
110: private int hextoint(int);
111: private const char *getstr(struct magic_set *, struct magic *, const char *,
112: int);
1.1.1.3 misho 113: private int parse(struct magic_set *, struct magic_entry *, const char *,
114: size_t, int);
1.1 misho 115: private void eatsize(const char **);
1.1.1.3 misho 116: private int apprentice_1(struct magic_set *, const char *, int);
1.1 misho 117: private size_t apprentice_magic_strength(const struct magic *);
118: private int apprentice_sort(const void *, const void *);
1.1.1.2 misho 119: private void apprentice_list(struct mlist *, int );
1.1.1.3 misho 120: private struct magic_map *apprentice_load(struct magic_set *,
1.1 misho 121: const char *, int);
1.1.1.3 misho 122: private struct mlist *mlist_alloc(void);
123: private void mlist_free(struct mlist *);
1.1 misho 124: private void byteswap(struct magic *, uint32_t);
125: private void bs1(struct magic *);
126: private uint16_t swap2(uint16_t);
127: private uint32_t swap4(uint32_t);
128: private uint64_t swap8(uint64_t);
129: private char *mkdbname(struct magic_set *, const char *, int);
1.1.1.3 misho 130: private struct magic_map *apprentice_map(struct magic_set *, const char *);
131: private void apprentice_unmap(struct magic_map *);
132: private int apprentice_compile(struct magic_set *, struct magic_map *,
1.1 misho 133: const char *);
134: private int check_format_type(const char *, int);
135: private int check_format(struct magic_set *, struct magic *);
136: private int get_op(char);
137: private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
138: private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
139: private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
140:
1.1.1.3 misho 141:
/*
 * Current capacity of each per-set entry array grown by addentry();
 * apprentice_load() resets it to zero before loading.
 */
142: private size_t maxmagic[MAGIC_SETS] = { 0 };
/* compared against FILE_MAGICSIZE in apprentice_1() as a layout sanity check */
1.1 misho 143: private size_t magicsize = sizeof(struct magic);
144:
/* column header written to stderr by apprentice_load() when action is FILE_CHECK */
145: private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1.1.3 misho 146:
/*
 * Dispatch table for "!:<keyword>" lines in a magic file.  load_1()
 * compares the text after "!:" with each name and calls the matching
 * handler with the remainder of the line.  The table ends with an
 * all-NULL entry.
 */
1.1 misho 147: private struct {
/* keyword following "!:" */
148: const char *name;
/* strlen(name) */
149: size_t len;
/* handler; load_1() counts a non-zero return as an error */
150: int (*fun)(struct magic_set *, struct magic_entry *, const char *);
151: } bang[] = {
/* DECLARE_FIELD(x) expands to { "x", strlen("x"), parse_x } */
152: #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
153: DECLARE_FIELD(mime),
154: DECLARE_FIELD(apple),
155: DECLARE_FIELD(strength),
156: #undef DECLARE_FIELD
157: { NULL, 0, NULL }
158: };
159:
160: #include "../data_file.c"
161:
/*
 * One row of a type keyword table (see type_tbl and special_tbl).
 */
1.1.1.3 misho 162: struct type_tbl_s {
/* keyword text as written in a magic file */
1.1 misho 163: const char name[16];
/* length of name; 0 marks the terminating row */
164: const size_t len;
/* FILE_* type code */
165: const int type;
/* FILE_FMT_* format class for this type */
166: const int format;
1.1.1.3 misho 167: };
168:
169: /*
170: * XXX - the actual Single UNIX Specification says that "long" means "long",
171: * as in the C data type, but we treat it as meaning "4-byte integer".
172: * Given that the OS X version of file 5.04 did the same, I guess that passes
173: * the actual test; having "long" be dependent on how big a "long" is on
174: * the machine running "file" is silly.
175: */
/*
 * Table of type keywords.  get_type() does a prefix match of the input
 * against each name in order, and init_file_tables() uses the rows to
 * populate file_names[] / file_formats[].  The row with an empty name
 * (len 0) terminates the table.
 */
176: static const struct type_tbl_s type_tbl[] = {
/* XX(s) supplies both the name and its length; XX_NULL is the sentinel pair */
1.1 misho 177: # define XX(s) s, (sizeof(s) - 1)
178: # define XX_NULL "", 0
1.1.1.3 misho 179: { XX("invalid"), FILE_INVALID, FILE_FMT_NONE },
1.1 misho 180: { XX("byte"), FILE_BYTE, FILE_FMT_NUM },
181: { XX("short"), FILE_SHORT, FILE_FMT_NUM },
182: { XX("default"), FILE_DEFAULT, FILE_FMT_STR },
183: { XX("long"), FILE_LONG, FILE_FMT_NUM },
184: { XX("string"), FILE_STRING, FILE_FMT_STR },
185: { XX("date"), FILE_DATE, FILE_FMT_STR },
186: { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM },
187: { XX("belong"), FILE_BELONG, FILE_FMT_NUM },
188: { XX("bedate"), FILE_BEDATE, FILE_FMT_STR },
189: { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM },
190: { XX("lelong"), FILE_LELONG, FILE_FMT_NUM },
191: { XX("ledate"), FILE_LEDATE, FILE_FMT_STR },
192: { XX("pstring"), FILE_PSTRING, FILE_FMT_STR },
193: { XX("ldate"), FILE_LDATE, FILE_FMT_STR },
194: { XX("beldate"), FILE_BELDATE, FILE_FMT_STR },
195: { XX("leldate"), FILE_LELDATE, FILE_FMT_STR },
196: { XX("regex"), FILE_REGEX, FILE_FMT_STR },
197: { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR },
198: { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR },
199: { XX("search"), FILE_SEARCH, FILE_FMT_STR },
200: { XX("medate"), FILE_MEDATE, FILE_FMT_STR },
201: { XX("meldate"), FILE_MELDATE, FILE_FMT_STR },
202: { XX("melong"), FILE_MELONG, FILE_FMT_NUM },
203: { XX("quad"), FILE_QUAD, FILE_FMT_QUAD },
204: { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD },
205: { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD },
206: { XX("qdate"), FILE_QDATE, FILE_FMT_STR },
207: { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR },
208: { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR },
209: { XX("qldate"), FILE_QLDATE, FILE_FMT_STR },
210: { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR },
211: { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR },
212: { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT },
213: { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT },
214: { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT },
215: { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE },
216: { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE },
217: { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE },
218: { XX("leid3"), FILE_LEID3, FILE_FMT_NUM },
219: { XX("beid3"), FILE_BEID3, FILE_FMT_NUM },
1.1.1.3 misho 220: { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM },
221: { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR },
222: { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR },
223: { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR },
/* NOTE(review): "name"/"use" are FILE_FMT_NONE here but FILE_FMT_STR in special_tbl - confirm which is intended */
224: { XX("name"), FILE_NAME, FILE_FMT_NONE },
225: { XX("use"), FILE_USE, FILE_FMT_NONE },
/* sentinel: len == 0 stops the scans in get_type() and init_file_tables() */
226: { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
227: };
228:
229: /*
230: * These are not types, and cannot be preceded by "u" to make them
231: * unsigned.
232: */
/* Keyword table for the special entries; same layout and sentinel convention as type_tbl. */
233: static const struct type_tbl_s special_tbl[] = {
234: { XX("name"), FILE_NAME, FILE_FMT_STR },
235: { XX("use"), FILE_USE, FILE_FMT_STR },
/* sentinel row (len == 0) */
1.1 misho 236: { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
1.1.1.3 misho 237: };
1.1 misho 238: # undef XX
239: # undef XX_NULL
240:
241: #ifndef S_ISDIR
242: #define S_ISDIR(mode) ((mode) & _S_IFDIR)
243: #endif
244:
245: private int
1.1.1.3 misho 246: get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
1.1 misho 247: {
248: const struct type_tbl_s *p;
249:
1.1.1.3 misho 250: for (p = tbl; p->len; p++) {
1.1 misho 251: if (strncmp(l, p->name, p->len) == 0) {
252: if (t)
253: *t = l + p->len;
254: break;
255: }
256: }
257: return p->type;
258: }
259:
1.1.1.3 misho 260: private int
261: get_standard_integer_type(const char *l, const char **t)
262: {
263: int type;
264:
265: if (isalpha((unsigned char)l[1])) {
266: switch (l[1]) {
267: case 'C':
268: /* "dC" and "uC" */
269: type = FILE_BYTE;
270: break;
271: case 'S':
272: /* "dS" and "uS" */
273: type = FILE_SHORT;
274: break;
275: case 'I':
276: case 'L':
277: /*
278: * "dI", "dL", "uI", and "uL".
279: *
280: * XXX - the actual Single UNIX Specification says
281: * that "L" means "long", as in the C data type,
282: * but we treat it as meaning "4-byte integer".
283: * Given that the OS X version of file 5.04 did
284: * the same, I guess that passes the actual SUS
285: * validation suite; having "dL" be dependent on
286: * how big a "long" is on the machine running
287: * "file" is silly.
288: */
289: type = FILE_LONG;
290: break;
291: case 'Q':
292: /* "dQ" and "uQ" */
293: type = FILE_QUAD;
294: break;
295: default:
296: /* "d{anything else}", "u{anything else}" */
297: return FILE_INVALID;
298: }
299: l += 2;
300: } else if (isdigit((unsigned char)l[1])) {
301: /*
302: * "d{num}" and "u{num}"; we only support {num} values
303: * of 1, 2, 4, and 8 - the Single UNIX Specification
304: * doesn't say anything about whether arbitrary
305: * values should be supported, but both the Solaris 10
306: * and OS X Mountain Lion versions of file passed the
307: * Single UNIX Specification validation suite, and
308: * neither of them support values bigger than 8 or
309: * non-power-of-2 values.
310: */
311: if (isdigit((unsigned char)l[2])) {
312: /* Multi-digit, so > 9 */
313: return FILE_INVALID;
314: }
315: switch (l[1]) {
316: case '1':
317: type = FILE_BYTE;
318: break;
319: case '2':
320: type = FILE_SHORT;
321: break;
322: case '4':
323: type = FILE_LONG;
324: break;
325: case '8':
326: type = FILE_QUAD;
327: break;
328: default:
329: /* XXX - what about 3, 5, 6, or 7? */
330: return FILE_INVALID;
331: }
332: l += 2;
333: } else {
334: /*
335: * "d" or "u" by itself.
336: */
337: type = FILE_LONG;
338: ++l;
339: }
340: if (t)
341: *t = l;
342: return type;
343: }
344:
1.1 misho 345: private void
346: init_file_tables(void)
347: {
348: static int done = 0;
349: const struct type_tbl_s *p;
350:
351: if (done)
352: return;
353: done++;
354:
355: for (p = type_tbl; p->len; p++) {
356: assert(p->type < FILE_NAMES_SIZE);
357: file_names[p->type] = p->name;
358: file_formats[p->type] = p->format;
359: }
1.1.1.3 misho 360: assert(p - type_tbl == FILE_NAMES_SIZE);
361: }
362:
363: private int
364: add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
365: {
366: struct mlist *ml;
367:
368: if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
369: return -1;
370:
371: ml->map = idx == 0 ? map : NULL;
372: ml->magic = map->magic[idx];
373: ml->nmagic = map->nmagic[idx];
374:
375: mlp->prev->next = ml;
376: ml->prev = mlp->prev;
377: ml->next = mlp;
378: mlp->prev = ml;
379: return 0;
1.1 misho 380: }
381:
382: /*
383: * Handle one file or directory.
384: */
385: private int
1.1.1.3 misho 386: apprentice_1(struct magic_set *ms, const char *fn, int action)
1.1 misho 387: {
388: struct mlist *ml;
1.1.1.3 misho 389: struct magic_map *map;
390: size_t i;
1.1 misho 391:
392: if (magicsize != FILE_MAGICSIZE) {
393: file_error(ms, 0, "magic element size %lu != %lu",
1.1.1.3 misho 394: (unsigned long)sizeof(*map->magic[0]),
1.1 misho 395: (unsigned long)FILE_MAGICSIZE);
396: return -1;
397: }
398:
399: if (action == FILE_COMPILE) {
1.1.1.3 misho 400: map = apprentice_load(ms, fn, action);
401: if (map == NULL)
1.1 misho 402: return -1;
1.1.1.3 misho 403: return apprentice_compile(ms, map, fn);
1.1 misho 404: }
405:
1.1.1.3 misho 406: map = apprentice_map(ms, fn);
407: if (map == NULL) {
1.1 misho 408: if (fn) {
409: if (ms->flags & MAGIC_CHECK)
410: file_magwarn(ms, "using regular magic file `%s'", fn);
1.1.1.3 misho 411: map = apprentice_load(ms, fn, action);
1.1 misho 412: }
1.1.1.3 misho 413: if (map == NULL)
1.1 misho 414: return -1;
415: }
416:
1.1.1.3 misho 417: for (i = 0; i < MAGIC_SETS; i++) {
418: if (add_mlist(ms->mlist[i], map, i) == -1) {
419: file_oomem(ms, sizeof(*ml));
420: apprentice_unmap(map);
421: return -1;
422: }
1.1 misho 423: }
424:
1.1.1.2 misho 425: if (action == FILE_LIST) {
1.1.1.3 misho 426: for (i = 0; i < MAGIC_SETS; i++) {
427: printf("Set %zu:\nBinary patterns:\n", i);
428: apprentice_list(ms->mlist[i], BINTEST);
429: printf("Text patterns:\n");
430: apprentice_list(ms->mlist[i], TEXTTEST);
431: }
1.1.1.2 misho 432: }
433:
1.1 misho 434: return 0;
435: }
436:
437: protected void
1.1.1.3 misho 438: file_ms_free(struct magic_set *ms)
1.1 misho 439: {
1.1.1.3 misho 440: size_t i;
441: if (ms == NULL)
1.1 misho 442: return;
1.1.1.3 misho 443: for (i = 0; i < MAGIC_SETS; i++)
444: mlist_free(ms->mlist[i]);
445: if (ms->o.pbuf) {
446: efree(ms->o.pbuf);
447: }
448: if (ms->o.buf) {
449: efree(ms->o.buf);
450: }
451: if (ms->c.li) {
452: efree(ms->c.li);
453: }
454: efree(ms);
455: }
1.1 misho 456:
1.1.1.3 misho 457: protected struct magic_set *
458: file_ms_alloc(int flags)
459: {
460: struct magic_set *ms;
461: size_t i, len;
1.1 misho 462:
1.1.1.3 misho 463: if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
464: sizeof(struct magic_set)))) == NULL)
465: return NULL;
1.1 misho 466:
1.1.1.3 misho 467: if (magic_setflags(ms, flags) == -1) {
468: errno = EINVAL;
469: goto free;
470: }
471:
472: ms->o.buf = ms->o.pbuf = NULL;
473: len = (ms->c.len = 10) * sizeof(*ms->c.li);
474:
475: if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
476: goto free;
477:
478: ms->event_flags = 0;
479: ms->error = -1;
480: for (i = 0; i < MAGIC_SETS; i++)
481: ms->mlist[i] = NULL;
482: ms->file = "unknown";
483: ms->line = 0;
484: return ms;
485: free:
486: efree(ms);
487: return NULL;
488: }
489:
490: private void
491: apprentice_unmap(struct magic_map *map)
492: {
493: if (map == NULL)
494: return;
495: if (map->p != php_magic_database) {
1.1.1.4 ! misho 496: if (map->p == NULL) {
! 497: int j;
! 498: for (j = 0; j < MAGIC_SETS; j++) {
! 499: if (map->magic[j]) {
! 500: efree(map->magic[j]);
! 501: }
! 502: }
! 503: } else {
1.1.1.3 misho 504: efree(map->p);
505: }
506: }
507: efree(map);
508: }
509:
510: private struct mlist *
511: mlist_alloc(void)
512: {
513: struct mlist *mlist;
514: if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
515: return NULL;
1.1 misho 516: }
1.1.1.3 misho 517: mlist->next = mlist->prev = mlist;
518: return mlist;
519: }
520:
521: private void
522: mlist_free(struct mlist *mlist)
523: {
524: struct mlist *ml;
525:
526: if (mlist == NULL)
527: return;
528:
529: for (ml = mlist->next; ml != mlist;) {
530: struct mlist *next = ml->next;
531: if (ml->map)
532: apprentice_unmap(ml->map);
533: efree(ml);
534: ml = next;
535: }
536: efree(ml);
1.1 misho 537: }
538:
539: /* const char *fn: list of magic files and directories */
1.1.1.3 misho 540: protected int
1.1 misho 541: file_apprentice(struct magic_set *ms, const char *fn, int action)
542: {
543: char *p, *mfn;
544: int file_err, errs = -1;
1.1.1.3 misho 545: size_t i;
1.1.1.2 misho 546: /* XXX disabling default magic loading so the compiled in data is used */
547: #if 0
548: if ((fn = magic_getpath(fn, action)) == NULL)
1.1.1.3 misho 549: return -1;
1.1.1.2 misho 550: #endif
1.1 misho 551:
552: init_file_tables();
553:
554: if (fn == NULL)
555: fn = getenv("MAGIC");
556: if (fn == NULL) {
1.1.1.3 misho 557: for (i = 0; i < MAGIC_SETS; i++) {
558: mlist_free(ms->mlist[i]);
559: if ((ms->mlist[i] = mlist_alloc()) == NULL) {
560: file_oomem(ms, sizeof(*ms->mlist[i]));
561: return -1;
562: }
563: }
564: return apprentice_1(ms, fn, action);
1.1 misho 565: }
566:
1.1.1.3 misho 567: if ((mfn = estrdup(fn)) == NULL) {
568: file_oomem(ms, strlen(fn));
569: return -1;
570: }
1.1 misho 571:
1.1.1.3 misho 572: for (i = 0; i < MAGIC_SETS; i++) {
573: mlist_free(ms->mlist[i]);
574: if ((ms->mlist[i] = mlist_alloc()) == NULL) {
575: file_oomem(ms, sizeof(*ms->mlist[i]));
576: if (i != 0) {
577: --i;
578: do
579: mlist_free(ms->mlist[i]);
580: while (i != 0);
581: }
582: efree(mfn);
583: return -1;
584: }
585: }
586: fn = mfn;
1.1 misho 587:
588: while (fn) {
589: p = strchr(fn, PATHSEP);
590: if (p)
591: *p++ = '\0';
592: if (*fn == '\0')
593: break;
1.1.1.3 misho 594: file_err = apprentice_1(ms, fn, action);
1.1 misho 595: errs = MAX(errs, file_err);
596: fn = p;
597: }
1.1.1.3 misho 598:
599: efree(mfn);
600:
1.1 misho 601: if (errs == -1) {
1.1.1.3 misho 602: for (i = 0; i < MAGIC_SETS; i++) {
603: mlist_free(ms->mlist[i]);
604: ms->mlist[i] = NULL;
605: }
606: file_error(ms, 0, "could not find any valid magic files!");
607: return -1;
608: }
609:
610: if (action == FILE_LOAD)
611: return 0;
612:
613: for (i = 0; i < MAGIC_SETS; i++) {
614: mlist_free(ms->mlist[i]);
615: ms->mlist[i] = NULL;
616: }
617:
618: switch (action) {
619: case FILE_COMPILE:
620: case FILE_CHECK:
621: case FILE_LIST:
622: return 0;
623: default:
624: file_error(ms, 0, "Invalid action %d", action);
625: return -1;
1.1 misho 626: }
627: }
628:
629: /*
630: * Get weight of this magic entry, for sorting purposes.
631: */
632: private size_t
633: apprentice_magic_strength(const struct magic *m)
634: {
635: #define MULT 10
636: size_t val = 2 * MULT; /* baseline strength */
637:
638: switch (m->type) {
639: case FILE_DEFAULT: /* make sure this sorts last */
640: if (m->factor_op != FILE_FACTOR_OP_NONE)
641: abort();
642: return 0;
643:
644: case FILE_BYTE:
645: val += 1 * MULT;
646: break;
647:
648: case FILE_SHORT:
649: case FILE_LESHORT:
650: case FILE_BESHORT:
651: val += 2 * MULT;
652: break;
653:
654: case FILE_LONG:
655: case FILE_LELONG:
656: case FILE_BELONG:
657: case FILE_MELONG:
658: val += 4 * MULT;
659: break;
660:
661: case FILE_PSTRING:
662: case FILE_STRING:
663: val += m->vallen * MULT;
664: break;
665:
666: case FILE_BESTRING16:
667: case FILE_LESTRING16:
668: val += m->vallen * MULT / 2;
669: break;
670:
671: case FILE_SEARCH:
672: case FILE_REGEX:
673: val += m->vallen * MAX(MULT / m->vallen, 1);
674: break;
675:
676: case FILE_DATE:
677: case FILE_LEDATE:
678: case FILE_BEDATE:
679: case FILE_MEDATE:
680: case FILE_LDATE:
681: case FILE_LELDATE:
682: case FILE_BELDATE:
683: case FILE_MELDATE:
684: case FILE_FLOAT:
685: case FILE_BEFLOAT:
686: case FILE_LEFLOAT:
687: val += 4 * MULT;
688: break;
689:
690: case FILE_QUAD:
691: case FILE_BEQUAD:
692: case FILE_LEQUAD:
693: case FILE_QDATE:
694: case FILE_LEQDATE:
695: case FILE_BEQDATE:
696: case FILE_QLDATE:
697: case FILE_LEQLDATE:
698: case FILE_BEQLDATE:
1.1.1.3 misho 699: case FILE_QWDATE:
700: case FILE_LEQWDATE:
701: case FILE_BEQWDATE:
1.1 misho 702: case FILE_DOUBLE:
703: case FILE_BEDOUBLE:
704: case FILE_LEDOUBLE:
705: val += 8 * MULT;
706: break;
707:
1.1.1.3 misho 708: case FILE_INDIRECT:
709: case FILE_NAME:
710: case FILE_USE:
711: break;
712:
1.1 misho 713: default:
714: val = 0;
715: (void)fprintf(stderr, "Bad type %d\n", m->type);
716: abort();
717: }
718:
719: switch (m->reln) {
720: case 'x': /* matches anything penalize */
721: case '!': /* matches almost anything penalize */
722: val = 0;
723: break;
724:
725: case '=': /* Exact match, prefer */
726: val += MULT;
727: break;
728:
729: case '>':
730: case '<': /* comparison match reduce strength */
731: val -= 2 * MULT;
732: break;
733:
734: case '^':
735: case '&': /* masking bits, we could count them too */
736: val -= MULT;
737: break;
738:
739: default:
740: (void)fprintf(stderr, "Bad relation %c\n", m->reln);
741: abort();
742: }
743:
744: if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */
745: val = 1;
746:
747: switch (m->factor_op) {
748: case FILE_FACTOR_OP_NONE:
749: break;
750: case FILE_FACTOR_OP_PLUS:
751: val += m->factor;
752: break;
753: case FILE_FACTOR_OP_MINUS:
754: val -= m->factor;
755: break;
756: case FILE_FACTOR_OP_TIMES:
757: val *= m->factor;
758: break;
759: case FILE_FACTOR_OP_DIV:
760: val /= m->factor;
761: break;
762: default:
763: abort();
764: }
765:
766: /*
767: * Magic entries with no description get a bonus because they depend
768: * on subsequent magic entries to print something.
769: */
770: if (m->desc[0] == '\0')
771: val++;
772: return val;
773: }
774:
775: /*
776: * Sort callback for sorting entries by "strength" (basically length)
777: */
778: private int
779: apprentice_sort(const void *a, const void *b)
780: {
1.1.1.3 misho 781: const struct magic_entry *ma = CAST(const struct magic_entry *, a);
782: const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1.1 misho 783: size_t sa = apprentice_magic_strength(ma->mp);
784: size_t sb = apprentice_magic_strength(mb->mp);
785: if (sa == sb)
786: return 0;
787: else if (sa > sb)
788: return -1;
789: else
790: return 1;
791: }
792:
1.1.1.2 misho 793: /*
794: * Shows sorted patterns list in the order which is used for the matching
795: */
796: private void
797: apprentice_list(struct mlist *mlist, int mode)
798: {
799: uint32_t magindex = 0;
800: struct mlist *ml;
801: for (ml = mlist->next; ml != mlist; ml = ml->next) {
802: for (magindex = 0; magindex < ml->nmagic; magindex++) {
803: struct magic *m = &ml->magic[magindex];
804: if ((m->flag & mode) != mode) {
805: /* Skip sub-tests */
806: while (magindex + 1 < ml->nmagic &&
807: ml->magic[magindex + 1].cont_level != 0)
808: ++magindex;
809: continue; /* Skip to next top-level test*/
810: }
811:
812: /*
813: * Try to iterate over the tree until we find item with
814: * description/mimetype.
815: */
816: while (magindex + 1 < ml->nmagic &&
817: ml->magic[magindex + 1].cont_level != 0 &&
818: *ml->magic[magindex].desc == '\0' &&
819: *ml->magic[magindex].mimetype == '\0')
820: magindex++;
821:
822: printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
823: apprentice_magic_strength(m),
824: ml->magic[magindex].desc,
825: ml->magic[magindex].mimetype);
826: }
827: }
828: }
829:
1.1 misho 830: private void
831: set_test_type(struct magic *mstart, struct magic *m)
832: {
833: switch (m->type) {
834: case FILE_BYTE:
835: case FILE_SHORT:
836: case FILE_LONG:
837: case FILE_DATE:
838: case FILE_BESHORT:
839: case FILE_BELONG:
840: case FILE_BEDATE:
841: case FILE_LESHORT:
842: case FILE_LELONG:
843: case FILE_LEDATE:
844: case FILE_LDATE:
845: case FILE_BELDATE:
846: case FILE_LELDATE:
847: case FILE_MEDATE:
848: case FILE_MELDATE:
849: case FILE_MELONG:
850: case FILE_QUAD:
851: case FILE_LEQUAD:
852: case FILE_BEQUAD:
853: case FILE_QDATE:
854: case FILE_LEQDATE:
855: case FILE_BEQDATE:
856: case FILE_QLDATE:
857: case FILE_LEQLDATE:
858: case FILE_BEQLDATE:
1.1.1.3 misho 859: case FILE_QWDATE:
860: case FILE_LEQWDATE:
861: case FILE_BEQWDATE:
1.1 misho 862: case FILE_FLOAT:
863: case FILE_BEFLOAT:
864: case FILE_LEFLOAT:
865: case FILE_DOUBLE:
866: case FILE_BEDOUBLE:
867: case FILE_LEDOUBLE:
1.1.1.2 misho 868: mstart->flag |= BINTEST;
869: break;
1.1 misho 870: case FILE_STRING:
871: case FILE_PSTRING:
872: case FILE_BESTRING16:
873: case FILE_LESTRING16:
1.1.1.2 misho 874: /* Allow text overrides */
875: if (mstart->str_flags & STRING_TEXTTEST)
876: mstart->flag |= TEXTTEST;
877: else
878: mstart->flag |= BINTEST;
1.1 misho 879: break;
880: case FILE_REGEX:
881: case FILE_SEARCH:
1.1.1.2 misho 882: /* Check for override */
883: if (mstart->str_flags & STRING_BINTEST)
884: mstart->flag |= BINTEST;
885: if (mstart->str_flags & STRING_TEXTTEST)
886: mstart->flag |= TEXTTEST;
887:
888: if (mstart->flag & (TEXTTEST|BINTEST))
889: break;
890:
1.1 misho 891: /* binary test if pattern is not text */
892: if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
893: NULL) <= 0)
894: mstart->flag |= BINTEST;
1.1.1.2 misho 895: else
896: mstart->flag |= TEXTTEST;
1.1 misho 897: break;
898: case FILE_DEFAULT:
899: /* can't deduce anything; we shouldn't see this at the
900: top level anyway */
901: break;
902: case FILE_INVALID:
903: default:
904: /* invalid search type, but no need to complain here */
905: break;
906: }
907: }
908:
1.1.1.3 misho 909: private int
910: addentry(struct magic_set *ms, struct magic_entry *me,
911: struct magic_entry **mentry, uint32_t *mentrycount)
912: {
913: size_t i = me->mp->type == FILE_NAME ? 1 : 0;
914: if (mentrycount[i] == maxmagic[i]) {
915: struct magic_entry *mp;
916:
917: maxmagic[i] += ALLOC_INCR;
918: if ((mp = CAST(struct magic_entry *,
919: erealloc(mentry[i], sizeof(*mp) * maxmagic[i]))) ==
920: NULL) {
921: file_oomem(ms, sizeof(*mp) * maxmagic[i]);
922: return -1;
923: }
924: (void)memset(&mp[mentrycount[i]], 0, sizeof(*mp) *
925: ALLOC_INCR);
926: mentry[i] = mp;
927: }
928: mentry[i][mentrycount[i]++] = *me;
929: memset(me, 0, sizeof(*me));
930: return 0;
931: }
932:
1.1 misho 933: /*
934: * Load and parse one file.
935: */
/*
 * Read the magic file `fn' line by line through a PHP stream and hand
 * each line to the parser; finished entries are stored with addentry().
 *
 * ms          - magic set; ms->file and ms->line are updated as the file
 *               is read
 * action      - passed through unchanged to parse()
 * fn          - path of the magic file to open
 * errs        - incremented once per problem: the file cannot be opened,
 *               a "!:" line is unknown or has no current entry, a "!:"
 *               handler fails, or parse() reports an error
 * mentry      - per-set entry arrays, grown by addentry()
 * mentrycount - per-set entry counts, updated by addentry()
 */
936: private void
937: load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1.1.1.3 misho 938: struct magic_entry **mentry, uint32_t *mentrycount)
1.1 misho 939: {
940: char buffer[BUFSIZ + 1];
1.1.1.2 misho 941: char *line = NULL;
942: size_t len;
/* counts only the lines that ended in a newline; this is the number given to parse() */
1.1 misho 943: size_t lineno = 0;
/* entry currently being built; me.mp is NULL while there is none */
1.1.1.3 misho 944: struct magic_entry me;
1.1 misho 945:
946: php_stream *stream;
947:
948: TSRMLS_FETCH();
949:
1.1.1.3 misho 950: ms->file = fn;
1.1 misho 951: #if PHP_API_VERSION < 20100412
952: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
953: #else
954: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
955: #endif
956:
957: if (stream == NULL) {
/* a missing file is counted as an error but produces no message */
958: if (errno != ENOENT)
959: file_error(ms, errno, "cannot read magic file `%s'",
960: fn);
961: (*errs)++;
1.1.1.2 misho 962: return;
963: }
1.1 misho 964:
1.1.1.3 misho 965: memset(&me, 0, sizeof(me));
966: /* read and parse this file */
1.1.1.2 misho 967: for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
968: if (len == 0) /* null line, garbage, etc */
969: continue;
970: if (line[len - 1] == '\n') {
971: lineno++;
972: line[len - 1] = '\0'; /* delete newline */
973: }
974: switch (line[0]) {
975: case '\0': /* empty, do not parse */
976: case '#': /* comment, do not parse */
977: continue;
978: case '!':
/* "!:keyword ..." lines are routed through the bang[] table; any other '!' line falls through to parse() */
979: if (line[1] == ':') {
1.1 misho 980: size_t i;
981:
982: for (i = 0; bang[i].name != NULL; i++) {
1.1.1.2 misho 983: if ((size_t)(len - 2) > bang[i].len &&
1.1 misho 984: memcmp(bang[i].name, line + 2,
985: bang[i].len) == 0)
986: break;
987: }
988: if (bang[i].name == NULL) {
989: file_error(ms, 0,
990: "Unknown !: entry `%s'", line);
991: (*errs)++;
992: continue;
993: }
/* a "!:" line modifies the entry being built, so one must exist */
1.1.1.3 misho 994: if (me.mp == NULL) {
1.1 misho 995: file_error(ms, 0,
996: "No current entry for :!%s type",
997: bang[i].name);
998: (*errs)++;
999: continue;
1000: }
1.1.1.3 misho 1001: if ((*bang[i].fun)(ms, &me,
1.1 misho 1002: line + bang[i].len + 2) != 0) {
1003: (*errs)++;
1004: continue;
1005: }
1006: continue;
1007: }
1.1.1.2 misho 1008: /*FALLTHROUGH*/
1009: default:
1.1.1.3 misho 1010: again:
/*
 * parse() result: 0 - line consumed, go on to the next line;
 * 1 - store the pending entry with addentry() and parse the same
 * line again; anything else - count an error.
 */
1011: switch (parse(ms, &me, line, lineno, action)) {
1012: case 0:
1013: continue;
1014: case 1:
1015: (void)addentry(ms, &me, mentry, mentrycount);
1016: goto again;
1017: default:
1.1 misho 1018: (*errs)++;
1.1.1.2 misho 1019: break;
1.1 misho 1020: }
1021: }
1.1.1.3 misho 1022: }
/* store the entry still pending when the file ends */
1023: if (me.mp)
1024: (void)addentry(ms, &me, mentry, mentrycount);
1.1.1.2 misho 1025: php_stream_close(stream);
1.1 misho 1026: }
1027:
1028: /*
1029: * parse a file or directory of files
1030: * const char *fn: name of magic file or directory
1031: */
1032: private int
1.1.1.2 misho 1033: cmpstrp(const void *p1, const void *p2)
1034: {
1035: return strcmp(*(char *const *)p1, *(char *const *)p2);
1036: }
1037:
1.1.1.3 misho 1038:
1039: private uint32_t
1040: set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1041: uint32_t starttest)
1042: {
1043: static const char text[] = "text";
1044: static const char binary[] = "binary";
1045: static const size_t len = sizeof(text);
1046:
1047: uint32_t i = starttest;
1048:
1049: do {
1050: set_test_type(me[starttest].mp, me[i].mp);
1051: if ((ms->flags & MAGIC_DEBUG) == 0)
1052: continue;
1053: (void)fprintf(stderr, "%s%s%s: %s\n",
1054: me[i].mp->mimetype,
1055: me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1056: me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1057: me[i].mp->flag & BINTEST ? binary : text);
1058: if (me[i].mp->flag & BINTEST) {
1059: char *p = strstr(me[i].mp->desc, text);
1060: if (p && (p == me[i].mp->desc ||
1061: isspace((unsigned char)p[-1])) &&
1062: (p + len - me[i].mp->desc == MAXstring
1063: || (p[len] == '\0' ||
1064: isspace((unsigned char)p[len]))))
1065: (void)fprintf(stderr, "*** Possible "
1066: "binary test for text type\n");
1067: }
1068: } while (++i < nme && me[i].mp->cont_level != 0);
1069: return i;
1070: }
1071:
1072: private void
1073: set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1074: {
1075: uint32_t i;
1076: for (i = 0; i < nme; i++) {
1077: if (me[i].mp->cont_level == 0 &&
1078: me[i].mp->type == FILE_DEFAULT) {
1079: while (++i < nme)
1080: if (me[i].mp->cont_level == 0)
1081: break;
1082: if (i != nme) {
1083: /* XXX - Ugh! */
1084: ms->line = me[i].mp->lineno;
1085: file_magwarn(ms,
1086: "level 0 \"default\" did not sort last");
1087: }
1088: return;
1089: }
1090: }
1091: }
1092:
1.1.1.2 misho 1093: private int
1.1.1.3 misho 1094: coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1095: struct magic **ma, uint32_t *nma)
1.1 misho 1096: {
1.1.1.3 misho 1097: uint32_t i, mentrycount = 0;
1098: size_t slen;
1099:
1100: for (i = 0; i < nme; i++)
1101: mentrycount += me[i].cont_count;
1102:
1103: slen = sizeof(**ma) * mentrycount;
1104: if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1105: file_oomem(ms, slen);
1106: return -1;
1107: }
1108:
1109: mentrycount = 0;
1110: for (i = 0; i < nme; i++) {
1111: (void)memcpy(*ma + mentrycount, me[i].mp,
1112: me[i].cont_count * sizeof(**ma));
1113: mentrycount += me[i].cont_count;
1114: }
1115: *nma = mentrycount;
1116: return 0;
1117: }
1118:
1119: private void
1120: magic_entry_free(struct magic_entry *me, uint32_t nme)
1121: {
1122: uint32_t i;
1123: if (me == NULL)
1124: return;
1125: for (i = 0; i < nme; i++)
1126: efree(me[i].mp);
1127: efree(me);
1128: }
1129:
1130: private struct magic_map *
1131: apprentice_load(struct magic_set *ms, const char *fn, int action)
1132: {
1133: int errs = 0;
1134: struct magic_entry *mentry[MAGIC_SETS] = { NULL };
1135: uint32_t mentrycount[MAGIC_SETS] = { 0 };
1136: uint32_t i, j;
1.1.1.2 misho 1137: size_t files = 0, maxfiles = 0;
1.1.1.3 misho 1138: char **filearr = NULL;
1.1 misho 1139: struct stat st;
1.1.1.3 misho 1140: struct magic_map *map;
1141: php_stream *dir;
1142: php_stream_dirent d;
1143:
1144: TSRMLS_FETCH();
1.1 misho 1145:
1146: ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */
1147:
1.1.1.3 misho 1148: if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
1149: file_oomem(ms, sizeof(*map));
1150: return NULL;
1151: }
1.1 misho 1152:
1153: /* print silly verbose header for USG compat. */
1154: if (action == FILE_CHECK)
1155: (void)fprintf(stderr, "%s\n", usg_hdr);
1156:
1.1.1.3 misho 1157: {
1158: /* XXX the maxmagic has to be reset each time we load some new magic file.
1159: Where file commando is used it's not essential as the CLI process
1160: ends, multiple loading within the same process wouldn't work. */
1161: int k;
1162: for (k = 0; k < MAGIC_SETS; k++) {
1163: maxmagic[k] = 0;
1164: }
1165: }
1166:
1.1 misho 1167: /* load directory or file */
1.1.1.3 misho 1168: /* FIXME: Read file names and sort them to prevent
1169: non-determinism. See Debian bug #488562. */
1.1 misho 1170: if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1.1.1.3 misho 1171: int mflen;
1172: char mfn[MAXPATHLEN];
1173:
1174: dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1.1.1.2 misho 1175: if (!dir) {
1176: errs++;
1177: goto out;
1178: }
1.1.1.3 misho 1179: while (php_stream_readdir(dir, &d)) {
1180: if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1.1.1.2 misho 1181: file_oomem(ms,
1.1.1.3 misho 1182: strlen(fn) + strlen(d.d_name) + 2);
1.1.1.2 misho 1183: errs++;
1.1.1.3 misho 1184: php_stream_closedir(dir);
1.1.1.2 misho 1185: goto out;
1186: }
1187: if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1188: continue;
1189: }
1190: if (files >= maxfiles) {
1191: size_t mlen;
1192: maxfiles = (maxfiles + 1) * 2;
1193: mlen = maxfiles * sizeof(*filearr);
1194: if ((filearr = CAST(char **,
1.1.1.3 misho 1195: erealloc(filearr, mlen))) == NULL) {
1.1.1.2 misho 1196: file_oomem(ms, mlen);
1.1.1.3 misho 1197: php_stream_closedir(dir);
1.1.1.2 misho 1198: errs++;
1199: goto out;
1.1 misho 1200: }
1201: }
1.1.1.3 misho 1202: filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1.1.1.2 misho 1203: }
1.1.1.3 misho 1204: php_stream_closedir(dir);
1.1.1.2 misho 1205: qsort(filearr, files, sizeof(*filearr), cmpstrp);
1206: for (i = 0; i < files; i++) {
1.1.1.3 misho 1207: load_1(ms, action, filearr[i], &errs, mentry,
1208: mentrycount);
1209: efree(filearr[i]);
1.1.1.2 misho 1210: }
1.1.1.3 misho 1211: efree(filearr);
1.1 misho 1212: } else
1.1.1.3 misho 1213: load_1(ms, action, fn, &errs, mentry, mentrycount);
1.1 misho 1214: if (errs)
1215: goto out;
1216:
1.1.1.3 misho 1217: for (j = 0; j < MAGIC_SETS; j++) {
1218: /* Set types of tests */
1219: for (i = 0; i < mentrycount[j]; ) {
1220: if (mentry[j][i].mp->cont_level != 0) {
1221: i++;
1.1 misho 1222: continue;
1223: }
1.1.1.3 misho 1224: i = set_text_binary(ms, mentry[j], mentrycount[j], i);
1225: }
1226: qsort(mentry[j], mentrycount[j], sizeof(*mentry[j]),
1227: apprentice_sort);
1.1 misho 1228:
1.1.1.3 misho 1229: /*
1230: * Make sure that any level 0 "default" line is last
1231: * (if one exists).
1232: */
1233: set_last_default(ms, mentry[j], mentrycount[j]);
1.1 misho 1234:
1.1.1.3 misho 1235: /* coalesce per file arrays into a single one */
1236: if (coalesce_entries(ms, mentry[j], mentrycount[j],
1237: &map->magic[j], &map->nmagic[j]) == -1) {
1238: errs++;
1239: goto out;
1.1 misho 1240: }
1241: }
1242:
1243: out:
1.1.1.3 misho 1244: for (j = 0; j < MAGIC_SETS; j++)
1245: magic_entry_free(mentry[j], mentrycount[j]);
1246:
1.1 misho 1247: if (errs) {
1.1.1.3 misho 1248: for (j = 0; j < MAGIC_SETS; j++) {
1249: if (map->magic[j])
1250: efree(map->magic[j]);
1251: }
1252: efree(map);
1253: return NULL;
1.1 misho 1254: }
1.1.1.3 misho 1255: return map;
1.1 misho 1256: }
1257:
1258: /*
1259: * extend the sign bit if the comparison is to be signed
1260: */
1261: protected uint64_t
1262: file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1263: {
1264: if (!(m->flag & UNSIGNED)) {
1265: switch(m->type) {
1266: /*
1267: * Do not remove the casts below. They are
1268: * vital. When later compared with the data,
1269: * the sign extension must have happened.
1270: */
1271: case FILE_BYTE:
1272: v = (char) v;
1273: break;
1274: case FILE_SHORT:
1275: case FILE_BESHORT:
1276: case FILE_LESHORT:
1277: v = (short) v;
1278: break;
1279: case FILE_DATE:
1280: case FILE_BEDATE:
1281: case FILE_LEDATE:
1282: case FILE_MEDATE:
1283: case FILE_LDATE:
1284: case FILE_BELDATE:
1285: case FILE_LELDATE:
1286: case FILE_MELDATE:
1287: case FILE_LONG:
1288: case FILE_BELONG:
1289: case FILE_LELONG:
1290: case FILE_MELONG:
1291: case FILE_FLOAT:
1292: case FILE_BEFLOAT:
1293: case FILE_LEFLOAT:
1294: v = (int32_t) v;
1295: break;
1296: case FILE_QUAD:
1297: case FILE_BEQUAD:
1298: case FILE_LEQUAD:
1299: case FILE_QDATE:
1300: case FILE_QLDATE:
1.1.1.3 misho 1301: case FILE_QWDATE:
1.1 misho 1302: case FILE_BEQDATE:
1303: case FILE_BEQLDATE:
1.1.1.3 misho 1304: case FILE_BEQWDATE:
1.1 misho 1305: case FILE_LEQDATE:
1306: case FILE_LEQLDATE:
1.1.1.3 misho 1307: case FILE_LEQWDATE:
1.1 misho 1308: case FILE_DOUBLE:
1309: case FILE_BEDOUBLE:
1310: case FILE_LEDOUBLE:
1311: v = (int64_t) v;
1312: break;
1313: case FILE_STRING:
1314: case FILE_PSTRING:
1315: case FILE_BESTRING16:
1316: case FILE_LESTRING16:
1317: case FILE_REGEX:
1318: case FILE_SEARCH:
1319: case FILE_DEFAULT:
1320: case FILE_INDIRECT:
1.1.1.3 misho 1321: case FILE_NAME:
1322: case FILE_USE:
1.1 misho 1323: break;
1324: default:
1325: if (ms->flags & MAGIC_CHECK)
1326: file_magwarn(ms, "cannot happen: m->type=%d\n",
1327: m->type);
1328: return ~0U;
1329: }
1330: }
1331: return v;
1332: }
1333:
1334: private int
1335: string_modifier_check(struct magic_set *ms, struct magic *m)
1336: {
1337: if ((ms->flags & MAGIC_CHECK) == 0)
1338: return 0;
1339:
1.1.1.2 misho 1340: if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
1341: file_magwarn(ms,
1342: "'/BHhLl' modifiers are only allowed for pascal strings\n");
1343: return -1;
1344: }
1.1 misho 1345: switch (m->type) {
1346: case FILE_BESTRING16:
1347: case FILE_LESTRING16:
1348: if (m->str_flags != 0) {
1349: file_magwarn(ms,
1350: "no modifiers allowed for 16-bit strings\n");
1351: return -1;
1352: }
1353: break;
1354: case FILE_STRING:
1355: case FILE_PSTRING:
1356: if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1357: file_magwarn(ms,
1358: "'/%c' only allowed on regex and search\n",
1359: CHAR_REGEX_OFFSET_START);
1360: return -1;
1361: }
1362: break;
1363: case FILE_SEARCH:
1364: if (m->str_range == 0) {
1365: file_magwarn(ms,
1366: "missing range; defaulting to %d\n",
1367: STRING_DEFAULT_RANGE);
1368: m->str_range = STRING_DEFAULT_RANGE;
1369: return -1;
1370: }
1371: break;
1372: case FILE_REGEX:
1.1.1.2 misho 1373: if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1.1 misho 1374: file_magwarn(ms, "'/%c' not allowed on regex\n",
1.1.1.2 misho 1375: CHAR_COMPACT_WHITESPACE);
1.1 misho 1376: return -1;
1377: }
1.1.1.2 misho 1378: if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1.1 misho 1379: file_magwarn(ms, "'/%c' not allowed on regex\n",
1.1.1.2 misho 1380: CHAR_COMPACT_OPTIONAL_WHITESPACE);
1.1 misho 1381: return -1;
1382: }
1383: break;
1384: default:
1385: file_magwarn(ms, "coding error: m->type=%d\n",
1386: m->type);
1387: return -1;
1388: }
1389: return 0;
1390: }
1391:
1392: private int
1393: get_op(char c)
1394: {
1395: switch (c) {
1396: case '&':
1397: return FILE_OPAND;
1398: case '|':
1399: return FILE_OPOR;
1400: case '^':
1401: return FILE_OPXOR;
1402: case '+':
1403: return FILE_OPADD;
1404: case '-':
1405: return FILE_OPMINUS;
1406: case '*':
1407: return FILE_OPMULTIPLY;
1408: case '/':
1409: return FILE_OPDIVIDE;
1410: case '%':
1411: return FILE_OPMODULO;
1412: default:
1413: return -1;
1414: }
1415: }
1416:
1417: #ifdef ENABLE_CONDITIONALS
1418: private int
1419: get_cond(const char *l, const char **t)
1420: {
1421: static const struct cond_tbl_s {
1422: char name[8];
1423: size_t len;
1424: int cond;
1425: } cond_tbl[] = {
1426: { "if", 2, COND_IF },
1427: { "elif", 4, COND_ELIF },
1428: { "else", 4, COND_ELSE },
1429: { "", 0, COND_NONE },
1430: };
1431: const struct cond_tbl_s *p;
1432:
1433: for (p = cond_tbl; p->len; p++) {
1434: if (strncmp(l, p->name, p->len) == 0 &&
1435: isspace((unsigned char)l[p->len])) {
1436: if (t)
1437: *t = l + p->len;
1438: break;
1439: }
1440: }
1441: return p->cond;
1442: }
1443:
1444: private int
1445: check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1446: {
1447: int last_cond;
1448: last_cond = ms->c.li[cont_level].last_cond;
1449:
1450: switch (cond) {
1451: case COND_IF:
1452: if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1453: if (ms->flags & MAGIC_CHECK)
1454: file_magwarn(ms, "syntax error: `if'");
1455: return -1;
1456: }
1457: last_cond = COND_IF;
1458: break;
1459:
1460: case COND_ELIF:
1461: if (last_cond != COND_IF && last_cond != COND_ELIF) {
1462: if (ms->flags & MAGIC_CHECK)
1463: file_magwarn(ms, "syntax error: `elif'");
1464: return -1;
1465: }
1466: last_cond = COND_ELIF;
1467: break;
1468:
1469: case COND_ELSE:
1470: if (last_cond != COND_IF && last_cond != COND_ELIF) {
1471: if (ms->flags & MAGIC_CHECK)
1472: file_magwarn(ms, "syntax error: `else'");
1473: return -1;
1474: }
1475: last_cond = COND_NONE;
1476: break;
1477:
1478: case COND_NONE:
1479: last_cond = COND_NONE;
1480: break;
1481: }
1482:
1483: ms->c.li[cont_level].last_cond = last_cond;
1484: return 0;
1485: }
1486: #endif /* ENABLE_CONDITIONALS */
1487:
1488: /*
1489: * parse one line from magic file, put into magic[index++] if valid
1490: */
1491: private int
1.1.1.3 misho 1492: parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1493: size_t lineno, int action)
1.1 misho 1494: {
1495: #ifdef ENABLE_CONDITIONALS
1496: static uint32_t last_cont_level = 0;
1497: #endif
1498: size_t i;
1499: struct magic *m;
1500: const char *l = line;
1501: char *t;
1502: int op;
1503: uint32_t cont_level;
1.1.1.3 misho 1504: int32_t diff;
1.1 misho 1505:
1506: cont_level = 0;
1507:
1.1.1.3 misho 1508: /*
1509: * Parse the offset.
1510: */
1.1 misho 1511: while (*l == '>') {
1512: ++l; /* step over */
1513: cont_level++;
1514: }
1515: #ifdef ENABLE_CONDITIONALS
1516: if (cont_level == 0 || cont_level > last_cont_level)
1517: if (file_check_mem(ms, cont_level) == -1)
1518: return -1;
1519: last_cont_level = cont_level;
1520: #endif
1521: if (cont_level != 0) {
1.1.1.3 misho 1522: if (me->mp == NULL) {
1523: file_magerror(ms, "No current entry for continuation");
1524: return -1;
1525: }
1526: if (me->cont_count == 0) {
1527: file_magerror(ms, "Continuations present with 0 count");
1.1 misho 1528: return -1;
1529: }
1.1.1.3 misho 1530: m = &me->mp[me->cont_count - 1];
1531: diff = (int32_t)cont_level - (int32_t)m->cont_level;
1532: if (diff > 1)
1533: file_magwarn(ms, "New continuation level %u is more "
1534: "than one larger than current level %u", cont_level,
1535: m->cont_level);
1.1 misho 1536: if (me->cont_count == me->max_count) {
1537: struct magic *nm;
1538: size_t cnt = me->max_count + ALLOC_CHUNK;
1.1.1.3 misho 1539: if ((nm = CAST(struct magic *, erealloc(me->mp,
1540: sizeof(*nm) * cnt))) == NULL) {
1541: file_oomem(ms, sizeof(*nm) * cnt);
1542: return -1;
1543: }
1.1 misho 1544: me->mp = m = nm;
1.1.1.2 misho 1545: me->max_count = CAST(uint32_t, cnt);
1.1 misho 1546: }
1547: m = &me->mp[me->cont_count++];
1548: (void)memset(m, 0, sizeof(*m));
1549: m->cont_level = cont_level;
1550: } else {
1.1.1.3 misho 1551: static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1552: if (me->mp != NULL)
1553: return 1;
1554: if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1555: file_oomem(ms, len);
1556: return -1;
1.1 misho 1557: }
1.1.1.3 misho 1558: me->mp = m;
1559: me->max_count = ALLOC_CHUNK;
1.1 misho 1560: (void)memset(m, 0, sizeof(*m));
1561: m->factor_op = FILE_FACTOR_OP_NONE;
1562: m->cont_level = 0;
1563: me->cont_count = 1;
1564: }
1.1.1.2 misho 1565: m->lineno = CAST(uint32_t, lineno);
1.1 misho 1566:
1567: if (*l == '&') { /* m->cont_level == 0 checked below. */
1568: ++l; /* step over */
1569: m->flag |= OFFADD;
1570: }
1571: if (*l == '(') {
1572: ++l; /* step over */
1573: m->flag |= INDIR;
1574: if (m->flag & OFFADD)
1575: m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1576:
1577: if (*l == '&') { /* m->cont_level == 0 checked below */
1578: ++l; /* step over */
1579: m->flag |= OFFADD;
1580: }
1581: }
1582: /* Indirect offsets are not valid at level 0. */
1583: if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1584: if (ms->flags & MAGIC_CHECK)
1585: file_magwarn(ms, "relative offset at level 0");
1586:
1587: /* get offset, then skip over it */
1588: m->offset = (uint32_t)strtoul(l, &t, 0);
1589: if (l == t)
1590: if (ms->flags & MAGIC_CHECK)
1591: file_magwarn(ms, "offset `%s' invalid", l);
1592: l = t;
1593:
1594: if (m->flag & INDIR) {
1595: m->in_type = FILE_LONG;
1596: m->in_offset = 0;
1597: /*
1598: * read [.lbs][+-]nnnnn)
1599: */
1600: if (*l == '.') {
1601: l++;
1602: switch (*l) {
1603: case 'l':
1604: m->in_type = FILE_LELONG;
1605: break;
1606: case 'L':
1607: m->in_type = FILE_BELONG;
1608: break;
1609: case 'm':
1610: m->in_type = FILE_MELONG;
1611: break;
1612: case 'h':
1613: case 's':
1614: m->in_type = FILE_LESHORT;
1615: break;
1616: case 'H':
1617: case 'S':
1618: m->in_type = FILE_BESHORT;
1619: break;
1620: case 'c':
1621: case 'b':
1622: case 'C':
1623: case 'B':
1624: m->in_type = FILE_BYTE;
1625: break;
1626: case 'e':
1627: case 'f':
1628: case 'g':
1629: m->in_type = FILE_LEDOUBLE;
1630: break;
1631: case 'E':
1632: case 'F':
1633: case 'G':
1634: m->in_type = FILE_BEDOUBLE;
1635: break;
1636: case 'i':
1637: m->in_type = FILE_LEID3;
1638: break;
1639: case 'I':
1640: m->in_type = FILE_BEID3;
1641: break;
1642: default:
1643: if (ms->flags & MAGIC_CHECK)
1644: file_magwarn(ms,
1645: "indirect offset type `%c' invalid",
1646: *l);
1647: break;
1648: }
1649: l++;
1650: }
1651:
1652: m->in_op = 0;
1653: if (*l == '~') {
1654: m->in_op |= FILE_OPINVERSE;
1655: l++;
1656: }
1657: if ((op = get_op(*l)) != -1) {
1658: m->in_op |= op;
1659: l++;
1660: }
1661: if (*l == '(') {
1662: m->in_op |= FILE_OPINDIRECT;
1663: l++;
1664: }
1665: if (isdigit((unsigned char)*l) || *l == '-') {
1666: m->in_offset = (int32_t)strtol(l, &t, 0);
1667: if (l == t)
1668: if (ms->flags & MAGIC_CHECK)
1669: file_magwarn(ms,
1670: "in_offset `%s' invalid", l);
1671: l = t;
1672: }
1673: if (*l++ != ')' ||
1674: ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1675: if (ms->flags & MAGIC_CHECK)
1676: file_magwarn(ms,
1677: "missing ')' in indirect offset");
1678: }
1679: EATAB;
1680:
1681: #ifdef ENABLE_CONDITIONALS
1682: m->cond = get_cond(l, &l);
1683: if (check_cond(ms, m->cond, cont_level) == -1)
1684: return -1;
1685:
1686: EATAB;
1687: #endif
1688:
1.1.1.3 misho 1689: /*
1690: * Parse the type.
1691: */
1.1 misho 1692: if (*l == 'u') {
1.1.1.3 misho 1693: /*
1694: * Try it as a keyword type prefixed by "u"; match what
1695: * follows the "u". If that fails, try it as an SUS
1696: * integer type.
1697: */
1698: m->type = get_type(type_tbl, l + 1, &l);
1699: if (m->type == FILE_INVALID) {
1700: /*
1701: * Not a keyword type; parse it as an SUS type,
1702: * 'u' possibly followed by a number or C/S/L.
1703: */
1704: m->type = get_standard_integer_type(l, &l);
1705: }
1706: // It's unsigned.
1707: if (m->type != FILE_INVALID)
1708: m->flag |= UNSIGNED;
1709: } else {
1710: /*
1711: * Try it as a keyword type. If that fails, try it as
1712: * an SUS integer type if it begins with "d" or as an
1713: * SUS string type if it begins with "s". In any case,
1714: * it's not unsigned.
1715: */
1716: m->type = get_type(type_tbl, l, &l);
1717: if (m->type == FILE_INVALID) {
1718: /*
1719: * Not a keyword type; parse it as an SUS type,
1720: * either 'd' possibly followed by a number or
1721: * C/S/L, or just 's'.
1722: */
1723: if (*l == 'd')
1724: m->type = get_standard_integer_type(l, &l);
1725: else if (*l == 's' && !isalpha((unsigned char)l[1])) {
1726: m->type = FILE_STRING;
1.1 misho 1727: ++l;
1.1.1.3 misho 1728: }
1729: }
1.1 misho 1730: }
1731:
1.1.1.3 misho 1732: if (m->type == FILE_INVALID) {
1733: /* Not found - try it as a special keyword. */
1734: m->type = get_type(special_tbl, l, &l);
1735: }
1736:
1.1 misho 1737: if (m->type == FILE_INVALID) {
1738: if (ms->flags & MAGIC_CHECK)
1739: file_magwarn(ms, "type `%s' invalid", l);
1.1.1.3 misho 1740: if (me->mp) {
1741: efree(me->mp);
1742: me->mp = NULL;
1743: }
1.1 misho 1744: return -1;
1745: }
1746:
1747: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1748: /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1749:
1750: m->mask_op = 0;
1751: if (*l == '~') {
1752: if (!IS_LIBMAGIC_STRING(m->type))
1753: m->mask_op |= FILE_OPINVERSE;
1754: else if (ms->flags & MAGIC_CHECK)
1755: file_magwarn(ms, "'~' invalid for string types");
1756: ++l;
1757: }
1758: m->str_range = 0;
1.1.1.2 misho 1759: m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1.1 misho 1760: if ((op = get_op(*l)) != -1) {
1761: if (!IS_LIBMAGIC_STRING(m->type)) {
1762: uint64_t val;
1763: ++l;
1764: m->mask_op |= op;
1765: val = (uint64_t)strtoull(l, &t, 0);
1766: l = t;
1767: m->num_mask = file_signextend(ms, m, val);
1768: eatsize(&l);
1769: }
1770: else if (op == FILE_OPDIVIDE) {
1771: int have_range = 0;
1772: while (!isspace((unsigned char)*++l)) {
1773: switch (*l) {
1774: case '0': case '1': case '2':
1775: case '3': case '4': case '5':
1776: case '6': case '7': case '8':
1777: case '9':
1778: if (have_range &&
1779: (ms->flags & MAGIC_CHECK))
1780: file_magwarn(ms,
1781: "multiple ranges");
1782: have_range = 1;
1.1.1.2 misho 1783: m->str_range = CAST(uint32_t,
1784: strtoul(l, &t, 0));
1.1 misho 1785: if (m->str_range == 0)
1786: file_magwarn(ms,
1787: "zero range");
1788: l = t - 1;
1789: break;
1.1.1.2 misho 1790: case CHAR_COMPACT_WHITESPACE:
1791: m->str_flags |=
1792: STRING_COMPACT_WHITESPACE;
1.1 misho 1793: break;
1.1.1.2 misho 1794: case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1.1 misho 1795: m->str_flags |=
1.1.1.2 misho 1796: STRING_COMPACT_OPTIONAL_WHITESPACE;
1.1 misho 1797: break;
1798: case CHAR_IGNORE_LOWERCASE:
1799: m->str_flags |= STRING_IGNORE_LOWERCASE;
1800: break;
1801: case CHAR_IGNORE_UPPERCASE:
1802: m->str_flags |= STRING_IGNORE_UPPERCASE;
1803: break;
1804: case CHAR_REGEX_OFFSET_START:
1805: m->str_flags |= REGEX_OFFSET_START;
1806: break;
1.1.1.2 misho 1807: case CHAR_BINTEST:
1808: m->str_flags |= STRING_BINTEST;
1809: break;
1810: case CHAR_TEXTTEST:
1811: m->str_flags |= STRING_TEXTTEST;
1812: break;
1.1.1.3 misho 1813: case CHAR_TRIM:
1814: m->str_flags |= STRING_TRIM;
1815: break;
1.1.1.2 misho 1816: case CHAR_PSTRING_1_LE:
1817: if (m->type != FILE_PSTRING)
1818: goto bad;
1819: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
1820: break;
1821: case CHAR_PSTRING_2_BE:
1822: if (m->type != FILE_PSTRING)
1823: goto bad;
1824: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
1825: break;
1826: case CHAR_PSTRING_2_LE:
1827: if (m->type != FILE_PSTRING)
1828: goto bad;
1829: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
1830: break;
1831: case CHAR_PSTRING_4_BE:
1832: if (m->type != FILE_PSTRING)
1833: goto bad;
1834: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
1835: break;
1836: case CHAR_PSTRING_4_LE:
1837: if (m->type != FILE_PSTRING)
1838: goto bad;
1839: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
1840: break;
1841: case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1842: if (m->type != FILE_PSTRING)
1843: goto bad;
1844: m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1845: break;
1.1 misho 1846: default:
1.1.1.2 misho 1847: bad:
1.1 misho 1848: if (ms->flags & MAGIC_CHECK)
1849: file_magwarn(ms,
1.1.1.2 misho 1850: "string extension `%c' "
1851: "invalid", *l);
1.1 misho 1852: return -1;
1853: }
1854: /* allow multiple '/' for readability */
1855: if (l[1] == '/' &&
1856: !isspace((unsigned char)l[2]))
1857: l++;
1858: }
1859: if (string_modifier_check(ms, m) == -1)
1860: return -1;
1861: }
1862: else {
1863: if (ms->flags & MAGIC_CHECK)
1864: file_magwarn(ms, "invalid string op: %c", *t);
1865: return -1;
1866: }
1867: }
1868: /*
1869: * We used to set mask to all 1's here, instead let's just not do
1870: * anything if mask = 0 (unless you have a better idea)
1871: */
1872: EATAB;
1873:
1874: switch (*l) {
1875: case '>':
1876: case '<':
1877: m->reln = *l;
1878: ++l;
1879: if (*l == '=') {
1880: if (ms->flags & MAGIC_CHECK) {
1881: file_magwarn(ms, "%c= not supported",
1882: m->reln);
1883: return -1;
1884: }
1885: ++l;
1886: }
1887: break;
1888: /* Old-style anding: "0 byte &0x80 dynamically linked" */
1889: case '&':
1890: case '^':
1891: case '=':
1892: m->reln = *l;
1893: ++l;
1894: if (*l == '=') {
1895: /* HP compat: ignore &= etc. */
1896: ++l;
1897: }
1898: break;
1899: case '!':
1900: m->reln = *l;
1901: ++l;
1902: break;
1903: default:
1904: m->reln = '='; /* the default relation */
1905: if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
1906: isspace((unsigned char)l[1])) || !l[1])) {
1907: m->reln = *l;
1908: ++l;
1909: }
1910: break;
1911: }
1912: /*
1913: * Grab the value part, except for an 'x' reln.
1914: */
1915: if (m->reln != 'x' && getvalue(ms, m, &l, action))
1916: return -1;
1917:
1918: /*
1919: * TODO finish this macro and start using it!
1920: * #define offsetcheck {if (offset > HOWMANY-1)
1921: * magwarn("offset too big"); }
1922: */
1923:
1924: /*
1925: * Now get last part - the description
1926: */
1927: EATAB;
1928: if (l[0] == '\b') {
1929: ++l;
1930: m->flag |= NOSPACE;
1931: } else if ((l[0] == '\\') && (l[1] == 'b')) {
1932: ++l;
1933: ++l;
1934: m->flag |= NOSPACE;
1935: }
1936: for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
1937: continue;
1938: if (i == sizeof(m->desc)) {
1939: m->desc[sizeof(m->desc) - 1] = '\0';
1940: if (ms->flags & MAGIC_CHECK)
1941: file_magwarn(ms, "description `%s' truncated", m->desc);
1942: }
1943:
1944: /*
1945: * We only do this check while compiling, or if any of the magic
1946: * files were not compiled.
1947: */
1948: if (ms->flags & MAGIC_CHECK) {
1949: if (check_format(ms, m) == -1)
1950: return -1;
1951: }
1952: m->mimetype[0] = '\0'; /* initialise MIME type to none */
1953: return 0;
1954: }
1955:
1956: /*
1957: * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
1958: * if valid
1959: */
1960: private int
1961: parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
1962: {
1963: const char *l = line;
1964: char *el;
1965: unsigned long factor;
1966: struct magic *m = &me->mp[0];
1967:
1968: if (m->factor_op != FILE_FACTOR_OP_NONE) {
1969: file_magwarn(ms,
1970: "Current entry already has a strength type: %c %d",
1971: m->factor_op, m->factor);
1972: return -1;
1973: }
1974: EATAB;
1975: switch (*l) {
1976: case FILE_FACTOR_OP_NONE:
1977: case FILE_FACTOR_OP_PLUS:
1978: case FILE_FACTOR_OP_MINUS:
1979: case FILE_FACTOR_OP_TIMES:
1980: case FILE_FACTOR_OP_DIV:
1981: m->factor_op = *l++;
1982: break;
1983: default:
1984: file_magwarn(ms, "Unknown factor op `%c'", *l);
1985: return -1;
1986: }
1987: EATAB;
1988: factor = strtoul(l, &el, 0);
1989: if (factor > 255) {
1990: file_magwarn(ms, "Too large factor `%lu'", factor);
1991: goto out;
1992: }
1993: if (*el && !isspace((unsigned char)*el)) {
1994: file_magwarn(ms, "Bad factor `%s'", l);
1995: goto out;
1996: }
1997: m->factor = (uint8_t)factor;
1998: if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
1999: file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2000: m->factor_op, m->factor);
2001: goto out;
2002: }
2003: return 0;
2004: out:
2005: m->factor_op = FILE_FACTOR_OP_NONE;
2006: m->factor = 0;
2007: return -1;
2008: }
2009:
2010: /*
1.1.1.2 misho 2011: * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2012: * magic[index - 1]
1.1 misho 2013: */
2014: private int
2015: parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2016: {
2017: size_t i;
2018: const char *l = line;
2019: struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2020:
2021: if (m->apple[0] != '\0') {
1.1.1.2 misho 2022: file_magwarn(ms, "Current entry already has a APPLE type "
2023: "`%.8s', new type `%s'", m->mimetype, l);
1.1 misho 2024: return -1;
2025: }
2026:
2027: EATAB;
1.1.1.2 misho 2028: for (i = 0; *l && ((isascii((unsigned char)*l) &&
2029: isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2030: i < sizeof(m->apple); m->apple[i++] = *l++)
1.1 misho 2031: continue;
2032: if (i == sizeof(m->apple) && *l) {
1.1.1.2 misho 2033: /* We don't need to NUL terminate here, printing handles it */
1.1 misho 2034: if (ms->flags & MAGIC_CHECK)
1.1.1.2 misho 2035: file_magwarn(ms, "APPLE type `%s' truncated %"
2036: SIZE_T_FORMAT "u", line, i);
1.1 misho 2037: }
2038:
2039: if (i > 0)
2040: return 0;
2041: else
2042: return -1;
2043: }
2044:
2045: /*
2046: * parse a MIME annotation line from magic file, put into magic[index - 1]
2047: * if valid
2048: */
2049: private int
2050: parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2051: {
2052: size_t i;
2053: const char *l = line;
2054: struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2055:
2056: if (m->mimetype[0] != '\0') {
2057: file_magwarn(ms, "Current entry already has a MIME type `%s',"
2058: " new type `%s'", m->mimetype, l);
2059: return -1;
2060: }
2061:
2062: EATAB;
1.1.1.2 misho 2063: for (i = 0; *l && ((isascii((unsigned char)*l) &&
2064: isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2065: i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
1.1 misho 2066: continue;
2067: if (i == sizeof(m->mimetype)) {
1.1.1.2 misho 2068: m->mimetype[sizeof(m->mimetype) - 1] = '\0';
1.1 misho 2069: if (ms->flags & MAGIC_CHECK)
1.1.1.2 misho 2070: file_magwarn(ms, "MIME type `%s' truncated %"
2071: SIZE_T_FORMAT "u", m->mimetype, i);
1.1 misho 2072: } else
2073: m->mimetype[i] = '\0';
2074:
2075: if (i > 0)
2076: return 0;
2077: else
2078: return -1;
2079: }
2080:
2081: private int
2082: check_format_type(const char *ptr, int type)
2083: {
2084: int quad = 0;
2085: if (*ptr == '\0') {
2086: /* Missing format string; bad */
2087: return -1;
2088: }
2089:
2090: switch (type) {
2091: case FILE_FMT_QUAD:
2092: quad = 1;
2093: /*FALLTHROUGH*/
2094: case FILE_FMT_NUM:
2095: if (*ptr == '-')
2096: ptr++;
2097: if (*ptr == '.')
2098: ptr++;
2099: while (isdigit((unsigned char)*ptr)) ptr++;
2100: if (*ptr == '.')
2101: ptr++;
2102: while (isdigit((unsigned char)*ptr)) ptr++;
2103: if (quad) {
2104: if (*ptr++ != 'l')
2105: return -1;
2106: if (*ptr++ != 'l')
2107: return -1;
2108: }
2109:
2110: switch (*ptr++) {
2111: case 'l':
2112: switch (*ptr++) {
2113: case 'i':
2114: case 'd':
2115: case 'u':
1.1.1.3 misho 2116: case 'o':
1.1 misho 2117: case 'x':
2118: case 'X':
2119: return 0;
2120: default:
2121: return -1;
2122: }
2123:
2124: case 'h':
2125: switch (*ptr++) {
2126: case 'h':
2127: switch (*ptr++) {
2128: case 'i':
2129: case 'd':
2130: case 'u':
1.1.1.3 misho 2131: case 'o':
1.1 misho 2132: case 'x':
2133: case 'X':
2134: return 0;
2135: default:
2136: return -1;
2137: }
2138: case 'd':
2139: return 0;
2140: default:
2141: return -1;
2142: }
2143:
2144: case 'i':
2145: case 'c':
2146: case 'd':
2147: case 'u':
1.1.1.3 misho 2148: case 'o':
1.1 misho 2149: case 'x':
2150: case 'X':
2151: return 0;
2152:
2153: default:
2154: return -1;
2155: }
2156:
2157: case FILE_FMT_FLOAT:
2158: case FILE_FMT_DOUBLE:
2159: if (*ptr == '-')
2160: ptr++;
2161: if (*ptr == '.')
2162: ptr++;
2163: while (isdigit((unsigned char)*ptr)) ptr++;
2164: if (*ptr == '.')
2165: ptr++;
2166: while (isdigit((unsigned char)*ptr)) ptr++;
2167:
2168: switch (*ptr++) {
2169: case 'e':
2170: case 'E':
2171: case 'f':
2172: case 'F':
2173: case 'g':
2174: case 'G':
2175: return 0;
2176:
2177: default:
2178: return -1;
2179: }
2180:
2181:
2182: case FILE_FMT_STR:
2183: if (*ptr == '-')
2184: ptr++;
2185: while (isdigit((unsigned char )*ptr))
2186: ptr++;
2187: if (*ptr == '.') {
2188: ptr++;
2189: while (isdigit((unsigned char )*ptr))
2190: ptr++;
2191: }
2192:
2193: switch (*ptr++) {
2194: case 's':
2195: return 0;
2196: default:
2197: return -1;
2198: }
2199:
2200: default:
2201: /* internal error */
2202: abort();
2203: }
2204: /*NOTREACHED*/
2205: return -1;
2206: }
2207:
2208: /*
2209: * Check that the optional printf format in description matches
2210: * the type of the magic.
2211: */
2212: private int
2213: check_format(struct magic_set *ms, struct magic *m)
2214: {
2215: char *ptr;
2216:
2217: for (ptr = m->desc; *ptr; ptr++)
2218: if (*ptr == '%')
2219: break;
2220: if (*ptr == '\0') {
2221: /* No format string; ok */
2222: return 1;
2223: }
2224:
2225: assert(file_nformats == file_nnames);
2226:
2227: if (m->type >= file_nformats) {
2228: file_magwarn(ms, "Internal error inconsistency between "
2229: "m->type and format strings");
2230: return -1;
2231: }
2232: if (file_formats[m->type] == FILE_FMT_NONE) {
2233: file_magwarn(ms, "No format string for `%s' with description "
2234: "`%s'", m->desc, file_names[m->type]);
2235: return -1;
2236: }
2237:
2238: ptr++;
2239: if (check_format_type(ptr, file_formats[m->type]) == -1) {
2240: /*
2241: * TODO: this error message is unhelpful if the format
2242: * string is not one character long
2243: */
2244: file_magwarn(ms, "Printf format `%c' is not valid for type "
2245: "`%s' in description `%s'", *ptr ? *ptr : '?',
2246: file_names[m->type], m->desc);
2247: return -1;
2248: }
2249:
2250: for (; *ptr; ptr++) {
2251: if (*ptr == '%') {
2252: file_magwarn(ms,
2253: "Too many format strings (should have at most one) "
2254: "for `%s' with description `%s'",
2255: file_names[m->type], m->desc);
2256: return -1;
2257: }
2258: }
2259: return 0;
2260: }
2261:
2262: /*
2263: * Read a numeric value from a pointer, into the value union of a magic
2264: * pointer, according to the magic type. Update the string pointer to point
2265: * just after the number read. Return 0 for success, non-zero for failure.
2266: */
2267: private int
2268: getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2269: {
2270: switch (m->type) {
2271: case FILE_BESTRING16:
2272: case FILE_LESTRING16:
2273: case FILE_STRING:
2274: case FILE_PSTRING:
2275: case FILE_REGEX:
2276: case FILE_SEARCH:
1.1.1.3 misho 2277: case FILE_NAME:
2278: case FILE_USE:
1.1 misho 2279: *p = getstr(ms, m, *p, action == FILE_COMPILE);
2280: if (*p == NULL) {
2281: if (ms->flags & MAGIC_CHECK)
2282: file_magwarn(ms, "cannot get string from `%s'",
2283: m->value.s);
2284: return -1;
2285: }
2286: return 0;
2287: case FILE_FLOAT:
2288: case FILE_BEFLOAT:
2289: case FILE_LEFLOAT:
2290: if (m->reln != 'x') {
2291: char *ep;
2292: #ifdef HAVE_STRTOF
2293: m->value.f = strtof(*p, &ep);
2294: #else
2295: m->value.f = (float)strtod(*p, &ep);
2296: #endif
2297: *p = ep;
2298: }
2299: return 0;
2300: case FILE_DOUBLE:
2301: case FILE_BEDOUBLE:
2302: case FILE_LEDOUBLE:
2303: if (m->reln != 'x') {
2304: char *ep;
2305: m->value.d = strtod(*p, &ep);
2306: *p = ep;
2307: }
2308: return 0;
2309: default:
2310: if (m->reln != 'x') {
2311: char *ep;
2312: m->value.q = file_signextend(ms, m,
2313: (uint64_t)strtoull(*p, &ep, 0));
2314: *p = ep;
2315: eatsize(p);
2316: }
2317: return 0;
2318: }
2319: }
2320:
2321: /*
2322: * Convert a string containing C character escapes. Stop at an unescaped
2323: * space or tab.
2324: * Copy the converted version to "m->value.s", and the length in m->vallen.
2325: * Return updated scan pointer as function result. Warn if set.
2326: */
2327: private const char *
2328: getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2329: {
2330: const char *origs = s;
2331: char *p = m->value.s;
2332: size_t plen = sizeof(m->value.s);
2333: char *origp = p;
2334: char *pmax = p + plen - 1;
2335: int c;
2336: int val;
2337:
2338: while ((c = *s++) != '\0') {
2339: if (isspace((unsigned char) c))
2340: break;
2341: if (p >= pmax) {
2342: file_error(ms, 0, "string too long: `%s'", origs);
2343: return NULL;
2344: }
2345: if (c == '\\') {
2346: switch(c = *s++) {
2347:
2348: case '\0':
2349: if (warn)
2350: file_magwarn(ms, "incomplete escape");
2351: goto out;
2352:
2353: case '\t':
2354: if (warn) {
2355: file_magwarn(ms,
2356: "escaped tab found, use \\t instead");
2357: warn = 0; /* already did */
2358: }
2359: /*FALLTHROUGH*/
2360: default:
2361: if (warn) {
2362: if (isprint((unsigned char)c)) {
2363: /* Allow escaping of
2364: * ``relations'' */
1.1.1.2 misho 2365: if (strchr("<>&^=!", c) == NULL
2366: && (m->type != FILE_REGEX ||
2367: strchr("[]().*?^$|{}", c)
2368: == NULL)) {
1.1 misho 2369: file_magwarn(ms, "no "
2370: "need to escape "
2371: "`%c'", c);
2372: }
2373: } else {
2374: file_magwarn(ms,
2375: "unknown escape sequence: "
2376: "\\%03o", c);
2377: }
2378: }
2379: /*FALLTHROUGH*/
2380: /* space, perhaps force people to use \040? */
2381: case ' ':
2382: #if 0
2383: /*
2384: * Other things people escape, but shouldn't need to,
2385: * so we disallow them
2386: */
2387: case '\'':
2388: case '"':
2389: case '?':
2390: #endif
2391: /* Relations */
2392: case '>':
2393: case '<':
2394: case '&':
2395: case '^':
2396: case '=':
2397: case '!':
2398: /* and baskslash itself */
2399: case '\\':
2400: *p++ = (char) c;
2401: break;
2402:
2403: case 'a':
2404: *p++ = '\a';
2405: break;
2406:
2407: case 'b':
2408: *p++ = '\b';
2409: break;
2410:
2411: case 'f':
2412: *p++ = '\f';
2413: break;
2414:
2415: case 'n':
2416: *p++ = '\n';
2417: break;
2418:
2419: case 'r':
2420: *p++ = '\r';
2421: break;
2422:
2423: case 't':
2424: *p++ = '\t';
2425: break;
2426:
2427: case 'v':
2428: *p++ = '\v';
2429: break;
2430:
2431: /* \ and up to 3 octal digits */
2432: case '0':
2433: case '1':
2434: case '2':
2435: case '3':
2436: case '4':
2437: case '5':
2438: case '6':
2439: case '7':
2440: val = c - '0';
2441: c = *s++; /* try for 2 */
2442: if (c >= '0' && c <= '7') {
2443: val = (val << 3) | (c - '0');
2444: c = *s++; /* try for 3 */
2445: if (c >= '0' && c <= '7')
2446: val = (val << 3) | (c-'0');
2447: else
2448: --s;
2449: }
2450: else
2451: --s;
2452: *p++ = (char)val;
2453: break;
2454:
2455: /* \x and up to 2 hex digits */
2456: case 'x':
2457: val = 'x'; /* Default if no digits */
2458: c = hextoint(*s++); /* Get next char */
2459: if (c >= 0) {
2460: val = c;
2461: c = hextoint(*s++);
2462: if (c >= 0)
2463: val = (val << 4) + c;
2464: else
2465: --s;
2466: } else
2467: --s;
2468: *p++ = (char)val;
2469: break;
2470: }
2471: } else
2472: *p++ = (char)c;
2473: }
2474: out:
2475: *p = '\0';
1.1.1.2 misho 2476: m->vallen = CAST(unsigned char, (p - origp));
1.1 misho 2477: if (m->type == FILE_PSTRING)
1.1.1.2 misho 2478: m->vallen += (unsigned char)file_pstring_length_size(m);
1.1 misho 2479: return s;
2480: }
2481:
2482:
/*
 * Translate a single hexadecimal digit character into its numeric value.
 * Only the low byte of `c' is used for classification.  Returns 0..15 for
 * '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else (including any
 * byte outside the 7-bit ASCII range).
 */
private int
hextoint(int c)
{
	const unsigned char byte = (unsigned char)c;

	if (byte > 0x7f)
		return -1;
	if (isdigit(byte))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
2497:
2498:
/*
 * Write a string to `fp', rendering non-printable bytes as C escapes.
 *
 * When `len' equals ~0U the string is treated as NUL terminated; otherwise
 * exactly `len' bytes are written (embedded NULs included).  Bytes in the
 * range 040..0176 are emitted verbatim; \a \b \f \n \r \t \v use their
 * mnemonic escapes and every other byte is written as a backslash followed
 * by three octal digits.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	for (;;) {
		char c;
		char mnemonic;

		if (len == ~0U) {
			if ((c = *s++) == '\0')
				break;
		} else {
			if (len-- == 0)
				break;
			c = *s++;
		}

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\a':
			mnemonic = 'a';
			break;
		case '\b':
			mnemonic = 'b';
			break;
		case '\f':
			mnemonic = 'f';
			break;
		case '\n':
			mnemonic = 'n';
			break;
		case '\r':
			mnemonic = 'r';
			break;
		case '\t':
			mnemonic = 't';
			break;
		case '\v':
			mnemonic = 'v';
			break;
		default:
			mnemonic = '\0';	/* no short form: use octal */
			break;
		}

		if (mnemonic != '\0')
			(void) fputc(mnemonic, fp);
		else
			(void) fprintf(fp, "%.3o", c & 0377);
	}
}
2558:
/*
 * eatsize(): skip the size specifier that may follow a number [eg. 10UL].
 *
 * An optional 'u' is skipped first, then at most one of 'l' (long),
 * 's'/'h' (short) or 'b'/'c' (char/byte); all letters are matched without
 * regard to case.  *p is updated to point after whatever was skipped.
 */
private void
eatsize(const char **p)
{
	const char *cur = *p;
	int suffix;

	if (LOWCASE(*cur) == 'u')
		cur++;

	suffix = LOWCASE(*cur);
	if (suffix == 'l' || suffix == 's' || suffix == 'h' ||
	    suffix == 'b' || suffix == 'c')
		cur++;

	*p = cur;
}
2584:
2585: /*
2586: * handle a compiled file.
2587: */
1.1.1.3 misho 2588:
2589: private struct magic_map *
2590: apprentice_map(struct magic_set *ms, const char *fn)
1.1 misho 2591: {
2592: uint32_t *ptr;
1.1.1.3 misho 2593: uint32_t version, entries, nentries;
1.1 misho 2594: int needsbyteswap;
2595: char *dbname = NULL;
1.1.1.3 misho 2596: struct magic_map *map;
2597: size_t i;
1.1 misho 2598: php_stream *stream = NULL;
2599: php_stream_statbuf st;
2600:
2601:
2602: TSRMLS_FETCH();
2603:
1.1.1.3 misho 2604: if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2605: file_oomem(ms, sizeof(*map));
2606: efree(map);
2607: goto error;
2608: }
2609:
1.1 misho 2610: if (fn == NULL) {
1.1.1.3 misho 2611: map->p = (void *)&php_magic_database;
1.1 misho 2612: goto internal_loaded;
2613: }
2614:
1.1.1.3 misho 2615: #ifdef PHP_WIN32
2616: /* Don't bother on windows with php_stream_open_wrapper,
2617: return to give apprentice_load() a chance. */
2618: if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
2619: if (st.sb.st_mode & S_IFDIR) {
2620: goto error;
2621: }
2622: }
2623: #endif
2624:
1.1 misho 2625: dbname = mkdbname(ms, fn, 0);
2626: if (dbname == NULL)
1.1.1.3 misho 2627: goto error;
1.1 misho 2628:
2629: #if PHP_API_VERSION < 20100412
2630: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
2631: #else
2632: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2633: #endif
2634:
2635: if (!stream) {
1.1.1.3 misho 2636: goto error;
1.1 misho 2637: }
2638:
2639: if (php_stream_stat(stream, &st) < 0) {
2640: file_error(ms, errno, "cannot stat `%s'", dbname);
1.1.1.3 misho 2641: goto error;
1.1 misho 2642: }
2643:
2644: if (st.sb.st_size < 8) {
2645: file_error(ms, 0, "file `%s' is too small", dbname);
1.1.1.3 misho 2646: goto error;
1.1 misho 2647: }
2648:
1.1.1.3 misho 2649: map->len = (size_t)st.sb.st_size;
2650: if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
2651: file_oomem(ms, map->len);
2652: goto error;
2653: }
2654: if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
1.1 misho 2655: file_badread(ms);
1.1.1.3 misho 2656: goto error;
1.1 misho 2657: }
1.1.1.3 misho 2658: map->len = 0;
2659: #define RET 1
1.1 misho 2660:
2661: php_stream_close(stream);
2662: stream = NULL;
2663:
2664: internal_loaded:
1.1.1.3 misho 2665: ptr = (uint32_t *)(void *)map->p;
1.1 misho 2666: if (*ptr != MAGICNO) {
2667: if (swap4(*ptr) != MAGICNO) {
2668: file_error(ms, 0, "bad magic in `%s'", dbname);
1.1.1.3 misho 2669: goto error;
1.1 misho 2670: }
2671: needsbyteswap = 1;
1.1.1.2 misho 2672: } else
1.1 misho 2673: needsbyteswap = 0;
2674: if (needsbyteswap)
2675: version = swap4(ptr[1]);
2676: else
2677: version = ptr[1];
2678: if (version != VERSIONNO) {
2679: file_error(ms, 0, "File %d.%d supports only version %d magic "
2680: "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2681: VERSIONNO, dbname, version);
1.1.1.3 misho 2682: goto error;
1.1 misho 2683: }
2684:
2685: /* php_magic_database is a const, performing writes will segfault. This is for big-endian
2686: machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2687: future. */
2688: if (needsbyteswap && fn == NULL) {
1.1.1.3 misho 2689: map->p = emalloc(sizeof(php_magic_database));
2690: map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
1.1 misho 2691: }
2692:
1.1.1.3 misho 2693: if (NULL != fn) {
2694: nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2695: entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2696: if ((off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
2697: file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
2698: dbname, (unsigned long long)st.sb.st_size,
2699: sizeof(struct magic));
2700: goto error;
2701: }
2702: }
2703: map->magic[0] = CAST(struct magic *, map->p) + 1;
2704: nentries = 0;
2705: for (i = 0; i < MAGIC_SETS; i++) {
2706: if (needsbyteswap)
2707: map->nmagic[i] = swap4(ptr[i + 2]);
2708: else
2709: map->nmagic[i] = ptr[i + 2];
2710: if (i != MAGIC_SETS - 1)
2711: map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2712: nentries += map->nmagic[i];
2713: }
2714: if (NULL != fn && entries != nentries + 1) {
2715: file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2716: dbname, entries, nentries + 1);
2717: goto error;
1.1 misho 2718: }
2719:
1.1.1.3 misho 2720: if (needsbyteswap)
2721: for (i = 0; i < MAGIC_SETS; i++)
2722: byteswap(map->magic[i], map->nmagic[i]);
2723:
1.1 misho 2724: if (dbname) {
2725: efree(dbname);
2726: }
1.1.1.3 misho 2727: return map;
1.1 misho 2728:
1.1.1.3 misho 2729: error:
1.1 misho 2730: if (stream) {
2731: php_stream_close(stream);
2732: }
1.1.1.3 misho 2733: apprentice_unmap(map);
1.1 misho 2734: if (dbname) {
2735: efree(dbname);
2736: }
1.1.1.3 misho 2737: return NULL;
1.1 misho 2738: }
2739:
2740: private const uint32_t ar[] = {
2741: MAGICNO, VERSIONNO
2742: };
1.1.1.3 misho 2743:
1.1 misho 2744: /*
2745: * handle an mmaped file.
2746: */
2747: private int
1.1.1.3 misho 2748: apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
1.1 misho 2749: {
1.1.1.3 misho 2750: static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
2751: static const size_t m = sizeof(**map->magic);
2752: int fd = -1;
2753: size_t len;
1.1 misho 2754: char *dbname;
2755: int rv = -1;
1.1.1.3 misho 2756: uint32_t i;
1.1 misho 2757: php_stream *stream;
2758:
2759: TSRMLS_FETCH();
2760:
2761: dbname = mkdbname(ms, fn, 0);
2762:
1.1.1.2 misho 2763: if (dbname == NULL)
1.1 misho 2764: goto out;
2765:
2766: /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
2767: #if PHP_API_VERSION < 20100412
2768: stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
2769: #else
2770: stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
2771: #endif
2772:
2773: if (!stream) {
2774: file_error(ms, errno, "cannot open `%s'", dbname);
2775: goto out;
2776: }
2777:
1.1.1.3 misho 2778: if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
1.1 misho 2779: file_error(ms, errno, "error writing `%s'", dbname);
2780: goto out;
2781: }
2782:
1.1.1.3 misho 2783: if (php_stream_write(stream, (const char *)map->nmagic, nm) != (ssize_t)nm) {
2784: file_error(ms, errno, "error writing `%s'", dbname);
2785: goto out;
2786: }
2787:
2788: assert(nm + sizeof(ar) < m);
2789:
1.1 misho 2790: if (php_stream_seek(stream,(off_t)sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
2791: file_error(ms, errno, "error seeking `%s'", dbname);
2792: goto out;
2793: }
2794:
1.1.1.3 misho 2795: for (i = 0; i < MAGIC_SETS; i++) {
2796: len = m * map->nmagic[i];
2797: if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
2798: file_error(ms, errno, "error writing `%s'", dbname);
2799: goto out;
2800: }
1.1 misho 2801: }
2802:
1.1.1.3 misho 2803: if (stream) {
2804: php_stream_close(stream);
2805: }
1.1 misho 2806:
2807: rv = 0;
2808: out:
2809: efree(dbname);
2810: return rv;
2811: }
2812:
2813: private const char ext[] = ".mgc";
2814: /*
2815: * make a dbname
2816: */
2817: private char *
2818: mkdbname(struct magic_set *ms, const char *fn, int strip)
2819: {
2820: const char *p, *q;
2821: char *buf;
2822: TSRMLS_FETCH();
2823:
2824: if (strip) {
2825: if ((p = strrchr(fn, '/')) != NULL)
2826: fn = ++p;
2827: }
2828:
2829: for (q = fn; *q; q++)
2830: continue;
2831: /* Look for .mgc */
2832: for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
2833: if (*p != *q)
2834: break;
2835:
2836: /* Did not find .mgc, restore q */
2837: if (p >= ext)
2838: while (*q)
2839: q++;
2840:
2841: q++;
2842: /* Compatibility with old code that looked in .mime */
2843: if (ms->flags & MAGIC_MIME) {
2844: spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
1.1.1.3 misho 2845: #ifdef PHP_WIN32
2846: if (VCWD_ACCESS(buf, R_OK) == 0) {
2847: #else
1.1 misho 2848: if (VCWD_ACCESS(buf, R_OK) != -1) {
1.1.1.3 misho 2849: #endif
1.1 misho 2850: ms->flags &= MAGIC_MIME_TYPE;
2851: return buf;
2852: }
2853: efree(buf);
2854: }
2855: spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
2856:
2857: /* Compatibility with old code that looked in .mime */
2858: if (strstr(p, ".mime") != NULL)
2859: ms->flags &= MAGIC_MIME_TYPE;
2860: return buf;
2861: }
2862:
2863: /*
2864: * Byteswap an mmap'ed file if needed
2865: */
2866: private void
2867: byteswap(struct magic *magic, uint32_t nmagic)
2868: {
2869: uint32_t i;
2870: for (i = 0; i < nmagic; i++)
2871: bs1(&magic[i]);
2872: }
2873:
/*
 * swap a short: return `sv' with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	const unsigned int wide = sv;

	return (uint16_t)((wide << 8) | (wide >> 8));
}
2887:
/*
 * swap an int: return `sv' with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv & 0x00ff0000U) >> 8) |
	    ((sv & 0xff000000U) >> 24);
}
2903:
/*
 * swap a quad: return `sv' with the order of its eight bytes reversed
 * (a full reversal, not two independently swapped 32-bit halves).
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	int byte;

	/* Peel bytes off the low end of sv and push them onto rv. */
	for (byte = 0; byte < 8; byte++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
2934:
2935: /*
2936: * byteswap a single magic entry
2937: */
2938: private void
2939: bs1(struct magic *m)
2940: {
2941: m->cont_level = swap2(m->cont_level);
2942: m->offset = swap4((uint32_t)m->offset);
2943: m->in_offset = swap4((uint32_t)m->in_offset);
2944: m->lineno = swap4((uint32_t)m->lineno);
2945: if (IS_LIBMAGIC_STRING(m->type)) {
2946: m->str_range = swap4(m->str_range);
2947: m->str_flags = swap4(m->str_flags);
2948: }
2949: else {
2950: m->value.q = swap8(m->value.q);
2951: m->num_mask = swap8(m->num_mask);
2952: }
2953: }
1.1.1.2 misho 2954:
2955: protected size_t
2956: file_pstring_length_size(const struct magic *m)
2957: {
2958: switch (m->str_flags & PSTRING_LEN) {
2959: case PSTRING_1_LE:
2960: return 1;
2961: case PSTRING_2_LE:
2962: case PSTRING_2_BE:
2963: return 2;
2964: case PSTRING_4_LE:
2965: case PSTRING_4_BE:
2966: return 4;
2967: default:
2968: abort(); /* Impossible */
2969: return 1;
2970: }
2971: }
2972: protected size_t
2973: file_pstring_get_length(const struct magic *m, const char *s)
2974: {
2975: size_t len = 0;
2976:
2977: switch (m->str_flags & PSTRING_LEN) {
2978: case PSTRING_1_LE:
2979: len = *s;
2980: break;
2981: case PSTRING_2_LE:
2982: len = (s[1] << 8) | s[0];
2983: break;
2984: case PSTRING_2_BE:
2985: len = (s[0] << 8) | s[1];
2986: break;
2987: case PSTRING_4_LE:
2988: len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
2989: break;
2990: case PSTRING_4_BE:
2991: len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
2992: break;
2993: default:
2994: abort(); /* Impossible */
2995: }
2996:
2997: if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
2998: len -= file_pstring_length_size(m);
2999:
3000: return len;
3001: }
1.1.1.3 misho 3002:
3003: protected int
3004: file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3005: {
3006: uint32_t i, j;
3007: struct mlist *mlist, *ml;
3008:
3009: mlist = ms->mlist[1];
3010:
3011: for (ml = mlist->next; ml != mlist; ml = ml->next) {
3012: struct magic *ma = ml->magic;
3013: uint32_t nma = ml->nmagic;
3014: for (i = 0; i < nma; i++) {
3015: if (ma[i].type != FILE_NAME)
3016: continue;
3017: if (strcmp(ma[i].value.s, name) == 0) {
3018: v->magic = &ma[i];
3019: for (j = i + 1; j < nma; j++)
3020: if (ma[j].cont_level == 0)
3021: break;
3022: v->nmagic = j - i;
3023: return 0;
3024: }
3025: }
3026: }
3027: return -1;
3028: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>