Annotation of embedaddon/php/ext/fileinfo/libmagic/apprentice.c, revision 1.1.1.3
1.1 misho 1: /*
2: * Copyright (c) Ian F. Darwin 1986-1995.
3: * Software written by Ian F. Darwin and others;
4: * maintained 1995-present by Christos Zoulas and others.
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice immediately at the beginning of the file, without modification,
11: * this list of conditions, and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26: * SUCH DAMAGE.
27: */
28: /*
29: * apprentice - make one pass through /etc/magic, learning its secrets.
30: */
31:
32: #include "php.h"
33:
34: #include "file.h"
35:
36: #ifndef lint
1.1.1.3 ! misho 37: FILE_RCSID("@(#)$File: apprentice.c,v 1.191 2013/02/26 21:02:48 christos Exp $")
1.1 misho 38: #endif /* lint */
39:
40: #include "magic.h"
41: #include "patchlevel.h"
42: #include <stdlib.h>
43:
44: #if defined(__hpux) && !defined(HAVE_STRTOULL)
45: #if SIZEOF_LONG == 8
46: # define strtoull strtoul
47: #else
48: # define strtoull __strtoull
49: #endif
50: #endif
51:
52: #ifdef PHP_WIN32
53: #include "win32/unistd.h"
54: #if _MSC_VER <= 1300
55: # include "win32/php_strtoi64.h"
56: #endif
57: #define strtoull _strtoui64
58: #else
59: #include <unistd.h>
60: #endif
61: #include <string.h>
62: #include <assert.h>
63: #include <ctype.h>
64: #include <fcntl.h>
65:
66: #define EATAB {while (isascii((unsigned char) *l) && \
67: isspace((unsigned char) *l)) ++l;}
68: #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
69: tolower((unsigned char) (l)) : (l))
70: /*
71: * Work around a bug in headers on Digital Unix.
72: * At least confirmed for: OSF1 V4.0 878
73: */
74: #if defined(__osf__) && defined(__DECC)
75: #ifdef MAP_FAILED
76: #undef MAP_FAILED
77: #endif
78: #endif
79:
80: #ifndef MAP_FAILED
81: #define MAP_FAILED (void *) -1
82: #endif
83:
84: #ifndef MAP_FILE
85: #define MAP_FILE 0
86: #endif
87:
1.1.1.3 ! misho 88: #define ALLOC_CHUNK (size_t)10
! 89: #define ALLOC_INCR (size_t)200
! 90:
1.1 misho 91: struct magic_entry {
92: struct magic *mp;
93: uint32_t cont_count;
94: uint32_t max_count;
95: };
96:
1.1.1.3 ! misho 97: struct magic_map {
! 98: void *p;
! 99: size_t len;
! 100: struct magic *magic[MAGIC_SETS];
! 101: uint32_t nmagic[MAGIC_SETS];
! 102: };
! 103:
1.1 misho 104: int file_formats[FILE_NAMES_SIZE];
105: const size_t file_nformats = FILE_NAMES_SIZE;
106: const char *file_names[FILE_NAMES_SIZE];
107: const size_t file_nnames = FILE_NAMES_SIZE;
108:
109: private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
110: private int hextoint(int);
111: private const char *getstr(struct magic_set *, struct magic *, const char *,
112: int);
1.1.1.3 ! misho 113: private int parse(struct magic_set *, struct magic_entry *, const char *,
! 114: size_t, int);
1.1 misho 115: private void eatsize(const char **);
1.1.1.3 ! misho 116: private int apprentice_1(struct magic_set *, const char *, int);
1.1 misho 117: private size_t apprentice_magic_strength(const struct magic *);
118: private int apprentice_sort(const void *, const void *);
1.1.1.2 misho 119: private void apprentice_list(struct mlist *, int );
1.1.1.3 ! misho 120: private struct magic_map *apprentice_load(struct magic_set *,
1.1 misho 121: const char *, int);
1.1.1.3 ! misho 122: private struct mlist *mlist_alloc(void);
! 123: private void mlist_free(struct mlist *);
1.1 misho 124: private void byteswap(struct magic *, uint32_t);
125: private void bs1(struct magic *);
126: private uint16_t swap2(uint16_t);
127: private uint32_t swap4(uint32_t);
128: private uint64_t swap8(uint64_t);
129: private char *mkdbname(struct magic_set *, const char *, int);
1.1.1.3 ! misho 130: private struct magic_map *apprentice_map(struct magic_set *, const char *);
! 131: private void apprentice_unmap(struct magic_map *);
! 132: private int apprentice_compile(struct magic_set *, struct magic_map *,
1.1 misho 133: const char *);
134: private int check_format_type(const char *, int);
135: private int check_format(struct magic_set *, struct magic *);
136: private int get_op(char);
137: private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
138: private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
139: private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
140:
1.1.1.3 ! misho 141:
! 142: private size_t maxmagic[MAGIC_SETS] = { 0 };
1.1 misho 143: private size_t magicsize = sizeof(struct magic);
144:
145: private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
1.1.1.3 ! misho 146:
1.1 misho 147: private struct {
148: const char *name;
149: size_t len;
150: int (*fun)(struct magic_set *, struct magic_entry *, const char *);
151: } bang[] = {
152: #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
153: DECLARE_FIELD(mime),
154: DECLARE_FIELD(apple),
155: DECLARE_FIELD(strength),
156: #undef DECLARE_FIELD
157: { NULL, 0, NULL }
158: };
159:
160: #include "../data_file.c"
161:
1.1.1.3 ! misho 162: struct type_tbl_s {
1.1 misho 163: const char name[16];
164: const size_t len;
165: const int type;
166: const int format;
1.1.1.3 ! misho 167: };
! 168:
! 169: /*
! 170: * XXX - the actual Single UNIX Specification says that "long" means "long",
! 171: * as in the C data type, but we treat it as meaning "4-byte integer".
! 172: * Given that the OS X version of file 5.04 did the same, I guess that passes
! 173: * the actual test; having "long" be dependent on how big a "long" is on
! 174: * the machine running "file" is silly.
! 175: */
! 176: static const struct type_tbl_s type_tbl[] = {
1.1 misho 177: # define XX(s) s, (sizeof(s) - 1)
178: # define XX_NULL "", 0
1.1.1.3 ! misho 179: { XX("invalid"), FILE_INVALID, FILE_FMT_NONE },
1.1 misho 180: { XX("byte"), FILE_BYTE, FILE_FMT_NUM },
181: { XX("short"), FILE_SHORT, FILE_FMT_NUM },
182: { XX("default"), FILE_DEFAULT, FILE_FMT_STR },
183: { XX("long"), FILE_LONG, FILE_FMT_NUM },
184: { XX("string"), FILE_STRING, FILE_FMT_STR },
185: { XX("date"), FILE_DATE, FILE_FMT_STR },
186: { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM },
187: { XX("belong"), FILE_BELONG, FILE_FMT_NUM },
188: { XX("bedate"), FILE_BEDATE, FILE_FMT_STR },
189: { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM },
190: { XX("lelong"), FILE_LELONG, FILE_FMT_NUM },
191: { XX("ledate"), FILE_LEDATE, FILE_FMT_STR },
192: { XX("pstring"), FILE_PSTRING, FILE_FMT_STR },
193: { XX("ldate"), FILE_LDATE, FILE_FMT_STR },
194: { XX("beldate"), FILE_BELDATE, FILE_FMT_STR },
195: { XX("leldate"), FILE_LELDATE, FILE_FMT_STR },
196: { XX("regex"), FILE_REGEX, FILE_FMT_STR },
197: { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR },
198: { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR },
199: { XX("search"), FILE_SEARCH, FILE_FMT_STR },
200: { XX("medate"), FILE_MEDATE, FILE_FMT_STR },
201: { XX("meldate"), FILE_MELDATE, FILE_FMT_STR },
202: { XX("melong"), FILE_MELONG, FILE_FMT_NUM },
203: { XX("quad"), FILE_QUAD, FILE_FMT_QUAD },
204: { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD },
205: { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD },
206: { XX("qdate"), FILE_QDATE, FILE_FMT_STR },
207: { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR },
208: { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR },
209: { XX("qldate"), FILE_QLDATE, FILE_FMT_STR },
210: { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR },
211: { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR },
212: { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT },
213: { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT },
214: { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT },
215: { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE },
216: { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE },
217: { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE },
218: { XX("leid3"), FILE_LEID3, FILE_FMT_NUM },
219: { XX("beid3"), FILE_BEID3, FILE_FMT_NUM },
1.1.1.3 ! misho 220: { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM },
! 221: { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR },
! 222: { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR },
! 223: { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR },
! 224: { XX("name"), FILE_NAME, FILE_FMT_NONE },
! 225: { XX("use"), FILE_USE, FILE_FMT_NONE },
! 226: { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
! 227: };
! 228:
! 229: /*
! 230: * These are not types, and cannot be preceded by "u" to make them
! 231: * unsigned.
! 232: */
! 233: static const struct type_tbl_s special_tbl[] = {
! 234: { XX("name"), FILE_NAME, FILE_FMT_STR },
! 235: { XX("use"), FILE_USE, FILE_FMT_STR },
1.1 misho 236: { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
1.1.1.3 ! misho 237: };
1.1 misho 238: # undef XX
239: # undef XX_NULL
240:
241: #ifndef S_ISDIR
242: #define S_ISDIR(mode) ((mode) & _S_IFDIR)
243: #endif
244:
245: private int
1.1.1.3 ! misho 246: get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
1.1 misho 247: {
248: const struct type_tbl_s *p;
249:
1.1.1.3 ! misho 250: for (p = tbl; p->len; p++) {
1.1 misho 251: if (strncmp(l, p->name, p->len) == 0) {
252: if (t)
253: *t = l + p->len;
254: break;
255: }
256: }
257: return p->type;
258: }
259:
1.1.1.3 ! misho 260: private int
! 261: get_standard_integer_type(const char *l, const char **t)
! 262: {
! 263: int type;
! 264:
! 265: if (isalpha((unsigned char)l[1])) {
! 266: switch (l[1]) {
! 267: case 'C':
! 268: /* "dC" and "uC" */
! 269: type = FILE_BYTE;
! 270: break;
! 271: case 'S':
! 272: /* "dS" and "uS" */
! 273: type = FILE_SHORT;
! 274: break;
! 275: case 'I':
! 276: case 'L':
! 277: /*
! 278: * "dI", "dL", "uI", and "uL".
! 279: *
! 280: * XXX - the actual Single UNIX Specification says
! 281: * that "L" means "long", as in the C data type,
! 282: * but we treat it as meaning "4-byte integer".
! 283: * Given that the OS X version of file 5.04 did
! 284: * the same, I guess that passes the actual SUS
! 285: * validation suite; having "dL" be dependent on
! 286: * how big a "long" is on the machine running
! 287: * "file" is silly.
! 288: */
! 289: type = FILE_LONG;
! 290: break;
! 291: case 'Q':
! 292: /* "dQ" and "uQ" */
! 293: type = FILE_QUAD;
! 294: break;
! 295: default:
! 296: /* "d{anything else}", "u{anything else}" */
! 297: return FILE_INVALID;
! 298: }
! 299: l += 2;
! 300: } else if (isdigit((unsigned char)l[1])) {
! 301: /*
! 302: * "d{num}" and "u{num}"; we only support {num} values
! 303: * of 1, 2, 4, and 8 - the Single UNIX Specification
! 304: * doesn't say anything about whether arbitrary
! 305: * values should be supported, but both the Solaris 10
! 306: * and OS X Mountain Lion versions of file passed the
! 307: * Single UNIX Specification validation suite, and
! 308: * neither of them support values bigger than 8 or
! 309: * non-power-of-2 values.
! 310: */
! 311: if (isdigit((unsigned char)l[2])) {
! 312: /* Multi-digit, so > 9 */
! 313: return FILE_INVALID;
! 314: }
! 315: switch (l[1]) {
! 316: case '1':
! 317: type = FILE_BYTE;
! 318: break;
! 319: case '2':
! 320: type = FILE_SHORT;
! 321: break;
! 322: case '4':
! 323: type = FILE_LONG;
! 324: break;
! 325: case '8':
! 326: type = FILE_QUAD;
! 327: break;
! 328: default:
! 329: /* XXX - what about 3, 5, 6, or 7? */
! 330: return FILE_INVALID;
! 331: }
! 332: l += 2;
! 333: } else {
! 334: /*
! 335: * "d" or "u" by itself.
! 336: */
! 337: type = FILE_LONG;
! 338: ++l;
! 339: }
! 340: if (t)
! 341: *t = l;
! 342: return type;
! 343: }
! 344:
1.1 misho 345: private void
346: init_file_tables(void)
347: {
348: static int done = 0;
349: const struct type_tbl_s *p;
350:
351: if (done)
352: return;
353: done++;
354:
355: for (p = type_tbl; p->len; p++) {
356: assert(p->type < FILE_NAMES_SIZE);
357: file_names[p->type] = p->name;
358: file_formats[p->type] = p->format;
359: }
1.1.1.3 ! misho 360: assert(p - type_tbl == FILE_NAMES_SIZE);
! 361: }
! 362:
! 363: private int
! 364: add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
! 365: {
! 366: struct mlist *ml;
! 367:
! 368: if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
! 369: return -1;
! 370:
! 371: ml->map = idx == 0 ? map : NULL;
! 372: ml->magic = map->magic[idx];
! 373: ml->nmagic = map->nmagic[idx];
! 374:
! 375: mlp->prev->next = ml;
! 376: ml->prev = mlp->prev;
! 377: ml->next = mlp;
! 378: mlp->prev = ml;
! 379: return 0;
1.1 misho 380: }
381:
382: /*
383: * Handle one file or directory.
384: */
385: private int
1.1.1.3 ! misho 386: apprentice_1(struct magic_set *ms, const char *fn, int action)
1.1 misho 387: {
388: struct mlist *ml;
1.1.1.3 ! misho 389: struct magic_map *map;
! 390: size_t i;
1.1 misho 391:
392: if (magicsize != FILE_MAGICSIZE) {
393: file_error(ms, 0, "magic element size %lu != %lu",
1.1.1.3 ! misho 394: (unsigned long)sizeof(*map->magic[0]),
1.1 misho 395: (unsigned long)FILE_MAGICSIZE);
396: return -1;
397: }
398:
399: if (action == FILE_COMPILE) {
1.1.1.3 ! misho 400: map = apprentice_load(ms, fn, action);
! 401: if (map == NULL)
1.1 misho 402: return -1;
1.1.1.3 ! misho 403: return apprentice_compile(ms, map, fn);
1.1 misho 404: }
405:
1.1.1.3 ! misho 406: map = apprentice_map(ms, fn);
! 407: if (map == NULL) {
1.1 misho 408: if (fn) {
409: if (ms->flags & MAGIC_CHECK)
410: file_magwarn(ms, "using regular magic file `%s'", fn);
1.1.1.3 ! misho 411: map = apprentice_load(ms, fn, action);
1.1 misho 412: }
1.1.1.3 ! misho 413: if (map == NULL)
1.1 misho 414: return -1;
415: }
416:
1.1.1.3 ! misho 417: for (i = 0; i < MAGIC_SETS; i++) {
! 418: if (add_mlist(ms->mlist[i], map, i) == -1) {
! 419: file_oomem(ms, sizeof(*ml));
! 420: apprentice_unmap(map);
! 421: return -1;
! 422: }
1.1 misho 423: }
424:
1.1.1.2 misho 425: if (action == FILE_LIST) {
1.1.1.3 ! misho 426: for (i = 0; i < MAGIC_SETS; i++) {
! 427: printf("Set %zu:\nBinary patterns:\n", i);
! 428: apprentice_list(ms->mlist[i], BINTEST);
! 429: printf("Text patterns:\n");
! 430: apprentice_list(ms->mlist[i], TEXTTEST);
! 431: }
1.1.1.2 misho 432: }
433:
1.1 misho 434: return 0;
435: }
436:
437: protected void
1.1.1.3 ! misho 438: file_ms_free(struct magic_set *ms)
1.1 misho 439: {
1.1.1.3 ! misho 440: size_t i;
! 441: if (ms == NULL)
1.1 misho 442: return;
1.1.1.3 ! misho 443: for (i = 0; i < MAGIC_SETS; i++)
! 444: mlist_free(ms->mlist[i]);
! 445: if (ms->o.pbuf) {
! 446: efree(ms->o.pbuf);
! 447: }
! 448: if (ms->o.buf) {
! 449: efree(ms->o.buf);
! 450: }
! 451: if (ms->c.li) {
! 452: efree(ms->c.li);
! 453: }
! 454: efree(ms);
! 455: }
1.1 misho 456:
1.1.1.3 ! misho 457: protected struct magic_set *
! 458: file_ms_alloc(int flags)
! 459: {
! 460: struct magic_set *ms;
! 461: size_t i, len;
1.1 misho 462:
1.1.1.3 ! misho 463: if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
! 464: sizeof(struct magic_set)))) == NULL)
! 465: return NULL;
1.1 misho 466:
1.1.1.3 ! misho 467: if (magic_setflags(ms, flags) == -1) {
! 468: errno = EINVAL;
! 469: goto free;
! 470: }
! 471:
! 472: ms->o.buf = ms->o.pbuf = NULL;
! 473: len = (ms->c.len = 10) * sizeof(*ms->c.li);
! 474:
! 475: if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
! 476: goto free;
! 477:
! 478: ms->event_flags = 0;
! 479: ms->error = -1;
! 480: for (i = 0; i < MAGIC_SETS; i++)
! 481: ms->mlist[i] = NULL;
! 482: ms->file = "unknown";
! 483: ms->line = 0;
! 484: return ms;
! 485: free:
! 486: efree(ms);
! 487: return NULL;
! 488: }
! 489:
! 490: private void
! 491: apprentice_unmap(struct magic_map *map)
! 492: {
! 493: if (map == NULL)
! 494: return;
! 495: if (map->p != php_magic_database) {
! 496: int j;
! 497: for (j = 0; j < MAGIC_SETS; j++) {
! 498: if (map->magic[j])
! 499: efree(map->magic[j]);
! 500: }
! 501: if (map->p != NULL) {
! 502: efree(map->p);
! 503: }
! 504: }
! 505: efree(map);
! 506: }
! 507:
! 508: private struct mlist *
! 509: mlist_alloc(void)
! 510: {
! 511: struct mlist *mlist;
! 512: if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
! 513: return NULL;
1.1 misho 514: }
1.1.1.3 ! misho 515: mlist->next = mlist->prev = mlist;
! 516: return mlist;
! 517: }
! 518:
! 519: private void
! 520: mlist_free(struct mlist *mlist)
! 521: {
! 522: struct mlist *ml;
! 523:
! 524: if (mlist == NULL)
! 525: return;
! 526:
! 527: for (ml = mlist->next; ml != mlist;) {
! 528: struct mlist *next = ml->next;
! 529: if (ml->map)
! 530: apprentice_unmap(ml->map);
! 531: efree(ml);
! 532: ml = next;
! 533: }
! 534: efree(ml);
1.1 misho 535: }
536:
537: /* const char *fn: list of magic files and directories */
1.1.1.3 ! misho 538: protected int
1.1 misho 539: file_apprentice(struct magic_set *ms, const char *fn, int action)
540: {
541: char *p, *mfn;
542: int file_err, errs = -1;
1.1.1.3 ! misho 543: size_t i;
1.1.1.2 misho 544: /* XXX disabling default magic loading so the compiled in data is used */
545: #if 0
546: if ((fn = magic_getpath(fn, action)) == NULL)
1.1.1.3 ! misho 547: return -1;
1.1.1.2 misho 548: #endif
1.1 misho 549:
550: init_file_tables();
551:
552: if (fn == NULL)
553: fn = getenv("MAGIC");
554: if (fn == NULL) {
1.1.1.3 ! misho 555: for (i = 0; i < MAGIC_SETS; i++) {
! 556: mlist_free(ms->mlist[i]);
! 557: if ((ms->mlist[i] = mlist_alloc()) == NULL) {
! 558: file_oomem(ms, sizeof(*ms->mlist[i]));
! 559: return -1;
! 560: }
! 561: }
! 562: return apprentice_1(ms, fn, action);
1.1 misho 563: }
564:
1.1.1.3 ! misho 565: if ((mfn = estrdup(fn)) == NULL) {
! 566: file_oomem(ms, strlen(fn));
! 567: return -1;
! 568: }
1.1 misho 569:
1.1.1.3 ! misho 570: for (i = 0; i < MAGIC_SETS; i++) {
! 571: mlist_free(ms->mlist[i]);
! 572: if ((ms->mlist[i] = mlist_alloc()) == NULL) {
! 573: file_oomem(ms, sizeof(*ms->mlist[i]));
! 574: if (i != 0) {
! 575: --i;
! 576: do
! 577: mlist_free(ms->mlist[i]);
! 578: while (i != 0);
! 579: }
! 580: efree(mfn);
! 581: return -1;
! 582: }
! 583: }
! 584: fn = mfn;
1.1 misho 585:
586: while (fn) {
587: p = strchr(fn, PATHSEP);
588: if (p)
589: *p++ = '\0';
590: if (*fn == '\0')
591: break;
1.1.1.3 ! misho 592: file_err = apprentice_1(ms, fn, action);
1.1 misho 593: errs = MAX(errs, file_err);
594: fn = p;
595: }
1.1.1.3 ! misho 596:
! 597: efree(mfn);
! 598:
1.1 misho 599: if (errs == -1) {
1.1.1.3 ! misho 600: for (i = 0; i < MAGIC_SETS; i++) {
! 601: mlist_free(ms->mlist[i]);
! 602: ms->mlist[i] = NULL;
! 603: }
! 604: file_error(ms, 0, "could not find any valid magic files!");
! 605: return -1;
! 606: }
! 607:
! 608: if (action == FILE_LOAD)
! 609: return 0;
! 610:
! 611: for (i = 0; i < MAGIC_SETS; i++) {
! 612: mlist_free(ms->mlist[i]);
! 613: ms->mlist[i] = NULL;
! 614: }
! 615:
! 616: switch (action) {
! 617: case FILE_COMPILE:
! 618: case FILE_CHECK:
! 619: case FILE_LIST:
! 620: return 0;
! 621: default:
! 622: file_error(ms, 0, "Invalid action %d", action);
! 623: return -1;
1.1 misho 624: }
625: }
626:
627: /*
628: * Get weight of this magic entry, for sorting purposes.
629: */
630: private size_t
631: apprentice_magic_strength(const struct magic *m)
632: {
633: #define MULT 10
634: size_t val = 2 * MULT; /* baseline strength */
635:
636: switch (m->type) {
637: case FILE_DEFAULT: /* make sure this sorts last */
638: if (m->factor_op != FILE_FACTOR_OP_NONE)
639: abort();
640: return 0;
641:
642: case FILE_BYTE:
643: val += 1 * MULT;
644: break;
645:
646: case FILE_SHORT:
647: case FILE_LESHORT:
648: case FILE_BESHORT:
649: val += 2 * MULT;
650: break;
651:
652: case FILE_LONG:
653: case FILE_LELONG:
654: case FILE_BELONG:
655: case FILE_MELONG:
656: val += 4 * MULT;
657: break;
658:
659: case FILE_PSTRING:
660: case FILE_STRING:
661: val += m->vallen * MULT;
662: break;
663:
664: case FILE_BESTRING16:
665: case FILE_LESTRING16:
666: val += m->vallen * MULT / 2;
667: break;
668:
669: case FILE_SEARCH:
670: case FILE_REGEX:
671: val += m->vallen * MAX(MULT / m->vallen, 1);
672: break;
673:
674: case FILE_DATE:
675: case FILE_LEDATE:
676: case FILE_BEDATE:
677: case FILE_MEDATE:
678: case FILE_LDATE:
679: case FILE_LELDATE:
680: case FILE_BELDATE:
681: case FILE_MELDATE:
682: case FILE_FLOAT:
683: case FILE_BEFLOAT:
684: case FILE_LEFLOAT:
685: val += 4 * MULT;
686: break;
687:
688: case FILE_QUAD:
689: case FILE_BEQUAD:
690: case FILE_LEQUAD:
691: case FILE_QDATE:
692: case FILE_LEQDATE:
693: case FILE_BEQDATE:
694: case FILE_QLDATE:
695: case FILE_LEQLDATE:
696: case FILE_BEQLDATE:
1.1.1.3 ! misho 697: case FILE_QWDATE:
! 698: case FILE_LEQWDATE:
! 699: case FILE_BEQWDATE:
1.1 misho 700: case FILE_DOUBLE:
701: case FILE_BEDOUBLE:
702: case FILE_LEDOUBLE:
703: val += 8 * MULT;
704: break;
705:
1.1.1.3 ! misho 706: case FILE_INDIRECT:
! 707: case FILE_NAME:
! 708: case FILE_USE:
! 709: break;
! 710:
1.1 misho 711: default:
712: val = 0;
713: (void)fprintf(stderr, "Bad type %d\n", m->type);
714: abort();
715: }
716:
717: switch (m->reln) {
718: case 'x': /* matches anything penalize */
719: case '!': /* matches almost anything penalize */
720: val = 0;
721: break;
722:
723: case '=': /* Exact match, prefer */
724: val += MULT;
725: break;
726:
727: case '>':
728: case '<': /* comparison match reduce strength */
729: val -= 2 * MULT;
730: break;
731:
732: case '^':
733: case '&': /* masking bits, we could count them too */
734: val -= MULT;
735: break;
736:
737: default:
738: (void)fprintf(stderr, "Bad relation %c\n", m->reln);
739: abort();
740: }
741:
742: if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */
743: val = 1;
744:
745: switch (m->factor_op) {
746: case FILE_FACTOR_OP_NONE:
747: break;
748: case FILE_FACTOR_OP_PLUS:
749: val += m->factor;
750: break;
751: case FILE_FACTOR_OP_MINUS:
752: val -= m->factor;
753: break;
754: case FILE_FACTOR_OP_TIMES:
755: val *= m->factor;
756: break;
757: case FILE_FACTOR_OP_DIV:
758: val /= m->factor;
759: break;
760: default:
761: abort();
762: }
763:
764: /*
765: * Magic entries with no description get a bonus because they depend
766: * on subsequent magic entries to print something.
767: */
768: if (m->desc[0] == '\0')
769: val++;
770: return val;
771: }
772:
773: /*
774: * Sort callback for sorting entries by "strength" (basically length)
775: */
776: private int
777: apprentice_sort(const void *a, const void *b)
778: {
1.1.1.3 ! misho 779: const struct magic_entry *ma = CAST(const struct magic_entry *, a);
! 780: const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1.1 misho 781: size_t sa = apprentice_magic_strength(ma->mp);
782: size_t sb = apprentice_magic_strength(mb->mp);
783: if (sa == sb)
784: return 0;
785: else if (sa > sb)
786: return -1;
787: else
788: return 1;
789: }
790:
1.1.1.2 misho 791: /*
792: * Shows sorted patterns list in the order which is used for the matching
793: */
794: private void
795: apprentice_list(struct mlist *mlist, int mode)
796: {
797: uint32_t magindex = 0;
798: struct mlist *ml;
799: for (ml = mlist->next; ml != mlist; ml = ml->next) {
800: for (magindex = 0; magindex < ml->nmagic; magindex++) {
801: struct magic *m = &ml->magic[magindex];
802: if ((m->flag & mode) != mode) {
803: /* Skip sub-tests */
804: while (magindex + 1 < ml->nmagic &&
805: ml->magic[magindex + 1].cont_level != 0)
806: ++magindex;
807: continue; /* Skip to next top-level test*/
808: }
809:
810: /*
811: * Try to iterate over the tree until we find item with
812: * description/mimetype.
813: */
814: while (magindex + 1 < ml->nmagic &&
815: ml->magic[magindex + 1].cont_level != 0 &&
816: *ml->magic[magindex].desc == '\0' &&
817: *ml->magic[magindex].mimetype == '\0')
818: magindex++;
819:
820: printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
821: apprentice_magic_strength(m),
822: ml->magic[magindex].desc,
823: ml->magic[magindex].mimetype);
824: }
825: }
826: }
827:
1.1 misho 828: private void
829: set_test_type(struct magic *mstart, struct magic *m)
830: {
831: switch (m->type) {
832: case FILE_BYTE:
833: case FILE_SHORT:
834: case FILE_LONG:
835: case FILE_DATE:
836: case FILE_BESHORT:
837: case FILE_BELONG:
838: case FILE_BEDATE:
839: case FILE_LESHORT:
840: case FILE_LELONG:
841: case FILE_LEDATE:
842: case FILE_LDATE:
843: case FILE_BELDATE:
844: case FILE_LELDATE:
845: case FILE_MEDATE:
846: case FILE_MELDATE:
847: case FILE_MELONG:
848: case FILE_QUAD:
849: case FILE_LEQUAD:
850: case FILE_BEQUAD:
851: case FILE_QDATE:
852: case FILE_LEQDATE:
853: case FILE_BEQDATE:
854: case FILE_QLDATE:
855: case FILE_LEQLDATE:
856: case FILE_BEQLDATE:
1.1.1.3 ! misho 857: case FILE_QWDATE:
! 858: case FILE_LEQWDATE:
! 859: case FILE_BEQWDATE:
1.1 misho 860: case FILE_FLOAT:
861: case FILE_BEFLOAT:
862: case FILE_LEFLOAT:
863: case FILE_DOUBLE:
864: case FILE_BEDOUBLE:
865: case FILE_LEDOUBLE:
1.1.1.2 misho 866: mstart->flag |= BINTEST;
867: break;
1.1 misho 868: case FILE_STRING:
869: case FILE_PSTRING:
870: case FILE_BESTRING16:
871: case FILE_LESTRING16:
1.1.1.2 misho 872: /* Allow text overrides */
873: if (mstart->str_flags & STRING_TEXTTEST)
874: mstart->flag |= TEXTTEST;
875: else
876: mstart->flag |= BINTEST;
1.1 misho 877: break;
878: case FILE_REGEX:
879: case FILE_SEARCH:
1.1.1.2 misho 880: /* Check for override */
881: if (mstart->str_flags & STRING_BINTEST)
882: mstart->flag |= BINTEST;
883: if (mstart->str_flags & STRING_TEXTTEST)
884: mstart->flag |= TEXTTEST;
885:
886: if (mstart->flag & (TEXTTEST|BINTEST))
887: break;
888:
1.1 misho 889: /* binary test if pattern is not text */
890: if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
891: NULL) <= 0)
892: mstart->flag |= BINTEST;
1.1.1.2 misho 893: else
894: mstart->flag |= TEXTTEST;
1.1 misho 895: break;
896: case FILE_DEFAULT:
897: /* can't deduce anything; we shouldn't see this at the
898: top level anyway */
899: break;
900: case FILE_INVALID:
901: default:
902: /* invalid search type, but no need to complain here */
903: break;
904: }
905: }
906:
1.1.1.3 ! misho 907: private int
! 908: addentry(struct magic_set *ms, struct magic_entry *me,
! 909: struct magic_entry **mentry, uint32_t *mentrycount)
! 910: {
! 911: size_t i = me->mp->type == FILE_NAME ? 1 : 0;
! 912: if (mentrycount[i] == maxmagic[i]) {
! 913: struct magic_entry *mp;
! 914:
! 915: maxmagic[i] += ALLOC_INCR;
! 916: if ((mp = CAST(struct magic_entry *,
! 917: erealloc(mentry[i], sizeof(*mp) * maxmagic[i]))) ==
! 918: NULL) {
! 919: file_oomem(ms, sizeof(*mp) * maxmagic[i]);
! 920: return -1;
! 921: }
! 922: (void)memset(&mp[mentrycount[i]], 0, sizeof(*mp) *
! 923: ALLOC_INCR);
! 924: mentry[i] = mp;
! 925: }
! 926: mentry[i][mentrycount[i]++] = *me;
! 927: memset(me, 0, sizeof(*me));
! 928: return 0;
! 929: }
! 930:
1.1 misho 931: /*
932: * Load and parse one file.
933: */
934: private void
935: load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1.1.1.3 ! misho 936: struct magic_entry **mentry, uint32_t *mentrycount)
1.1 misho 937: {
938: char buffer[BUFSIZ + 1];
1.1.1.2 misho 939: char *line = NULL;
940: size_t len;
1.1 misho 941: size_t lineno = 0;
1.1.1.3 ! misho 942: struct magic_entry me;
1.1 misho 943:
944: php_stream *stream;
945:
946: TSRMLS_FETCH();
947:
1.1.1.3 ! misho 948: ms->file = fn;
1.1 misho 949: #if PHP_API_VERSION < 20100412
950: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
951: #else
952: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
953: #endif
954:
955: if (stream == NULL) {
956: if (errno != ENOENT)
957: file_error(ms, errno, "cannot read magic file `%s'",
958: fn);
959: (*errs)++;
1.1.1.2 misho 960: return;
961: }
1.1 misho 962:
1.1.1.3 ! misho 963: memset(&me, 0, sizeof(me));
! 964: /* read and parse this file */
1.1.1.2 misho 965: for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
966: if (len == 0) /* null line, garbage, etc */
967: continue;
968: if (line[len - 1] == '\n') {
969: lineno++;
970: line[len - 1] = '\0'; /* delete newline */
971: }
972: switch (line[0]) {
973: case '\0': /* empty, do not parse */
974: case '#': /* comment, do not parse */
975: continue;
976: case '!':
977: if (line[1] == ':') {
1.1 misho 978: size_t i;
979:
980: for (i = 0; bang[i].name != NULL; i++) {
1.1.1.2 misho 981: if ((size_t)(len - 2) > bang[i].len &&
1.1 misho 982: memcmp(bang[i].name, line + 2,
983: bang[i].len) == 0)
984: break;
985: }
986: if (bang[i].name == NULL) {
987: file_error(ms, 0,
988: "Unknown !: entry `%s'", line);
989: (*errs)++;
990: continue;
991: }
1.1.1.3 ! misho 992: if (me.mp == NULL) {
1.1 misho 993: file_error(ms, 0,
994: "No current entry for :!%s type",
995: bang[i].name);
996: (*errs)++;
997: continue;
998: }
1.1.1.3 ! misho 999: if ((*bang[i].fun)(ms, &me,
1.1 misho 1000: line + bang[i].len + 2) != 0) {
1001: (*errs)++;
1002: continue;
1003: }
1004: continue;
1005: }
1.1.1.2 misho 1006: /*FALLTHROUGH*/
1007: default:
1.1.1.3 ! misho 1008: again:
! 1009: switch (parse(ms, &me, line, lineno, action)) {
! 1010: case 0:
! 1011: continue;
! 1012: case 1:
! 1013: (void)addentry(ms, &me, mentry, mentrycount);
! 1014: goto again;
! 1015: default:
1.1 misho 1016: (*errs)++;
1.1.1.2 misho 1017: break;
1.1 misho 1018: }
1019: }
1.1.1.3 ! misho 1020: }
! 1021: if (me.mp)
! 1022: (void)addentry(ms, &me, mentry, mentrycount);
1.1.1.2 misho 1023: php_stream_close(stream);
1.1 misho 1024: }
1025:
1026: /*
1027: * parse a file or directory of files
1028: * const char *fn: name of magic file or directory
1029: */
1030: private int
1.1.1.2 misho 1031: cmpstrp(const void *p1, const void *p2)
1032: {
1033: return strcmp(*(char *const *)p1, *(char *const *)p2);
1034: }
1035:
1.1.1.3 ! misho 1036:
! 1037: private uint32_t
! 1038: set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
! 1039: uint32_t starttest)
! 1040: {
! 1041: static const char text[] = "text";
! 1042: static const char binary[] = "binary";
! 1043: static const size_t len = sizeof(text);
! 1044:
! 1045: uint32_t i = starttest;
! 1046:
! 1047: do {
! 1048: set_test_type(me[starttest].mp, me[i].mp);
! 1049: if ((ms->flags & MAGIC_DEBUG) == 0)
! 1050: continue;
! 1051: (void)fprintf(stderr, "%s%s%s: %s\n",
! 1052: me[i].mp->mimetype,
! 1053: me[i].mp->mimetype[0] == '\0' ? "" : "; ",
! 1054: me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
! 1055: me[i].mp->flag & BINTEST ? binary : text);
! 1056: if (me[i].mp->flag & BINTEST) {
! 1057: char *p = strstr(me[i].mp->desc, text);
! 1058: if (p && (p == me[i].mp->desc ||
! 1059: isspace((unsigned char)p[-1])) &&
! 1060: (p + len - me[i].mp->desc == MAXstring
! 1061: || (p[len] == '\0' ||
! 1062: isspace((unsigned char)p[len]))))
! 1063: (void)fprintf(stderr, "*** Possible "
! 1064: "binary test for text type\n");
! 1065: }
! 1066: } while (++i < nme && me[i].mp->cont_level != 0);
! 1067: return i;
! 1068: }
! 1069:
! 1070: private void
! 1071: set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
! 1072: {
! 1073: uint32_t i;
! 1074: for (i = 0; i < nme; i++) {
! 1075: if (me[i].mp->cont_level == 0 &&
! 1076: me[i].mp->type == FILE_DEFAULT) {
! 1077: while (++i < nme)
! 1078: if (me[i].mp->cont_level == 0)
! 1079: break;
! 1080: if (i != nme) {
! 1081: /* XXX - Ugh! */
! 1082: ms->line = me[i].mp->lineno;
! 1083: file_magwarn(ms,
! 1084: "level 0 \"default\" did not sort last");
! 1085: }
! 1086: return;
! 1087: }
! 1088: }
! 1089: }
! 1090:
1.1.1.2 misho 1091: private int
1.1.1.3 ! misho 1092: coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
! 1093: struct magic **ma, uint32_t *nma)
1.1 misho 1094: {
1.1.1.3 ! misho 1095: uint32_t i, mentrycount = 0;
! 1096: size_t slen;
! 1097:
! 1098: for (i = 0; i < nme; i++)
! 1099: mentrycount += me[i].cont_count;
! 1100:
! 1101: slen = sizeof(**ma) * mentrycount;
! 1102: if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
! 1103: file_oomem(ms, slen);
! 1104: return -1;
! 1105: }
! 1106:
! 1107: mentrycount = 0;
! 1108: for (i = 0; i < nme; i++) {
! 1109: (void)memcpy(*ma + mentrycount, me[i].mp,
! 1110: me[i].cont_count * sizeof(**ma));
! 1111: mentrycount += me[i].cont_count;
! 1112: }
! 1113: *nma = mentrycount;
! 1114: return 0;
! 1115: }
! 1116:
! 1117: private void
! 1118: magic_entry_free(struct magic_entry *me, uint32_t nme)
! 1119: {
! 1120: uint32_t i;
! 1121: if (me == NULL)
! 1122: return;
! 1123: for (i = 0; i < nme; i++)
! 1124: efree(me[i].mp);
! 1125: efree(me);
! 1126: }
! 1127:
! 1128: private struct magic_map *
! 1129: apprentice_load(struct magic_set *ms, const char *fn, int action)
! 1130: {
! 1131: int errs = 0;
! 1132: struct magic_entry *mentry[MAGIC_SETS] = { NULL };
! 1133: uint32_t mentrycount[MAGIC_SETS] = { 0 };
! 1134: uint32_t i, j;
1.1.1.2 misho 1135: size_t files = 0, maxfiles = 0;
1.1.1.3 ! misho 1136: char **filearr = NULL;
1.1 misho 1137: struct stat st;
1.1.1.3 ! misho 1138: struct magic_map *map;
! 1139: php_stream *dir;
! 1140: php_stream_dirent d;
! 1141:
! 1142: TSRMLS_FETCH();
1.1 misho 1143:
1144: ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */
1145:
1.1.1.3 ! misho 1146: if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
! 1147: file_oomem(ms, sizeof(*map));
! 1148: return NULL;
! 1149: }
1.1 misho 1150:
1151: /* print silly verbose header for USG compat. */
1152: if (action == FILE_CHECK)
1153: (void)fprintf(stderr, "%s\n", usg_hdr);
1154:
1.1.1.3 ! misho 1155: {
! 1156: /* XXX the maxmagic has to be reset each time we load some new magic file.
! 1157: Where file commando is used it's not essential as the CLI process
! 1158: ends, multiple loading within the same process wouldn't work. */
! 1159: int k;
! 1160: for (k = 0; k < MAGIC_SETS; k++) {
! 1161: maxmagic[k] = 0;
! 1162: }
! 1163: }
! 1164:
1.1 misho 1165: /* load directory or file */
1.1.1.3 ! misho 1166: /* FIXME: Read file names and sort them to prevent
! 1167: non-determinism. See Debian bug #488562. */
1.1 misho 1168: if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1.1.1.3 ! misho 1169: int mflen;
! 1170: char mfn[MAXPATHLEN];
! 1171:
! 1172: dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1.1.1.2 misho 1173: if (!dir) {
1174: errs++;
1175: goto out;
1176: }
1.1.1.3 ! misho 1177: while (php_stream_readdir(dir, &d)) {
! 1178: if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1.1.1.2 misho 1179: file_oomem(ms,
1.1.1.3 ! misho 1180: strlen(fn) + strlen(d.d_name) + 2);
1.1.1.2 misho 1181: errs++;
1.1.1.3 ! misho 1182: php_stream_closedir(dir);
1.1.1.2 misho 1183: goto out;
1184: }
1185: if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1186: continue;
1187: }
1188: if (files >= maxfiles) {
1189: size_t mlen;
1190: maxfiles = (maxfiles + 1) * 2;
1191: mlen = maxfiles * sizeof(*filearr);
1192: if ((filearr = CAST(char **,
1.1.1.3 ! misho 1193: erealloc(filearr, mlen))) == NULL) {
1.1.1.2 misho 1194: file_oomem(ms, mlen);
1.1.1.3 ! misho 1195: php_stream_closedir(dir);
1.1.1.2 misho 1196: errs++;
1197: goto out;
1.1 misho 1198: }
1199: }
1.1.1.3 ! misho 1200: filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1.1.1.2 misho 1201: }
1.1.1.3 ! misho 1202: php_stream_closedir(dir);
1.1.1.2 misho 1203: qsort(filearr, files, sizeof(*filearr), cmpstrp);
1204: for (i = 0; i < files; i++) {
1.1.1.3 ! misho 1205: load_1(ms, action, filearr[i], &errs, mentry,
! 1206: mentrycount);
! 1207: efree(filearr[i]);
1.1.1.2 misho 1208: }
1.1.1.3 ! misho 1209: efree(filearr);
1.1 misho 1210: } else
1.1.1.3 ! misho 1211: load_1(ms, action, fn, &errs, mentry, mentrycount);
1.1 misho 1212: if (errs)
1213: goto out;
1214:
1.1.1.3 ! misho 1215: for (j = 0; j < MAGIC_SETS; j++) {
! 1216: /* Set types of tests */
! 1217: for (i = 0; i < mentrycount[j]; ) {
! 1218: if (mentry[j][i].mp->cont_level != 0) {
! 1219: i++;
1.1 misho 1220: continue;
1221: }
1.1.1.3 ! misho 1222: i = set_text_binary(ms, mentry[j], mentrycount[j], i);
! 1223: }
! 1224: qsort(mentry[j], mentrycount[j], sizeof(*mentry[j]),
! 1225: apprentice_sort);
1.1 misho 1226:
1.1.1.3 ! misho 1227: /*
! 1228: * Make sure that any level 0 "default" line is last
! 1229: * (if one exists).
! 1230: */
! 1231: set_last_default(ms, mentry[j], mentrycount[j]);
1.1 misho 1232:
1.1.1.3 ! misho 1233: /* coalesce per file arrays into a single one */
! 1234: if (coalesce_entries(ms, mentry[j], mentrycount[j],
! 1235: &map->magic[j], &map->nmagic[j]) == -1) {
! 1236: errs++;
! 1237: goto out;
1.1 misho 1238: }
1239: }
1240:
1241: out:
1.1.1.3 ! misho 1242: for (j = 0; j < MAGIC_SETS; j++)
! 1243: magic_entry_free(mentry[j], mentrycount[j]);
! 1244:
1.1 misho 1245: if (errs) {
1.1.1.3 ! misho 1246: for (j = 0; j < MAGIC_SETS; j++) {
! 1247: if (map->magic[j])
! 1248: efree(map->magic[j]);
! 1249: }
! 1250: efree(map);
! 1251: return NULL;
1.1 misho 1252: }
1.1.1.3 ! misho 1253: return map;
1.1 misho 1254: }
1255:
1256: /*
1257: * extend the sign bit if the comparison is to be signed
1258: */
1259: protected uint64_t
1260: file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1261: {
1262: if (!(m->flag & UNSIGNED)) {
1263: switch(m->type) {
1264: /*
1265: * Do not remove the casts below. They are
1266: * vital. When later compared with the data,
1267: * the sign extension must have happened.
1268: */
1269: case FILE_BYTE:
1270: v = (char) v;
1271: break;
1272: case FILE_SHORT:
1273: case FILE_BESHORT:
1274: case FILE_LESHORT:
1275: v = (short) v;
1276: break;
1277: case FILE_DATE:
1278: case FILE_BEDATE:
1279: case FILE_LEDATE:
1280: case FILE_MEDATE:
1281: case FILE_LDATE:
1282: case FILE_BELDATE:
1283: case FILE_LELDATE:
1284: case FILE_MELDATE:
1285: case FILE_LONG:
1286: case FILE_BELONG:
1287: case FILE_LELONG:
1288: case FILE_MELONG:
1289: case FILE_FLOAT:
1290: case FILE_BEFLOAT:
1291: case FILE_LEFLOAT:
1292: v = (int32_t) v;
1293: break;
1294: case FILE_QUAD:
1295: case FILE_BEQUAD:
1296: case FILE_LEQUAD:
1297: case FILE_QDATE:
1298: case FILE_QLDATE:
1.1.1.3 ! misho 1299: case FILE_QWDATE:
1.1 misho 1300: case FILE_BEQDATE:
1301: case FILE_BEQLDATE:
1.1.1.3 ! misho 1302: case FILE_BEQWDATE:
1.1 misho 1303: case FILE_LEQDATE:
1304: case FILE_LEQLDATE:
1.1.1.3 ! misho 1305: case FILE_LEQWDATE:
1.1 misho 1306: case FILE_DOUBLE:
1307: case FILE_BEDOUBLE:
1308: case FILE_LEDOUBLE:
1309: v = (int64_t) v;
1310: break;
1311: case FILE_STRING:
1312: case FILE_PSTRING:
1313: case FILE_BESTRING16:
1314: case FILE_LESTRING16:
1315: case FILE_REGEX:
1316: case FILE_SEARCH:
1317: case FILE_DEFAULT:
1318: case FILE_INDIRECT:
1.1.1.3 ! misho 1319: case FILE_NAME:
! 1320: case FILE_USE:
1.1 misho 1321: break;
1322: default:
1323: if (ms->flags & MAGIC_CHECK)
1324: file_magwarn(ms, "cannot happen: m->type=%d\n",
1325: m->type);
1326: return ~0U;
1327: }
1328: }
1329: return v;
1330: }
1331:
1332: private int
1333: string_modifier_check(struct magic_set *ms, struct magic *m)
1334: {
1335: if ((ms->flags & MAGIC_CHECK) == 0)
1336: return 0;
1337:
1.1.1.2 misho 1338: if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
1339: file_magwarn(ms,
1340: "'/BHhLl' modifiers are only allowed for pascal strings\n");
1341: return -1;
1342: }
1.1 misho 1343: switch (m->type) {
1344: case FILE_BESTRING16:
1345: case FILE_LESTRING16:
1346: if (m->str_flags != 0) {
1347: file_magwarn(ms,
1348: "no modifiers allowed for 16-bit strings\n");
1349: return -1;
1350: }
1351: break;
1352: case FILE_STRING:
1353: case FILE_PSTRING:
1354: if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1355: file_magwarn(ms,
1356: "'/%c' only allowed on regex and search\n",
1357: CHAR_REGEX_OFFSET_START);
1358: return -1;
1359: }
1360: break;
1361: case FILE_SEARCH:
1362: if (m->str_range == 0) {
1363: file_magwarn(ms,
1364: "missing range; defaulting to %d\n",
1365: STRING_DEFAULT_RANGE);
1366: m->str_range = STRING_DEFAULT_RANGE;
1367: return -1;
1368: }
1369: break;
1370: case FILE_REGEX:
1.1.1.2 misho 1371: if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1.1 misho 1372: file_magwarn(ms, "'/%c' not allowed on regex\n",
1.1.1.2 misho 1373: CHAR_COMPACT_WHITESPACE);
1.1 misho 1374: return -1;
1375: }
1.1.1.2 misho 1376: if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1.1 misho 1377: file_magwarn(ms, "'/%c' not allowed on regex\n",
1.1.1.2 misho 1378: CHAR_COMPACT_OPTIONAL_WHITESPACE);
1.1 misho 1379: return -1;
1380: }
1381: break;
1382: default:
1383: file_magwarn(ms, "coding error: m->type=%d\n",
1384: m->type);
1385: return -1;
1386: }
1387: return 0;
1388: }
1389:
1390: private int
1391: get_op(char c)
1392: {
1393: switch (c) {
1394: case '&':
1395: return FILE_OPAND;
1396: case '|':
1397: return FILE_OPOR;
1398: case '^':
1399: return FILE_OPXOR;
1400: case '+':
1401: return FILE_OPADD;
1402: case '-':
1403: return FILE_OPMINUS;
1404: case '*':
1405: return FILE_OPMULTIPLY;
1406: case '/':
1407: return FILE_OPDIVIDE;
1408: case '%':
1409: return FILE_OPMODULO;
1410: default:
1411: return -1;
1412: }
1413: }
1414:
1415: #ifdef ENABLE_CONDITIONALS
1416: private int
1417: get_cond(const char *l, const char **t)
1418: {
1419: static const struct cond_tbl_s {
1420: char name[8];
1421: size_t len;
1422: int cond;
1423: } cond_tbl[] = {
1424: { "if", 2, COND_IF },
1425: { "elif", 4, COND_ELIF },
1426: { "else", 4, COND_ELSE },
1427: { "", 0, COND_NONE },
1428: };
1429: const struct cond_tbl_s *p;
1430:
1431: for (p = cond_tbl; p->len; p++) {
1432: if (strncmp(l, p->name, p->len) == 0 &&
1433: isspace((unsigned char)l[p->len])) {
1434: if (t)
1435: *t = l + p->len;
1436: break;
1437: }
1438: }
1439: return p->cond;
1440: }
1441:
1442: private int
1443: check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1444: {
1445: int last_cond;
1446: last_cond = ms->c.li[cont_level].last_cond;
1447:
1448: switch (cond) {
1449: case COND_IF:
1450: if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1451: if (ms->flags & MAGIC_CHECK)
1452: file_magwarn(ms, "syntax error: `if'");
1453: return -1;
1454: }
1455: last_cond = COND_IF;
1456: break;
1457:
1458: case COND_ELIF:
1459: if (last_cond != COND_IF && last_cond != COND_ELIF) {
1460: if (ms->flags & MAGIC_CHECK)
1461: file_magwarn(ms, "syntax error: `elif'");
1462: return -1;
1463: }
1464: last_cond = COND_ELIF;
1465: break;
1466:
1467: case COND_ELSE:
1468: if (last_cond != COND_IF && last_cond != COND_ELIF) {
1469: if (ms->flags & MAGIC_CHECK)
1470: file_magwarn(ms, "syntax error: `else'");
1471: return -1;
1472: }
1473: last_cond = COND_NONE;
1474: break;
1475:
1476: case COND_NONE:
1477: last_cond = COND_NONE;
1478: break;
1479: }
1480:
1481: ms->c.li[cont_level].last_cond = last_cond;
1482: return 0;
1483: }
1484: #endif /* ENABLE_CONDITIONALS */
1485:
1486: /*
1487: * parse one line from magic file, put into magic[index++] if valid
1488: */
1489: private int
1.1.1.3 ! misho 1490: parse(struct magic_set *ms, struct magic_entry *me, const char *line,
! 1491: size_t lineno, int action)
1.1 misho 1492: {
1493: #ifdef ENABLE_CONDITIONALS
1494: static uint32_t last_cont_level = 0;
1495: #endif
1496: size_t i;
1497: struct magic *m;
1498: const char *l = line;
1499: char *t;
1500: int op;
1501: uint32_t cont_level;
1.1.1.3 ! misho 1502: int32_t diff;
1.1 misho 1503:
1504: cont_level = 0;
1505:
1.1.1.3 ! misho 1506: /*
! 1507: * Parse the offset.
! 1508: */
1.1 misho 1509: while (*l == '>') {
1510: ++l; /* step over */
1511: cont_level++;
1512: }
1513: #ifdef ENABLE_CONDITIONALS
1514: if (cont_level == 0 || cont_level > last_cont_level)
1515: if (file_check_mem(ms, cont_level) == -1)
1516: return -1;
1517: last_cont_level = cont_level;
1518: #endif
1519: if (cont_level != 0) {
1.1.1.3 ! misho 1520: if (me->mp == NULL) {
! 1521: file_magerror(ms, "No current entry for continuation");
! 1522: return -1;
! 1523: }
! 1524: if (me->cont_count == 0) {
! 1525: file_magerror(ms, "Continuations present with 0 count");
1.1 misho 1526: return -1;
1527: }
1.1.1.3 ! misho 1528: m = &me->mp[me->cont_count - 1];
! 1529: diff = (int32_t)cont_level - (int32_t)m->cont_level;
! 1530: if (diff > 1)
! 1531: file_magwarn(ms, "New continuation level %u is more "
! 1532: "than one larger than current level %u", cont_level,
! 1533: m->cont_level);
1.1 misho 1534: if (me->cont_count == me->max_count) {
1535: struct magic *nm;
1536: size_t cnt = me->max_count + ALLOC_CHUNK;
1.1.1.3 ! misho 1537: if ((nm = CAST(struct magic *, erealloc(me->mp,
! 1538: sizeof(*nm) * cnt))) == NULL) {
! 1539: file_oomem(ms, sizeof(*nm) * cnt);
! 1540: return -1;
! 1541: }
1.1 misho 1542: me->mp = m = nm;
1.1.1.2 misho 1543: me->max_count = CAST(uint32_t, cnt);
1.1 misho 1544: }
1545: m = &me->mp[me->cont_count++];
1546: (void)memset(m, 0, sizeof(*m));
1547: m->cont_level = cont_level;
1548: } else {
1.1.1.3 ! misho 1549: static const size_t len = sizeof(*m) * ALLOC_CHUNK;
! 1550: if (me->mp != NULL)
! 1551: return 1;
! 1552: if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
! 1553: file_oomem(ms, len);
! 1554: return -1;
1.1 misho 1555: }
1.1.1.3 ! misho 1556: me->mp = m;
! 1557: me->max_count = ALLOC_CHUNK;
1.1 misho 1558: (void)memset(m, 0, sizeof(*m));
1559: m->factor_op = FILE_FACTOR_OP_NONE;
1560: m->cont_level = 0;
1561: me->cont_count = 1;
1562: }
1.1.1.2 misho 1563: m->lineno = CAST(uint32_t, lineno);
1.1 misho 1564:
1565: if (*l == '&') { /* m->cont_level == 0 checked below. */
1566: ++l; /* step over */
1567: m->flag |= OFFADD;
1568: }
1569: if (*l == '(') {
1570: ++l; /* step over */
1571: m->flag |= INDIR;
1572: if (m->flag & OFFADD)
1573: m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1574:
1575: if (*l == '&') { /* m->cont_level == 0 checked below */
1576: ++l; /* step over */
1577: m->flag |= OFFADD;
1578: }
1579: }
1580: /* Indirect offsets are not valid at level 0. */
1581: if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1582: if (ms->flags & MAGIC_CHECK)
1583: file_magwarn(ms, "relative offset at level 0");
1584:
1585: /* get offset, then skip over it */
1586: m->offset = (uint32_t)strtoul(l, &t, 0);
1587: if (l == t)
1588: if (ms->flags & MAGIC_CHECK)
1589: file_magwarn(ms, "offset `%s' invalid", l);
1590: l = t;
1591:
1592: if (m->flag & INDIR) {
1593: m->in_type = FILE_LONG;
1594: m->in_offset = 0;
1595: /*
1596: * read [.lbs][+-]nnnnn)
1597: */
1598: if (*l == '.') {
1599: l++;
1600: switch (*l) {
1601: case 'l':
1602: m->in_type = FILE_LELONG;
1603: break;
1604: case 'L':
1605: m->in_type = FILE_BELONG;
1606: break;
1607: case 'm':
1608: m->in_type = FILE_MELONG;
1609: break;
1610: case 'h':
1611: case 's':
1612: m->in_type = FILE_LESHORT;
1613: break;
1614: case 'H':
1615: case 'S':
1616: m->in_type = FILE_BESHORT;
1617: break;
1618: case 'c':
1619: case 'b':
1620: case 'C':
1621: case 'B':
1622: m->in_type = FILE_BYTE;
1623: break;
1624: case 'e':
1625: case 'f':
1626: case 'g':
1627: m->in_type = FILE_LEDOUBLE;
1628: break;
1629: case 'E':
1630: case 'F':
1631: case 'G':
1632: m->in_type = FILE_BEDOUBLE;
1633: break;
1634: case 'i':
1635: m->in_type = FILE_LEID3;
1636: break;
1637: case 'I':
1638: m->in_type = FILE_BEID3;
1639: break;
1640: default:
1641: if (ms->flags & MAGIC_CHECK)
1642: file_magwarn(ms,
1643: "indirect offset type `%c' invalid",
1644: *l);
1645: break;
1646: }
1647: l++;
1648: }
1649:
1650: m->in_op = 0;
1651: if (*l == '~') {
1652: m->in_op |= FILE_OPINVERSE;
1653: l++;
1654: }
1655: if ((op = get_op(*l)) != -1) {
1656: m->in_op |= op;
1657: l++;
1658: }
1659: if (*l == '(') {
1660: m->in_op |= FILE_OPINDIRECT;
1661: l++;
1662: }
1663: if (isdigit((unsigned char)*l) || *l == '-') {
1664: m->in_offset = (int32_t)strtol(l, &t, 0);
1665: if (l == t)
1666: if (ms->flags & MAGIC_CHECK)
1667: file_magwarn(ms,
1668: "in_offset `%s' invalid", l);
1669: l = t;
1670: }
1671: if (*l++ != ')' ||
1672: ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1673: if (ms->flags & MAGIC_CHECK)
1674: file_magwarn(ms,
1675: "missing ')' in indirect offset");
1676: }
1677: EATAB;
1678:
1679: #ifdef ENABLE_CONDITIONALS
1680: m->cond = get_cond(l, &l);
1681: if (check_cond(ms, m->cond, cont_level) == -1)
1682: return -1;
1683:
1684: EATAB;
1685: #endif
1686:
1.1.1.3 ! misho 1687: /*
! 1688: * Parse the type.
! 1689: */
1.1 misho 1690: if (*l == 'u') {
1.1.1.3 ! misho 1691: /*
! 1692: * Try it as a keyword type prefixed by "u"; match what
! 1693: * follows the "u". If that fails, try it as an SUS
! 1694: * integer type.
! 1695: */
! 1696: m->type = get_type(type_tbl, l + 1, &l);
! 1697: if (m->type == FILE_INVALID) {
! 1698: /*
! 1699: * Not a keyword type; parse it as an SUS type,
! 1700: * 'u' possibly followed by a number or C/S/L.
! 1701: */
! 1702: m->type = get_standard_integer_type(l, &l);
! 1703: }
! 1704: // It's unsigned.
! 1705: if (m->type != FILE_INVALID)
! 1706: m->flag |= UNSIGNED;
! 1707: } else {
! 1708: /*
! 1709: * Try it as a keyword type. If that fails, try it as
! 1710: * an SUS integer type if it begins with "d" or as an
! 1711: * SUS string type if it begins with "s". In any case,
! 1712: * it's not unsigned.
! 1713: */
! 1714: m->type = get_type(type_tbl, l, &l);
! 1715: if (m->type == FILE_INVALID) {
! 1716: /*
! 1717: * Not a keyword type; parse it as an SUS type,
! 1718: * either 'd' possibly followed by a number or
! 1719: * C/S/L, or just 's'.
! 1720: */
! 1721: if (*l == 'd')
! 1722: m->type = get_standard_integer_type(l, &l);
! 1723: else if (*l == 's' && !isalpha((unsigned char)l[1])) {
! 1724: m->type = FILE_STRING;
1.1 misho 1725: ++l;
1.1.1.3 ! misho 1726: }
! 1727: }
1.1 misho 1728: }
1729:
1.1.1.3 ! misho 1730: if (m->type == FILE_INVALID) {
! 1731: /* Not found - try it as a special keyword. */
! 1732: m->type = get_type(special_tbl, l, &l);
! 1733: }
! 1734:
1.1 misho 1735: if (m->type == FILE_INVALID) {
1736: if (ms->flags & MAGIC_CHECK)
1737: file_magwarn(ms, "type `%s' invalid", l);
1.1.1.3 ! misho 1738: if (me->mp) {
! 1739: efree(me->mp);
! 1740: me->mp = NULL;
! 1741: }
1.1 misho 1742: return -1;
1743: }
1744:
1745: /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1746: /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1747:
1748: m->mask_op = 0;
1749: if (*l == '~') {
1750: if (!IS_LIBMAGIC_STRING(m->type))
1751: m->mask_op |= FILE_OPINVERSE;
1752: else if (ms->flags & MAGIC_CHECK)
1753: file_magwarn(ms, "'~' invalid for string types");
1754: ++l;
1755: }
1756: m->str_range = 0;
1.1.1.2 misho 1757: m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1.1 misho 1758: if ((op = get_op(*l)) != -1) {
1759: if (!IS_LIBMAGIC_STRING(m->type)) {
1760: uint64_t val;
1761: ++l;
1762: m->mask_op |= op;
1763: val = (uint64_t)strtoull(l, &t, 0);
1764: l = t;
1765: m->num_mask = file_signextend(ms, m, val);
1766: eatsize(&l);
1767: }
1768: else if (op == FILE_OPDIVIDE) {
1769: int have_range = 0;
1770: while (!isspace((unsigned char)*++l)) {
1771: switch (*l) {
1772: case '0': case '1': case '2':
1773: case '3': case '4': case '5':
1774: case '6': case '7': case '8':
1775: case '9':
1776: if (have_range &&
1777: (ms->flags & MAGIC_CHECK))
1778: file_magwarn(ms,
1779: "multiple ranges");
1780: have_range = 1;
1.1.1.2 misho 1781: m->str_range = CAST(uint32_t,
1782: strtoul(l, &t, 0));
1.1 misho 1783: if (m->str_range == 0)
1784: file_magwarn(ms,
1785: "zero range");
1786: l = t - 1;
1787: break;
1.1.1.2 misho 1788: case CHAR_COMPACT_WHITESPACE:
1789: m->str_flags |=
1790: STRING_COMPACT_WHITESPACE;
1.1 misho 1791: break;
1.1.1.2 misho 1792: case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1.1 misho 1793: m->str_flags |=
1.1.1.2 misho 1794: STRING_COMPACT_OPTIONAL_WHITESPACE;
1.1 misho 1795: break;
1796: case CHAR_IGNORE_LOWERCASE:
1797: m->str_flags |= STRING_IGNORE_LOWERCASE;
1798: break;
1799: case CHAR_IGNORE_UPPERCASE:
1800: m->str_flags |= STRING_IGNORE_UPPERCASE;
1801: break;
1802: case CHAR_REGEX_OFFSET_START:
1803: m->str_flags |= REGEX_OFFSET_START;
1804: break;
1.1.1.2 misho 1805: case CHAR_BINTEST:
1806: m->str_flags |= STRING_BINTEST;
1807: break;
1808: case CHAR_TEXTTEST:
1809: m->str_flags |= STRING_TEXTTEST;
1810: break;
1.1.1.3 ! misho 1811: case CHAR_TRIM:
! 1812: m->str_flags |= STRING_TRIM;
! 1813: break;
1.1.1.2 misho 1814: case CHAR_PSTRING_1_LE:
1815: if (m->type != FILE_PSTRING)
1816: goto bad;
1817: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
1818: break;
1819: case CHAR_PSTRING_2_BE:
1820: if (m->type != FILE_PSTRING)
1821: goto bad;
1822: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
1823: break;
1824: case CHAR_PSTRING_2_LE:
1825: if (m->type != FILE_PSTRING)
1826: goto bad;
1827: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
1828: break;
1829: case CHAR_PSTRING_4_BE:
1830: if (m->type != FILE_PSTRING)
1831: goto bad;
1832: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
1833: break;
1834: case CHAR_PSTRING_4_LE:
1835: if (m->type != FILE_PSTRING)
1836: goto bad;
1837: m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
1838: break;
1839: case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1840: if (m->type != FILE_PSTRING)
1841: goto bad;
1842: m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1843: break;
1.1 misho 1844: default:
1.1.1.2 misho 1845: bad:
1.1 misho 1846: if (ms->flags & MAGIC_CHECK)
1847: file_magwarn(ms,
1.1.1.2 misho 1848: "string extension `%c' "
1849: "invalid", *l);
1.1 misho 1850: return -1;
1851: }
1852: /* allow multiple '/' for readability */
1853: if (l[1] == '/' &&
1854: !isspace((unsigned char)l[2]))
1855: l++;
1856: }
1857: if (string_modifier_check(ms, m) == -1)
1858: return -1;
1859: }
1860: else {
1861: if (ms->flags & MAGIC_CHECK)
1862: file_magwarn(ms, "invalid string op: %c", *t);
1863: return -1;
1864: }
1865: }
1866: /*
1867: * We used to set mask to all 1's here, instead let's just not do
1868: * anything if mask = 0 (unless you have a better idea)
1869: */
1870: EATAB;
1871:
1872: switch (*l) {
1873: case '>':
1874: case '<':
1875: m->reln = *l;
1876: ++l;
1877: if (*l == '=') {
1878: if (ms->flags & MAGIC_CHECK) {
1879: file_magwarn(ms, "%c= not supported",
1880: m->reln);
1881: return -1;
1882: }
1883: ++l;
1884: }
1885: break;
1886: /* Old-style anding: "0 byte &0x80 dynamically linked" */
1887: case '&':
1888: case '^':
1889: case '=':
1890: m->reln = *l;
1891: ++l;
1892: if (*l == '=') {
1893: /* HP compat: ignore &= etc. */
1894: ++l;
1895: }
1896: break;
1897: case '!':
1898: m->reln = *l;
1899: ++l;
1900: break;
1901: default:
1902: m->reln = '='; /* the default relation */
1903: if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
1904: isspace((unsigned char)l[1])) || !l[1])) {
1905: m->reln = *l;
1906: ++l;
1907: }
1908: break;
1909: }
1910: /*
1911: * Grab the value part, except for an 'x' reln.
1912: */
1913: if (m->reln != 'x' && getvalue(ms, m, &l, action))
1914: return -1;
1915:
1916: /*
1917: * TODO finish this macro and start using it!
1918: * #define offsetcheck {if (offset > HOWMANY-1)
1919: * magwarn("offset too big"); }
1920: */
1921:
1922: /*
1923: * Now get last part - the description
1924: */
1925: EATAB;
1926: if (l[0] == '\b') {
1927: ++l;
1928: m->flag |= NOSPACE;
1929: } else if ((l[0] == '\\') && (l[1] == 'b')) {
1930: ++l;
1931: ++l;
1932: m->flag |= NOSPACE;
1933: }
1934: for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
1935: continue;
1936: if (i == sizeof(m->desc)) {
1937: m->desc[sizeof(m->desc) - 1] = '\0';
1938: if (ms->flags & MAGIC_CHECK)
1939: file_magwarn(ms, "description `%s' truncated", m->desc);
1940: }
1941:
1942: /*
1943: * We only do this check while compiling, or if any of the magic
1944: * files were not compiled.
1945: */
1946: if (ms->flags & MAGIC_CHECK) {
1947: if (check_format(ms, m) == -1)
1948: return -1;
1949: }
1950: m->mimetype[0] = '\0'; /* initialise MIME type to none */
1951: return 0;
1952: }
1953:
1954: /*
1955: * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
1956: * if valid
1957: */
1958: private int
1959: parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
1960: {
1961: const char *l = line;
1962: char *el;
1963: unsigned long factor;
1964: struct magic *m = &me->mp[0];
1965:
1966: if (m->factor_op != FILE_FACTOR_OP_NONE) {
1967: file_magwarn(ms,
1968: "Current entry already has a strength type: %c %d",
1969: m->factor_op, m->factor);
1970: return -1;
1971: }
1972: EATAB;
1973: switch (*l) {
1974: case FILE_FACTOR_OP_NONE:
1975: case FILE_FACTOR_OP_PLUS:
1976: case FILE_FACTOR_OP_MINUS:
1977: case FILE_FACTOR_OP_TIMES:
1978: case FILE_FACTOR_OP_DIV:
1979: m->factor_op = *l++;
1980: break;
1981: default:
1982: file_magwarn(ms, "Unknown factor op `%c'", *l);
1983: return -1;
1984: }
1985: EATAB;
1986: factor = strtoul(l, &el, 0);
1987: if (factor > 255) {
1988: file_magwarn(ms, "Too large factor `%lu'", factor);
1989: goto out;
1990: }
1991: if (*el && !isspace((unsigned char)*el)) {
1992: file_magwarn(ms, "Bad factor `%s'", l);
1993: goto out;
1994: }
1995: m->factor = (uint8_t)factor;
1996: if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
1997: file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
1998: m->factor_op, m->factor);
1999: goto out;
2000: }
2001: return 0;
2002: out:
2003: m->factor_op = FILE_FACTOR_OP_NONE;
2004: m->factor = 0;
2005: return -1;
2006: }
2007:
2008: /*
1.1.1.2 misho 2009: * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2010: * magic[index - 1]
1.1 misho 2011: */
2012: private int
2013: parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2014: {
2015: size_t i;
2016: const char *l = line;
2017: struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2018:
2019: if (m->apple[0] != '\0') {
1.1.1.2 misho 2020: file_magwarn(ms, "Current entry already has a APPLE type "
2021: "`%.8s', new type `%s'", m->mimetype, l);
1.1 misho 2022: return -1;
2023: }
2024:
2025: EATAB;
1.1.1.2 misho 2026: for (i = 0; *l && ((isascii((unsigned char)*l) &&
2027: isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2028: i < sizeof(m->apple); m->apple[i++] = *l++)
1.1 misho 2029: continue;
2030: if (i == sizeof(m->apple) && *l) {
1.1.1.2 misho 2031: /* We don't need to NUL terminate here, printing handles it */
1.1 misho 2032: if (ms->flags & MAGIC_CHECK)
1.1.1.2 misho 2033: file_magwarn(ms, "APPLE type `%s' truncated %"
2034: SIZE_T_FORMAT "u", line, i);
1.1 misho 2035: }
2036:
2037: if (i > 0)
2038: return 0;
2039: else
2040: return -1;
2041: }
2042:
2043: /*
2044: * parse a MIME annotation line from magic file, put into magic[index - 1]
2045: * if valid
2046: */
2047: private int
2048: parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2049: {
2050: size_t i;
2051: const char *l = line;
2052: struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2053:
2054: if (m->mimetype[0] != '\0') {
2055: file_magwarn(ms, "Current entry already has a MIME type `%s',"
2056: " new type `%s'", m->mimetype, l);
2057: return -1;
2058: }
2059:
2060: EATAB;
1.1.1.2 misho 2061: for (i = 0; *l && ((isascii((unsigned char)*l) &&
2062: isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2063: i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
1.1 misho 2064: continue;
2065: if (i == sizeof(m->mimetype)) {
1.1.1.2 misho 2066: m->mimetype[sizeof(m->mimetype) - 1] = '\0';
1.1 misho 2067: if (ms->flags & MAGIC_CHECK)
1.1.1.2 misho 2068: file_magwarn(ms, "MIME type `%s' truncated %"
2069: SIZE_T_FORMAT "u", m->mimetype, i);
1.1 misho 2070: } else
2071: m->mimetype[i] = '\0';
2072:
2073: if (i > 0)
2074: return 0;
2075: else
2076: return -1;
2077: }
2078:
2079: private int
2080: check_format_type(const char *ptr, int type)
2081: {
2082: int quad = 0;
2083: if (*ptr == '\0') {
2084: /* Missing format string; bad */
2085: return -1;
2086: }
2087:
2088: switch (type) {
2089: case FILE_FMT_QUAD:
2090: quad = 1;
2091: /*FALLTHROUGH*/
2092: case FILE_FMT_NUM:
2093: if (*ptr == '-')
2094: ptr++;
2095: if (*ptr == '.')
2096: ptr++;
2097: while (isdigit((unsigned char)*ptr)) ptr++;
2098: if (*ptr == '.')
2099: ptr++;
2100: while (isdigit((unsigned char)*ptr)) ptr++;
2101: if (quad) {
2102: if (*ptr++ != 'l')
2103: return -1;
2104: if (*ptr++ != 'l')
2105: return -1;
2106: }
2107:
2108: switch (*ptr++) {
2109: case 'l':
2110: switch (*ptr++) {
2111: case 'i':
2112: case 'd':
2113: case 'u':
1.1.1.3 ! misho 2114: case 'o':
1.1 misho 2115: case 'x':
2116: case 'X':
2117: return 0;
2118: default:
2119: return -1;
2120: }
2121:
2122: case 'h':
2123: switch (*ptr++) {
2124: case 'h':
2125: switch (*ptr++) {
2126: case 'i':
2127: case 'd':
2128: case 'u':
1.1.1.3 ! misho 2129: case 'o':
1.1 misho 2130: case 'x':
2131: case 'X':
2132: return 0;
2133: default:
2134: return -1;
2135: }
2136: case 'd':
2137: return 0;
2138: default:
2139: return -1;
2140: }
2141:
2142: case 'i':
2143: case 'c':
2144: case 'd':
2145: case 'u':
1.1.1.3 ! misho 2146: case 'o':
1.1 misho 2147: case 'x':
2148: case 'X':
2149: return 0;
2150:
2151: default:
2152: return -1;
2153: }
2154:
2155: case FILE_FMT_FLOAT:
2156: case FILE_FMT_DOUBLE:
2157: if (*ptr == '-')
2158: ptr++;
2159: if (*ptr == '.')
2160: ptr++;
2161: while (isdigit((unsigned char)*ptr)) ptr++;
2162: if (*ptr == '.')
2163: ptr++;
2164: while (isdigit((unsigned char)*ptr)) ptr++;
2165:
2166: switch (*ptr++) {
2167: case 'e':
2168: case 'E':
2169: case 'f':
2170: case 'F':
2171: case 'g':
2172: case 'G':
2173: return 0;
2174:
2175: default:
2176: return -1;
2177: }
2178:
2179:
2180: case FILE_FMT_STR:
2181: if (*ptr == '-')
2182: ptr++;
2183: while (isdigit((unsigned char )*ptr))
2184: ptr++;
2185: if (*ptr == '.') {
2186: ptr++;
2187: while (isdigit((unsigned char )*ptr))
2188: ptr++;
2189: }
2190:
2191: switch (*ptr++) {
2192: case 's':
2193: return 0;
2194: default:
2195: return -1;
2196: }
2197:
2198: default:
2199: /* internal error */
2200: abort();
2201: }
2202: /*NOTREACHED*/
2203: return -1;
2204: }
2205:
2206: /*
2207: * Check that the optional printf format in description matches
2208: * the type of the magic.
2209: */
2210: private int
2211: check_format(struct magic_set *ms, struct magic *m)
2212: {
2213: char *ptr;
2214:
2215: for (ptr = m->desc; *ptr; ptr++)
2216: if (*ptr == '%')
2217: break;
2218: if (*ptr == '\0') {
2219: /* No format string; ok */
2220: return 1;
2221: }
2222:
2223: assert(file_nformats == file_nnames);
2224:
2225: if (m->type >= file_nformats) {
2226: file_magwarn(ms, "Internal error inconsistency between "
2227: "m->type and format strings");
2228: return -1;
2229: }
2230: if (file_formats[m->type] == FILE_FMT_NONE) {
2231: file_magwarn(ms, "No format string for `%s' with description "
2232: "`%s'", m->desc, file_names[m->type]);
2233: return -1;
2234: }
2235:
2236: ptr++;
2237: if (check_format_type(ptr, file_formats[m->type]) == -1) {
2238: /*
2239: * TODO: this error message is unhelpful if the format
2240: * string is not one character long
2241: */
2242: file_magwarn(ms, "Printf format `%c' is not valid for type "
2243: "`%s' in description `%s'", *ptr ? *ptr : '?',
2244: file_names[m->type], m->desc);
2245: return -1;
2246: }
2247:
2248: for (; *ptr; ptr++) {
2249: if (*ptr == '%') {
2250: file_magwarn(ms,
2251: "Too many format strings (should have at most one) "
2252: "for `%s' with description `%s'",
2253: file_names[m->type], m->desc);
2254: return -1;
2255: }
2256: }
2257: return 0;
2258: }
2259:
2260: /*
2261: * Read a numeric value from a pointer, into the value union of a magic
2262: * pointer, according to the magic type. Update the string pointer to point
2263: * just after the number read. Return 0 for success, non-zero for failure.
2264: */
2265: private int
2266: getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2267: {
2268: switch (m->type) {
2269: case FILE_BESTRING16:
2270: case FILE_LESTRING16:
2271: case FILE_STRING:
2272: case FILE_PSTRING:
2273: case FILE_REGEX:
2274: case FILE_SEARCH:
1.1.1.3 ! misho 2275: case FILE_NAME:
! 2276: case FILE_USE:
1.1 misho 2277: *p = getstr(ms, m, *p, action == FILE_COMPILE);
2278: if (*p == NULL) {
2279: if (ms->flags & MAGIC_CHECK)
2280: file_magwarn(ms, "cannot get string from `%s'",
2281: m->value.s);
2282: return -1;
2283: }
2284: return 0;
2285: case FILE_FLOAT:
2286: case FILE_BEFLOAT:
2287: case FILE_LEFLOAT:
2288: if (m->reln != 'x') {
2289: char *ep;
2290: #ifdef HAVE_STRTOF
2291: m->value.f = strtof(*p, &ep);
2292: #else
2293: m->value.f = (float)strtod(*p, &ep);
2294: #endif
2295: *p = ep;
2296: }
2297: return 0;
2298: case FILE_DOUBLE:
2299: case FILE_BEDOUBLE:
2300: case FILE_LEDOUBLE:
2301: if (m->reln != 'x') {
2302: char *ep;
2303: m->value.d = strtod(*p, &ep);
2304: *p = ep;
2305: }
2306: return 0;
2307: default:
2308: if (m->reln != 'x') {
2309: char *ep;
2310: m->value.q = file_signextend(ms, m,
2311: (uint64_t)strtoull(*p, &ep, 0));
2312: *p = ep;
2313: eatsize(p);
2314: }
2315: return 0;
2316: }
2317: }
2318:
2319: /*
2320: * Convert a string containing C character escapes. Stop at an unescaped
2321: * space or tab.
2322: * Copy the converted version to "m->value.s", and the length in m->vallen.
2323: * Return updated scan pointer as function result. Warn if set.
2324: */
2325: private const char *
2326: getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2327: {
2328: const char *origs = s;
2329: char *p = m->value.s;
2330: size_t plen = sizeof(m->value.s);
2331: char *origp = p;
2332: char *pmax = p + plen - 1;
2333: int c;
2334: int val;
2335:
2336: while ((c = *s++) != '\0') {
2337: if (isspace((unsigned char) c))
2338: break;
2339: if (p >= pmax) {
2340: file_error(ms, 0, "string too long: `%s'", origs);
2341: return NULL;
2342: }
2343: if (c == '\\') {
2344: switch(c = *s++) {
2345:
2346: case '\0':
2347: if (warn)
2348: file_magwarn(ms, "incomplete escape");
2349: goto out;
2350:
2351: case '\t':
2352: if (warn) {
2353: file_magwarn(ms,
2354: "escaped tab found, use \\t instead");
2355: warn = 0; /* already did */
2356: }
2357: /*FALLTHROUGH*/
2358: default:
2359: if (warn) {
2360: if (isprint((unsigned char)c)) {
2361: /* Allow escaping of
2362: * ``relations'' */
1.1.1.2 misho 2363: if (strchr("<>&^=!", c) == NULL
2364: && (m->type != FILE_REGEX ||
2365: strchr("[]().*?^$|{}", c)
2366: == NULL)) {
1.1 misho 2367: file_magwarn(ms, "no "
2368: "need to escape "
2369: "`%c'", c);
2370: }
2371: } else {
2372: file_magwarn(ms,
2373: "unknown escape sequence: "
2374: "\\%03o", c);
2375: }
2376: }
2377: /*FALLTHROUGH*/
2378: /* space, perhaps force people to use \040? */
2379: case ' ':
2380: #if 0
2381: /*
2382: * Other things people escape, but shouldn't need to,
2383: * so we disallow them
2384: */
2385: case '\'':
2386: case '"':
2387: case '?':
2388: #endif
2389: /* Relations */
2390: case '>':
2391: case '<':
2392: case '&':
2393: case '^':
2394: case '=':
2395: case '!':
2396: /* and baskslash itself */
2397: case '\\':
2398: *p++ = (char) c;
2399: break;
2400:
2401: case 'a':
2402: *p++ = '\a';
2403: break;
2404:
2405: case 'b':
2406: *p++ = '\b';
2407: break;
2408:
2409: case 'f':
2410: *p++ = '\f';
2411: break;
2412:
2413: case 'n':
2414: *p++ = '\n';
2415: break;
2416:
2417: case 'r':
2418: *p++ = '\r';
2419: break;
2420:
2421: case 't':
2422: *p++ = '\t';
2423: break;
2424:
2425: case 'v':
2426: *p++ = '\v';
2427: break;
2428:
2429: /* \ and up to 3 octal digits */
2430: case '0':
2431: case '1':
2432: case '2':
2433: case '3':
2434: case '4':
2435: case '5':
2436: case '6':
2437: case '7':
2438: val = c - '0';
2439: c = *s++; /* try for 2 */
2440: if (c >= '0' && c <= '7') {
2441: val = (val << 3) | (c - '0');
2442: c = *s++; /* try for 3 */
2443: if (c >= '0' && c <= '7')
2444: val = (val << 3) | (c-'0');
2445: else
2446: --s;
2447: }
2448: else
2449: --s;
2450: *p++ = (char)val;
2451: break;
2452:
2453: /* \x and up to 2 hex digits */
2454: case 'x':
2455: val = 'x'; /* Default if no digits */
2456: c = hextoint(*s++); /* Get next char */
2457: if (c >= 0) {
2458: val = c;
2459: c = hextoint(*s++);
2460: if (c >= 0)
2461: val = (val << 4) + c;
2462: else
2463: --s;
2464: } else
2465: --s;
2466: *p++ = (char)val;
2467: break;
2468: }
2469: } else
2470: *p++ = (char)c;
2471: }
2472: out:
2473: *p = '\0';
1.1.1.2 misho 2474: m->vallen = CAST(unsigned char, (p - origp));
1.1 misho 2475: if (m->type == FILE_PSTRING)
1.1.1.2 misho 2476: m->vallen += (unsigned char)file_pstring_length_size(m);
1.1 misho 2477: return s;
2478: }
2479:
2480:
/*
 * Translate one hexadecimal digit character into its numeric value.
 * Anything that is not 0-9, a-f or A-F yields -1.
 */
private int
hextoint(int c)
{
	const unsigned char uc = (unsigned char)c;

	/* Reject bytes with the high bit set (non-ASCII). */
	if ((uc & 0x80) != 0)
		return -1;
	if (uc >= '0' && uc <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
2495:
2496:
/*
 * Write a string to fp with non-printable bytes rendered as C escapes.
 *
 * When len equals ~0U the string is treated as NUL-terminated; otherwise
 * exactly len bytes are written.  Bytes in the range 040..0176 are emitted
 * verbatim, the usual control characters as \a \b \f \n \r \t \v, and all
 * others as a backslash followed by three octal digits.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	static const char ctrl[] = { '\a', '\b', '\f', '\n', '\r', '\t', '\v' };
	static const char name[] = { 'a', 'b', 'f', 'n', 'r', 't', 'v' };
	size_t k;
	char ch;

	for (;;) {
		if (len == ~0U) {
			ch = *s++;
			if (ch == '\0')
				break;
		} else {
			if (len-- == 0)
				break;
			ch = *s++;
		}

		if (ch >= 040 && ch <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(ch, fp);
			continue;
		}

		(void) fputc('\\', fp);
		for (k = 0; k < sizeof(ctrl); k++)
			if (ctrl[k] == ch)
				break;
		if (k < sizeof(ctrl))
			(void) fputc(name[k], fp);
		else
			(void) fprintf(fp, "%.3o", ch & 0377);
	}
}
2556:
2557: /*
2558: * eatsize(): Eat the size spec from a number [eg. 10UL]
2559: */
2560: private void
2561: eatsize(const char **p)
2562: {
2563: const char *l = *p;
2564:
2565: if (LOWCASE(*l) == 'u')
2566: l++;
2567:
2568: switch (LOWCASE(*l)) {
2569: case 'l': /* long */
2570: case 's': /* short */
2571: case 'h': /* short */
2572: case 'b': /* char/byte */
2573: case 'c': /* char/byte */
2574: l++;
2575: /*FALLTHROUGH*/
2576: default:
2577: break;
2578: }
2579:
2580: *p = l;
2581: }
2582:
2583: /*
2584: * handle a compiled file.
2585: */
1.1.1.3 ! misho 2586:
! 2587: private struct magic_map *
! 2588: apprentice_map(struct magic_set *ms, const char *fn)
1.1 misho 2589: {
2590: uint32_t *ptr;
1.1.1.3 ! misho 2591: uint32_t version, entries, nentries;
1.1 misho 2592: int needsbyteswap;
2593: char *dbname = NULL;
1.1.1.3 ! misho 2594: struct magic_map *map;
! 2595: size_t i;
1.1 misho 2596: php_stream *stream = NULL;
2597: php_stream_statbuf st;
2598:
2599:
2600: TSRMLS_FETCH();
2601:
1.1.1.3 ! misho 2602: if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
! 2603: file_oomem(ms, sizeof(*map));
! 2604: efree(map);
! 2605: goto error;
! 2606: }
! 2607:
1.1 misho 2608: if (fn == NULL) {
1.1.1.3 ! misho 2609: map->p = (void *)&php_magic_database;
1.1 misho 2610: goto internal_loaded;
2611: }
2612:
1.1.1.3 ! misho 2613: #ifdef PHP_WIN32
! 2614: /* Don't bother on windows with php_stream_open_wrapper,
! 2615: return to give apprentice_load() a chance. */
! 2616: if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
! 2617: if (st.sb.st_mode & S_IFDIR) {
! 2618: goto error;
! 2619: }
! 2620: }
! 2621: #endif
! 2622:
1.1 misho 2623: dbname = mkdbname(ms, fn, 0);
2624: if (dbname == NULL)
1.1.1.3 ! misho 2625: goto error;
1.1 misho 2626:
2627: #if PHP_API_VERSION < 20100412
2628: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
2629: #else
2630: stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2631: #endif
2632:
2633: if (!stream) {
1.1.1.3 ! misho 2634: goto error;
1.1 misho 2635: }
2636:
2637: if (php_stream_stat(stream, &st) < 0) {
2638: file_error(ms, errno, "cannot stat `%s'", dbname);
1.1.1.3 ! misho 2639: goto error;
1.1 misho 2640: }
2641:
2642: if (st.sb.st_size < 8) {
2643: file_error(ms, 0, "file `%s' is too small", dbname);
1.1.1.3 ! misho 2644: goto error;
1.1 misho 2645: }
2646:
1.1.1.3 ! misho 2647: map->len = (size_t)st.sb.st_size;
! 2648: if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
! 2649: file_oomem(ms, map->len);
! 2650: goto error;
! 2651: }
! 2652: if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
1.1 misho 2653: file_badread(ms);
1.1.1.3 ! misho 2654: goto error;
1.1 misho 2655: }
1.1.1.3 ! misho 2656: map->len = 0;
! 2657: #define RET 1
1.1 misho 2658:
2659: php_stream_close(stream);
2660: stream = NULL;
2661:
2662: internal_loaded:
1.1.1.3 ! misho 2663: ptr = (uint32_t *)(void *)map->p;
1.1 misho 2664: if (*ptr != MAGICNO) {
2665: if (swap4(*ptr) != MAGICNO) {
2666: file_error(ms, 0, "bad magic in `%s'", dbname);
1.1.1.3 ! misho 2667: goto error;
1.1 misho 2668: }
2669: needsbyteswap = 1;
1.1.1.2 misho 2670: } else
1.1 misho 2671: needsbyteswap = 0;
2672: if (needsbyteswap)
2673: version = swap4(ptr[1]);
2674: else
2675: version = ptr[1];
2676: if (version != VERSIONNO) {
2677: file_error(ms, 0, "File %d.%d supports only version %d magic "
2678: "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2679: VERSIONNO, dbname, version);
1.1.1.3 ! misho 2680: goto error;
1.1 misho 2681: }
2682:
2683: /* php_magic_database is a const, performing writes will segfault. This is for big-endian
2684: machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2685: future. */
2686: if (needsbyteswap && fn == NULL) {
1.1.1.3 ! misho 2687: map->p = emalloc(sizeof(php_magic_database));
! 2688: map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
1.1 misho 2689: }
2690:
1.1.1.3 ! misho 2691: if (NULL != fn) {
! 2692: nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
! 2693: entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
! 2694: if ((off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
! 2695: file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
! 2696: dbname, (unsigned long long)st.sb.st_size,
! 2697: sizeof(struct magic));
! 2698: goto error;
! 2699: }
! 2700: }
! 2701: map->magic[0] = CAST(struct magic *, map->p) + 1;
! 2702: nentries = 0;
! 2703: for (i = 0; i < MAGIC_SETS; i++) {
! 2704: if (needsbyteswap)
! 2705: map->nmagic[i] = swap4(ptr[i + 2]);
! 2706: else
! 2707: map->nmagic[i] = ptr[i + 2];
! 2708: if (i != MAGIC_SETS - 1)
! 2709: map->magic[i + 1] = map->magic[i] + map->nmagic[i];
! 2710: nentries += map->nmagic[i];
! 2711: }
! 2712: if (NULL != fn && entries != nentries + 1) {
! 2713: file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
! 2714: dbname, entries, nentries + 1);
! 2715: goto error;
1.1 misho 2716: }
2717:
1.1.1.3 ! misho 2718: if (needsbyteswap)
! 2719: for (i = 0; i < MAGIC_SETS; i++)
! 2720: byteswap(map->magic[i], map->nmagic[i]);
! 2721:
1.1 misho 2722: if (dbname) {
2723: efree(dbname);
2724: }
1.1.1.3 ! misho 2725: return map;
1.1 misho 2726:
1.1.1.3 ! misho 2727: error:
1.1 misho 2728: if (stream) {
2729: php_stream_close(stream);
2730: }
1.1.1.3 ! misho 2731: apprentice_unmap(map);
1.1 misho 2732: if (dbname) {
2733: efree(dbname);
2734: }
1.1.1.3 ! misho 2735: return NULL;
1.1 misho 2736: }
2737:
2738: private const uint32_t ar[] = {
2739: MAGICNO, VERSIONNO
2740: };
1.1.1.3 ! misho 2741:
1.1 misho 2742: /*
2743: * handle an mmaped file.
2744: */
2745: private int
1.1.1.3 ! misho 2746: apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
1.1 misho 2747: {
1.1.1.3 ! misho 2748: static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
! 2749: static const size_t m = sizeof(**map->magic);
! 2750: int fd = -1;
! 2751: size_t len;
1.1 misho 2752: char *dbname;
2753: int rv = -1;
1.1.1.3 ! misho 2754: uint32_t i;
1.1 misho 2755: php_stream *stream;
2756:
2757: TSRMLS_FETCH();
2758:
2759: dbname = mkdbname(ms, fn, 0);
2760:
1.1.1.2 misho 2761: if (dbname == NULL)
1.1 misho 2762: goto out;
2763:
2764: /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
2765: #if PHP_API_VERSION < 20100412
2766: stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL);
2767: #else
2768: stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
2769: #endif
2770:
2771: if (!stream) {
2772: file_error(ms, errno, "cannot open `%s'", dbname);
2773: goto out;
2774: }
2775:
1.1.1.3 ! misho 2776: if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
1.1 misho 2777: file_error(ms, errno, "error writing `%s'", dbname);
2778: goto out;
2779: }
2780:
1.1.1.3 ! misho 2781: if (php_stream_write(stream, (const char *)map->nmagic, nm) != (ssize_t)nm) {
! 2782: file_error(ms, errno, "error writing `%s'", dbname);
! 2783: goto out;
! 2784: }
! 2785:
! 2786: assert(nm + sizeof(ar) < m);
! 2787:
1.1 misho 2788: if (php_stream_seek(stream,(off_t)sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
2789: file_error(ms, errno, "error seeking `%s'", dbname);
2790: goto out;
2791: }
2792:
1.1.1.3 ! misho 2793: for (i = 0; i < MAGIC_SETS; i++) {
! 2794: len = m * map->nmagic[i];
! 2795: if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
! 2796: file_error(ms, errno, "error writing `%s'", dbname);
! 2797: goto out;
! 2798: }
1.1 misho 2799: }
2800:
1.1.1.3 ! misho 2801: if (stream) {
! 2802: php_stream_close(stream);
! 2803: }
1.1 misho 2804:
2805: rv = 0;
2806: out:
2807: efree(dbname);
2808: return rv;
2809: }
2810:
2811: private const char ext[] = ".mgc";
2812: /*
2813: * make a dbname
2814: */
2815: private char *
2816: mkdbname(struct magic_set *ms, const char *fn, int strip)
2817: {
2818: const char *p, *q;
2819: char *buf;
2820: TSRMLS_FETCH();
2821:
2822: if (strip) {
2823: if ((p = strrchr(fn, '/')) != NULL)
2824: fn = ++p;
2825: }
2826:
2827: for (q = fn; *q; q++)
2828: continue;
2829: /* Look for .mgc */
2830: for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
2831: if (*p != *q)
2832: break;
2833:
2834: /* Did not find .mgc, restore q */
2835: if (p >= ext)
2836: while (*q)
2837: q++;
2838:
2839: q++;
2840: /* Compatibility with old code that looked in .mime */
2841: if (ms->flags & MAGIC_MIME) {
2842: spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
1.1.1.3 ! misho 2843: #ifdef PHP_WIN32
! 2844: if (VCWD_ACCESS(buf, R_OK) == 0) {
! 2845: #else
1.1 misho 2846: if (VCWD_ACCESS(buf, R_OK) != -1) {
1.1.1.3 ! misho 2847: #endif
1.1 misho 2848: ms->flags &= MAGIC_MIME_TYPE;
2849: return buf;
2850: }
2851: efree(buf);
2852: }
2853: spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
2854:
2855: /* Compatibility with old code that looked in .mime */
2856: if (strstr(p, ".mime") != NULL)
2857: ms->flags &= MAGIC_MIME_TYPE;
2858: return buf;
2859: }
2860:
2861: /*
2862: * Byteswap an mmap'ed file if needed
2863: */
2864: private void
2865: byteswap(struct magic *magic, uint32_t nmagic)
2866: {
2867: uint32_t i;
2868: for (i = 0; i < nmagic; i++)
2869: bs1(&magic[i]);
2870: }
2871:
/*
 * Reverse the byte order of a 16-bit value.
 */
private uint16_t
swap2(uint16_t sv)
{
	return (uint16_t)(((sv & 0x00ffU) << 8) | ((sv >> 8) & 0x00ffU));
}
2885:
/*
 * Reverse the byte order of a 32-bit value.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv & 0x00ff0000U) >> 8) |
	    ((sv & 0xff000000U) >> 24);
}
2901:
/*
 * Reverse the byte order of a 64-bit value (full eight-byte reversal).
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t out = 0;
	int k;

	/* Peel bytes off the low end of sv and push them onto out. */
	for (k = 0; k < 8; k++) {
		out = (out << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return out;
}
2932:
2933: /*
2934: * byteswap a single magic entry
2935: */
2936: private void
2937: bs1(struct magic *m)
2938: {
2939: m->cont_level = swap2(m->cont_level);
2940: m->offset = swap4((uint32_t)m->offset);
2941: m->in_offset = swap4((uint32_t)m->in_offset);
2942: m->lineno = swap4((uint32_t)m->lineno);
2943: if (IS_LIBMAGIC_STRING(m->type)) {
2944: m->str_range = swap4(m->str_range);
2945: m->str_flags = swap4(m->str_flags);
2946: }
2947: else {
2948: m->value.q = swap8(m->value.q);
2949: m->num_mask = swap8(m->num_mask);
2950: }
2951: }
1.1.1.2 misho 2952:
2953: protected size_t
2954: file_pstring_length_size(const struct magic *m)
2955: {
2956: switch (m->str_flags & PSTRING_LEN) {
2957: case PSTRING_1_LE:
2958: return 1;
2959: case PSTRING_2_LE:
2960: case PSTRING_2_BE:
2961: return 2;
2962: case PSTRING_4_LE:
2963: case PSTRING_4_BE:
2964: return 4;
2965: default:
2966: abort(); /* Impossible */
2967: return 1;
2968: }
2969: }
2970: protected size_t
2971: file_pstring_get_length(const struct magic *m, const char *s)
2972: {
2973: size_t len = 0;
2974:
2975: switch (m->str_flags & PSTRING_LEN) {
2976: case PSTRING_1_LE:
2977: len = *s;
2978: break;
2979: case PSTRING_2_LE:
2980: len = (s[1] << 8) | s[0];
2981: break;
2982: case PSTRING_2_BE:
2983: len = (s[0] << 8) | s[1];
2984: break;
2985: case PSTRING_4_LE:
2986: len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
2987: break;
2988: case PSTRING_4_BE:
2989: len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
2990: break;
2991: default:
2992: abort(); /* Impossible */
2993: }
2994:
2995: if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
2996: len -= file_pstring_length_size(m);
2997:
2998: return len;
2999: }
1.1.1.3 ! misho 3000:
! 3001: protected int
! 3002: file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
! 3003: {
! 3004: uint32_t i, j;
! 3005: struct mlist *mlist, *ml;
! 3006:
! 3007: mlist = ms->mlist[1];
! 3008:
! 3009: for (ml = mlist->next; ml != mlist; ml = ml->next) {
! 3010: struct magic *ma = ml->magic;
! 3011: uint32_t nma = ml->nmagic;
! 3012: for (i = 0; i < nma; i++) {
! 3013: if (ma[i].type != FILE_NAME)
! 3014: continue;
! 3015: if (strcmp(ma[i].value.s, name) == 0) {
! 3016: v->magic = &ma[i];
! 3017: for (j = i + 1; j < nma; j++)
! 3018: if (ma[j].cont_level == 0)
! 3019: break;
! 3020: v->nmagic = j - i;
! 3021: return 0;
! 3022: }
! 3023: }
! 3024: }
! 3025: return -1;
! 3026: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>