--- embedaddon/php/ext/fileinfo/libmagic/ascmagic.c 2012/02/21 23:47:56 1.1.1.1 +++ embedaddon/php/ext/fileinfo/libmagic/ascmagic.c 2014/06/15 20:03:48 1.1.1.4 @@ -26,8 +26,7 @@ * SUCH DAMAGE. */ /* - * ASCII magic -- file types that we know based on keywords - * that can appear anywhere in the file. + * ASCII magic -- try to detect text encoding. * * Extensively modified by Eric Fischer in July, 2000, * to handle character codes other than ASCII on a unified basis. @@ -36,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.85 2012/08/09 16:33:15 christos Exp $") #endif /* lint */ #include "magic.h" @@ -47,13 +46,11 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20 #ifdef HAVE_UNISTD_H #include #endif -#include "names.h" #define MAXLINELEN 300 /* longest sane line length */ #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \ || (x) == 0x85 || (x) == '\f') -private int ascmatch(const unsigned char *, const unichar *, size_t); private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t); private size_t trim_nuls(const unsigned char *, size_t); @@ -71,7 +68,8 @@ trim_nuls(const unsigned char *buf, size_t nbytes) } protected int -file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) +file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes, + int text) { unichar *ubuf = NULL; size_t ulen; @@ -88,29 +86,24 @@ file_ascmagic(struct magic_set *ms, const unsigned cha /* If file doesn't look like any sort of text, give up. */ if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime, - &type) == 0) { + &type) == 0) rv = 0; - goto done; - } + else + rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, + type, text); - rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, - type); + free(ubuf); - done: - if (ubuf) - free(ubuf); - return rv; } protected int file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t ulen, const char *code, - const char *type) + const char *type, int text) { unsigned char *utf8_buf = NULL, *utf8_end; size_t mlen, i; - const struct names *p; int rv = -1; int mime = ms->flags & MAGIC_MIME; @@ -125,6 +118,7 @@ file_ascmagic_with_encoding(struct magic_set *ms, cons int n_lf = 0; int n_cr = 0; int n_nel = 0; + int executable = 0; size_t last_line_end = (size_t)-1; int has_long_lines = 0; @@ -140,54 +134,23 @@ file_ascmagic_with_encoding(struct magic_set *ms, cons goto done; } - /* Convert ubuf to UTF-8 and try text soft magic */ - /* malloc size is a conservative overestimate; could be - improved, or at least realloced after conversion. */ - mlen = ulen * 6; - utf8_buf = emalloc(mlen); - - if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL) - goto done; - if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf), - TEXTTEST)) != 0) - goto done; - else - rv = -1; - - /* look for tokens from names.h - this is expensive! */ - if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0) - goto subtype_identified; - - i = 0; - while (i < ulen) { - size_t end; - - /* skip past any leading space */ - while (i < ulen && ISSPC(ubuf[i])) - i++; - if (i >= ulen) - break; - - /* find the next whitespace */ - for (end = i + 1; end < nbytes; end++) - if (ISSPC(ubuf[end])) - break; - - /* compare the word thus isolated against the token list */ - for (p = names; p < names + NNAMES; p++) { - if (ascmatch((const unsigned char *)p->name, ubuf + i, - end - i)) { - subtype = types[p->type].human; - subtype_mime = types[p->type].mime; - goto subtype_identified; - } + if (ulen > 0 && (ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { + /* Convert ubuf to UTF-8 and try text soft magic */ + /* malloc size is a conservative overestimate; could be + improved, or at least realloced after conversion. */ + mlen = ulen * 6; + if ((utf8_buf = CAST(unsigned char *, emalloc(mlen))) == NULL) { + file_oomem(ms, mlen); + goto done; } - - i = end; + if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) + == NULL) + goto done; + if ((rv = file_softmagic(ms, utf8_buf, + (size_t)(utf8_end - utf8_buf), 0, TEXTTEST, text)) == 0) + rv = -1; } -subtype_identified: - /* Now try to discover other details about the file. */ for (i = 0; i < ulen; i++) { if (ubuf[i] == '\n') { @@ -230,7 +193,7 @@ subtype_identified: goto done; } if (mime) { - if ((mime & MAGIC_MIME_TYPE) != 0) { + if (!file_printedlen(ms) && (mime & MAGIC_MIME_TYPE) != 0) { if (subtype_mime) { if (file_printf(ms, "%s", subtype_mime) == -1) goto done; @@ -240,6 +203,29 @@ subtype_identified: } } } else { + if (file_printedlen(ms)) { + switch (file_replace(ms, " text$", ", ")) { + case 0: + switch (file_replace(ms, " text executable$", + ", ")) { + case 0: + if (file_printf(ms, ", ") == -1) + goto done; + break; + case -1: + goto done; + default: + executable = 1; + break; + } + break; + case -1: + goto done; + default: + break; + } + } + if (file_printf(ms, "%s", code) == -1) goto done; @@ -251,6 +237,10 @@ subtype_identified: if (file_printf(ms, " %s", type) == -1) goto done; + if (executable) + if (file_printf(ms, " executable") == -1) + goto done; + if (has_long_lines) if (file_printf(ms, ", with very long lines") == -1) goto done; @@ -311,22 +301,6 @@ done: efree(utf8_buf); return rv; -} - -private int -ascmatch(const unsigned char *s, const unichar *us, size_t ulen) -{ - size_t i; - - for (i = 0; i < ulen; i++) { - if (s[i] != us[i]) - return 0; - } - - if (s[i]) - return 0; - else - return 1; } /*