Annotation of embedaddon/php/Zend/zend_ini_scanner.l, revision 1.1.1.3
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | Zend Engine |
4: +----------------------------------------------------------------------+
1.1.1.3 ! misho 5: | Copyright (c) 1998-2013 Zend Technologies Ltd. (http://www.zend.com) |
1.1 misho 6: +----------------------------------------------------------------------+
7: | This source file is subject to version 2.00 of the Zend license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.zend.com/license/2_00.txt. |
11: | If you did not receive a copy of the Zend license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@zend.com so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Authors: Zeev Suraski <zeev@zend.com> |
16: | Jani Taskinen <jani@php.net> |
17: | Marcus Boerger <helly@php.net> |
18: | Nuno Lopes <nlopess@php.net> |
19: | Scott MacVicar <scottmac@php.net> |
20: +----------------------------------------------------------------------+
21: */
22:
1.1.1.2 misho 23: /* $Id$ */
1.1 misho 24:
25: #include <errno.h>
26: #include "zend.h"
27: #include "zend_globals.h"
28: #include <zend_ini_parser.h>
29: #include "zend_ini_scanner.h"
30:
/* Scanner tracing hook. Flip the condition below to 1 to print every
 * state/character pair; as shipped, YYDEBUG expands to nothing. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
36:
37: #include "zend_ini_scanner_defs.h"
38:
#define YYCTYPE unsigned char
/* Allow the scanner to read one null byte after the end of the string (from ZEND_MMAP_AHEAD)
 * so that it will be able to terminate to match the current token (e.g. non-enclosed string) */
#define YYFILL(n) { if (YYCURSOR > YYLIMIT) return 0; }
#define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker)

/* Current scanner condition (start state) accessors. The setter parenthesizes
 * its argument so compound expressions are assigned as a whole. */
#define YYGETCONDITION() SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Map a condition name to its yyc-prefixed constant. */
#define STATE(name) yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE YYGETCONDITION()
#define yytext ((char*)SCNG(yy_text))
#define yyleng SCNG(yy_leng)
/* Keep only the first x characters of the current token: the cursor is moved
 * back to yytext + x and yyleng is set to x. x is parenthesized so that the
 * casts and the pointer addition apply to the whole argument expression. */
#define yyless(x) do { YYCURSOR = (unsigned char*)yytext + (x); \
                       yyleng = (unsigned int)(x); } while(0)
59:
60: /* #define yymore() goto yymore_restart */
61:
62: /* perform sanity check. If this message is triggered you should
63: increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
64: /*!max:re2c */
65: #if ZEND_MMAP_AHEAD < (YYMAXFILL + 1)
66: # error ZEND_MMAP_AHEAD should be greater than YYMAXFILL
67: #endif
68:
69:
/* How it works (for the core ini directives):
 * ===========================================
 *
 * 1. Scanner scans file for tokens and passes them to parser.
 * 2. Parser parses the tokens and passes the name/value pairs to the callback
 *    function which stores them in the configuration hash table.
 * 3. Later REGISTER_INI_ENTRIES() is called which triggers the actual
 *    registering of ini entries and uses zend_get_configuration_directive()
 *    to fetch the previously stored name/value pair from the configuration hash table
 *    and registers the static ini entries which match the name to the value
 *    into the EG(ini_directives) hash table.
 * 4. PATH section entries are used per-request from bottom to top, each overriding
 *    the previous one if it exists. zend_alter_ini_entry() is called for each entry.
 *    Settings in a PATH section are ZEND_INI_SYSTEM accessible and thus mimic the
 *    php_admin_* directives used within Apache httpd.conf when PHP is compiled as
 *    a module for Apache.
 * 5. User defined ini files (like .htaccess for apache) are parsed for each request and
 *    stored in a separate hash defined by SAPI.
 */
89:
90: /* TODO: (ordered by importance :-)
91: * ===============================================================================
92: *
93: * - Separate constant lookup totally from plain strings (using CONSTANT pattern)
94: * - Add #if .. #else .. #endif and ==, !=, <, > , <=, >= operators
95: * - Add #include "some.ini"
96: * - Allow variables to refer to options also when using parse_ini_file()
97: *
98: */
99:
100: /* Globals Macros */
101: #define SCNG INI_SCNG
102: #ifdef ZTS
103: ZEND_API ts_rsrc_id ini_scanner_globals_id;
104: #else
105: ZEND_API zend_ini_scanner_globals ini_scanner_globals;
106: #endif
107:
/* Eat leading whitespace: advance yytext past leading spaces/tabs and shrink
 * yyleng by the same amount. The loop is bounded by yyleng, so it stops at
 * the end of the matched token instead of running on into whatever blanks
 * follow it (which would wrap the token length below zero). */
#define EAT_LEADING_WHITESPACE()                        \
	while (yyleng) {                                    \
		if (yytext[0] == ' ' || yytext[0] == '\t') {    \
			SCNG(yy_text)++;                            \
			yyleng--;                                   \
		} else {                                        \
			break;                                      \
		}                                               \
	}
118:
/* Eat trailing whitespace + extra char: shrink yyleng while the last
 * character of the token is '\n', '\r', '\t', ' ' or ch. Passing 'X' as ch
 * means "no extra character" (so a trailing 'X' itself is never stripped). */
#define EAT_TRAILING_WHITESPACE_EX(ch)                                  \
	for (; yyleng > 0; yyleng--) {                                      \
		const char yy_last = yytext[yyleng - 1];                        \
		if (yy_last == ' ' || yy_last == '\t' ||                        \
		    yy_last == '\r' || yy_last == '\n') {                       \
			continue;                                                   \
		}                                                               \
		if ((ch) != 'X' && yy_last == (ch)) {                           \
			continue;                                                   \
		}                                                               \
		break;                                                          \
	}

/* Eat trailing whitespace only ('X' disables the extra character). */
#define EAT_TRAILING_WHITESPACE()	EAT_TRAILING_WHITESPACE_EX('X')
133:
/* Store a copy of the first len bytes of str (made with zend_strndup) in
 * retval and mark retval as an IS_STRING value. */
#define zend_ini_copy_value(retval, str, len) do {      \
		Z_STRVAL_P(retval) = zend_strndup(str, len);    \
		Z_STRLEN_P(retval) = len;                       \
		Z_TYPE_P(retval) = IS_STRING;                   \
	} while (0)

/* Copy the token text into ini_lval and return the token code from the
 * enclosing function. */
#define RETURN_TOKEN(type, str, len) do {               \
		zend_ini_copy_value(ini_lval, str, len);        \
		return type;                                    \
	} while (0)
144:
145: static void _yy_push_state(int new_state TSRMLS_DC)
146: {
147: zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
148: YYSETCONDITION(new_state);
149: }
150:
151: #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
152:
153: static void yy_pop_state(TSRMLS_D)
154: {
155: int *stack_state;
156: zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
157: YYSETCONDITION(*stack_state);
158: zend_stack_del_top(&SCNG(state_stack));
159: }
160:
161: static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
162: {
163: YYCURSOR = (YYCTYPE*)str;
164: SCNG(yy_start) = YYCURSOR;
165: YYLIMIT = YYCURSOR + len;
166: }
167:
168: #define ini_filename SCNG(filename)
169:
170: /* {{{ init_ini_scanner()
171: */
172: static int init_ini_scanner(int scanner_mode, zend_file_handle *fh TSRMLS_DC)
173: {
174: /* Sanity check */
175: if (scanner_mode != ZEND_INI_SCANNER_NORMAL && scanner_mode != ZEND_INI_SCANNER_RAW) {
176: zend_error(E_WARNING, "Invalid scanner mode");
177: return FAILURE;
178: }
179:
180: SCNG(lineno) = 1;
181: SCNG(scanner_mode) = scanner_mode;
182: SCNG(yy_in) = fh;
183:
184: if (fh != NULL) {
185: ini_filename = zend_strndup(fh->filename, strlen(fh->filename));
186: } else {
187: ini_filename = NULL;
188: }
189:
190: zend_stack_init(&SCNG(state_stack));
191: BEGIN(INITIAL);
192:
193: return SUCCESS;
194: }
195: /* }}} */
196:
197: /* {{{ shutdown_ini_scanner()
198: */
199: void shutdown_ini_scanner(TSRMLS_D)
200: {
201: zend_stack_destroy(&SCNG(state_stack));
202: if (ini_filename) {
203: free(ini_filename);
204: }
205: }
206: /* }}} */
207:
208: /* {{{ zend_ini_scanner_get_lineno()
209: */
210: int zend_ini_scanner_get_lineno(TSRMLS_D)
211: {
212: return SCNG(lineno);
213: }
214: /* }}} */
215:
216: /* {{{ zend_ini_scanner_get_filename()
217: */
218: char *zend_ini_scanner_get_filename(TSRMLS_D)
219: {
220: return ini_filename ? ini_filename : "Unknown";
221: }
222: /* }}} */
223:
224: /* {{{ zend_ini_open_file_for_scanning()
225: */
226: int zend_ini_open_file_for_scanning(zend_file_handle *fh, int scanner_mode TSRMLS_DC)
227: {
228: char *buf;
229: size_t size;
230:
1.1.1.2 misho 231: if (zend_stream_fixup(fh, &buf, &size TSRMLS_CC) == FAILURE) {
232: return FAILURE;
233: }
234:
235: if (init_ini_scanner(scanner_mode, fh TSRMLS_CC) == FAILURE) {
236: zend_file_handle_dtor(fh TSRMLS_CC);
1.1 misho 237: return FAILURE;
238: }
239:
240: yy_scan_buffer(buf, size TSRMLS_CC);
241:
242: return SUCCESS;
243: }
244: /* }}} */
245:
246: /* {{{ zend_ini_prepare_string_for_scanning()
247: */
248: int zend_ini_prepare_string_for_scanning(char *str, int scanner_mode TSRMLS_DC)
249: {
250: int len = strlen(str);
251:
252: if (init_ini_scanner(scanner_mode, NULL TSRMLS_CC) == FAILURE) {
253: return FAILURE;
254: }
255:
256: yy_scan_buffer(str, len TSRMLS_CC);
257:
258: return SUCCESS;
259: }
260: /* }}} */
261:
262: /* {{{ zend_ini_escape_string()
263: */
264: static void zend_ini_escape_string(zval *lval, char *str, int len, char quote_type TSRMLS_DC)
265: {
266: register char *s, *t;
267: char *end;
268:
269: zend_ini_copy_value(lval, str, len);
270:
271: /* convert escape sequences */
272: s = t = Z_STRVAL_P(lval);
273: end = s + Z_STRLEN_P(lval);
274:
275: while (s < end) {
276: if (*s == '\\') {
277: s++;
278: if (s >= end) {
279: *t++ = '\\';
280: continue;
281: }
282: switch (*s) {
283: case '"':
284: if (*s != quote_type) {
285: *t++ = '\\';
286: *t++ = *s;
287: break;
288: }
289: case '\\':
290: case '$':
291: *t++ = *s;
292: Z_STRLEN_P(lval)--;
293: break;
294: default:
295: *t++ = '\\';
296: *t++ = *s;
297: break;
298: }
299: } else {
300: *t++ = *s;
301: }
302: if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
303: SCNG(lineno)++;
304: }
305: s++;
306: }
307: *t = 0;
308: }
309: /* }}} */
310:
311: int ini_lex(zval *ini_lval TSRMLS_DC)
312: {
313: restart:
314: SCNG(yy_text) = YYCURSOR;
315:
316: /* yymore_restart: */
317: /* detect EOF */
318: if (YYCURSOR >= YYLIMIT) {
319: if (YYSTATE == STATE(ST_VALUE) || YYSTATE == STATE(ST_RAW)) {
320: BEGIN(INITIAL);
321: return END_OF_LINE;
322: }
323: return 0;
324: }
325:
326: /* Eat any UTF-8 BOM we find in the first 3 bytes */
327: if (YYCURSOR == SCNG(yy_start) && YYCURSOR + 3 < YYLIMIT) {
328: if (memcmp(YYCURSOR, "\xef\xbb\xbf", 3) == 0) {
329: YYCURSOR += 3;
330: goto restart;
331: }
332: }
333: /*!re2c
334: re2c:yyfill:check = 0;
335: LNUM [0-9]+
336: DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*)
337: NUMBER [-]?{LNUM}|{DNUM}
338: ANY_CHAR (.|[\n\t])
339: NEWLINE ("\r"|"\n"|"\r\n")
340: TABS_AND_SPACES [ \t]
341: WHITESPACE [ \t]+
342: CONSTANT [a-zA-Z_][a-zA-Z0-9_]*
343: LABEL [^=\n\r\t;|&$~(){}!"\[]+
344: TOKENS [:,.\[\]"'()|^&+-/*=%$!~<>?@{}]
345: OPERATORS [&|~()!]
346: DOLLAR_CURLY "${"
347:
348: SECTION_RAW_CHARS [^\]\n\r]
349: SINGLE_QUOTED_CHARS [^']
350: RAW_VALUE_CHARS [^\n\r;\000]
351:
352: LITERAL_DOLLAR ("$"([^{\000]|("\\"{ANY_CHAR})))
353: VALUE_CHARS ([^$= \t\n\r;&|~()!"'\000]|{LITERAL_DOLLAR})
354: SECTION_VALUE_CHARS ([^$\n\r;"'\]\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR})
355:
356: <!*> := yyleng = YYCURSOR - SCNG(yy_text);
357:
358: <INITIAL>"[" { /* Section start */
359: /* Enter section data lookup state */
360: if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) {
361: yy_push_state(ST_SECTION_RAW TSRMLS_CC);
362: } else {
363: yy_push_state(ST_SECTION_VALUE TSRMLS_CC);
364: }
365: return TC_SECTION;
366: }
367:
368: <ST_VALUE,ST_SECTION_VALUE,ST_OFFSET>"'"{SINGLE_QUOTED_CHARS}+"'" { /* Raw string */
369: /* Eat leading and trailing single quotes */
370: if (yytext[0] == '\'' && yytext[yyleng - 1] == '\'') {
371: SCNG(yy_text)++;
372: yyleng = yyleng - 2;
373: }
374: RETURN_TOKEN(TC_RAW, yytext, yyleng);
375: }
376:
377: <ST_SECTION_RAW,ST_SECTION_VALUE>"]"{TABS_AND_SPACES}*{NEWLINE}? { /* End of section */
378: BEGIN(INITIAL);
379: SCNG(lineno)++;
380: return ']';
381: }
382:
383: <INITIAL>{LABEL}"["{TABS_AND_SPACES}* { /* Start of option with offset */
384: /* Eat leading whitespace */
385: EAT_LEADING_WHITESPACE();
386:
387: /* Eat trailing whitespace and [ */
388: EAT_TRAILING_WHITESPACE_EX('[');
389:
390: /* Enter offset lookup state */
391: yy_push_state(ST_OFFSET TSRMLS_CC);
392:
393: RETURN_TOKEN(TC_OFFSET, yytext, yyleng);
394: }
395:
396: <ST_OFFSET>{TABS_AND_SPACES}*"]" { /* End of section or an option offset */
397: BEGIN(INITIAL);
398: return ']';
399: }
400:
401: <ST_DOUBLE_QUOTES,ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{DOLLAR_CURLY} { /* Variable start */
402: yy_push_state(ST_VARNAME TSRMLS_CC);
403: return TC_DOLLAR_CURLY;
404: }
405:
406: <ST_VARNAME>{LABEL} { /* Variable name */
407: /* Eat leading whitespace */
408: EAT_LEADING_WHITESPACE();
409:
410: /* Eat trailing whitespace */
411: EAT_TRAILING_WHITESPACE();
412:
413: RETURN_TOKEN(TC_VARNAME, yytext, yyleng);
414: }
415:
416: <ST_VARNAME>"}" { /* Variable end */
417: yy_pop_state(TSRMLS_C);
418: return '}';
419: }
420:
421: <INITIAL,ST_VALUE>("true"|"on"|"yes"){TABS_AND_SPACES}* { /* TRUE value (when used outside option value/offset this causes parse error!) */
422: RETURN_TOKEN(BOOL_TRUE, "1", 1);
423: }
424:
425: <INITIAL,ST_VALUE>("false"|"off"|"no"|"none"|"null"){TABS_AND_SPACES}* { /* FALSE value (when used outside option value/offset this causes parse error!)*/
426: RETURN_TOKEN(BOOL_FALSE, "", 0);
427: }
428:
429: <INITIAL>{LABEL} { /* Get option name */
430: /* Eat leading whitespace */
431: EAT_LEADING_WHITESPACE();
432:
433: /* Eat trailing whitespace */
434: EAT_TRAILING_WHITESPACE();
435:
436: RETURN_TOKEN(TC_LABEL, yytext, yyleng);
437: }
438:
439: <INITIAL>{TABS_AND_SPACES}*[=]{TABS_AND_SPACES}* { /* Start option value */
440: if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) {
441: yy_push_state(ST_RAW TSRMLS_CC);
442: } else {
443: yy_push_state(ST_VALUE TSRMLS_CC);
444: }
445: return '=';
446: }
447:
1.1.1.3 ! misho 448: <ST_RAW>{RAW_VALUE_CHARS} { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */
! 449: char *sc = NULL;
! 450: while (YYCURSOR < YYLIMIT) {
! 451: switch (*YYCURSOR) {
! 452: case '\n':
! 453: case '\r':
! 454: goto end_raw_value_chars;
! 455: break;
! 456: case ';':
! 457: if (sc == NULL) {
! 458: sc = YYCURSOR;
! 459: }
! 460: /* no break */
! 461: default:
! 462: YYCURSOR++;
! 463: break;
! 464: }
! 465: }
! 466: end_raw_value_chars:
! 467: yyleng = YYCURSOR - SCNG(yy_text);
! 468:
! 469: /* Eat trailing semicolons */
! 470: while (yytext[yyleng - 1] == ';') {
! 471: yyleng--;
! 472: }
! 473:
1.1 misho 474: /* Eat leading and trailing double quotes */
475: if (yytext[0] == '"' && yytext[yyleng - 1] == '"') {
476: SCNG(yy_text)++;
477: yyleng = yyleng - 2;
1.1.1.3 ! misho 478: } else if (sc) {
! 479: YYCURSOR = sc;
! 480: yyleng = YYCURSOR - SCNG(yy_text);
1.1 misho 481: }
482: RETURN_TOKEN(TC_RAW, yytext, yyleng);
483: }
484:
485: <ST_SECTION_RAW>{SECTION_RAW_CHARS}+ { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */
486: RETURN_TOKEN(TC_RAW, yytext, yyleng);
487: }
488:
489: <ST_VALUE,ST_RAW>{TABS_AND_SPACES}*{NEWLINE} { /* End of option value */
490: BEGIN(INITIAL);
491: SCNG(lineno)++;
492: return END_OF_LINE;
493: }
494:
495: <ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{CONSTANT} { /* Get constant option value */
496: RETURN_TOKEN(TC_CONSTANT, yytext, yyleng);
497: }
498:
499: <ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{NUMBER} { /* Get number option value as string */
500: RETURN_TOKEN(TC_NUMBER, yytext, yyleng);
501: }
502:
503: <INITIAL>{TOKENS} { /* Disallow these chars outside option values */
504: return yytext[0];
505: }
506:
507: <ST_VALUE>{OPERATORS}{TABS_AND_SPACES}* { /* Boolean operators */
508: return yytext[0];
509: }
510:
511: <ST_VALUE>[=] { /* Make = used in option value to trigger error */
512: yyless(0);
513: BEGIN(INITIAL);
514: return END_OF_LINE;
515: }
516:
517: <ST_VALUE>{VALUE_CHARS}+ { /* Get everything else as option/offset value */
518: RETURN_TOKEN(TC_STRING, yytext, yyleng);
519: }
520:
521: <ST_SECTION_VALUE,ST_OFFSET>{SECTION_VALUE_CHARS}+ { /* Get rest as section/offset value */
522: RETURN_TOKEN(TC_STRING, yytext, yyleng);
523: }
524:
525: <ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{TABS_AND_SPACES}*["] { /* Double quoted '"' string start */
526: yy_push_state(ST_DOUBLE_QUOTES TSRMLS_CC);
527: return '"';
528: }
529:
530: <ST_DOUBLE_QUOTES>["]{TABS_AND_SPACES}* { /* Double quoted '"' string ends */
531: yy_pop_state(TSRMLS_C);
532: return '"';
533: }
534:
535: <ST_DOUBLE_QUOTES>[^] { /* Escape double quoted string contents */
536: if (YYCURSOR > YYLIMIT) {
537: return 0;
538: }
539:
540: while (YYCURSOR < YYLIMIT) {
541: switch (*YYCURSOR++) {
542: case '"':
543: if (YYCURSOR < YYLIMIT && YYCURSOR[-2] == '\\' && *YYCURSOR != '\r' && *YYCURSOR != '\n') {
544: continue;
545: }
546: break;
547: case '$':
548: if (*YYCURSOR == '{') {
549: break;
550: }
551: continue;
552: case '\\':
553: if (YYCURSOR < YYLIMIT && *YYCURSOR != '"') {
554: YYCURSOR++;
555: }
556: /* fall through */
557: default:
558: continue;
559: }
560:
561: YYCURSOR--;
562: break;
563: }
564:
565: yyleng = YYCURSOR - SCNG(yy_text);
566:
567: zend_ini_escape_string(ini_lval, yytext, yyleng, '"' TSRMLS_CC);
568: return TC_QUOTED_STRING;
569: }
570:
571: <ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{WHITESPACE} {
572: RETURN_TOKEN(TC_WHITESPACE, yytext, yyleng);
573: }
574:
575: <INITIAL,ST_RAW>{TABS_AND_SPACES}+ {
576: /* eat whitespace */
577: goto restart;
578: }
579:
580: <INITIAL>{TABS_AND_SPACES}*{NEWLINE} {
581: SCNG(lineno)++;
582: return END_OF_LINE;
583: }
584:
585: <INITIAL,ST_VALUE,ST_RAW>{TABS_AND_SPACES}*[;][^\r\n]*{NEWLINE} { /* Comment */
586: BEGIN(INITIAL);
587: SCNG(lineno)++;
588: return END_OF_LINE;
589: }
590:
591: <INITIAL>{TABS_AND_SPACES}*[#][^\r\n]*{NEWLINE} { /* #Comment */
592: zend_error(E_DEPRECATED, "Comments starting with '#' are deprecated in %s on line %d", zend_ini_scanner_get_filename(TSRMLS_C), SCNG(lineno));
593: BEGIN(INITIAL);
594: SCNG(lineno)++;
595: return END_OF_LINE;
596: }
597:
598: <ST_VALUE,ST_RAW>[^] { /* End of option value (if EOF is reached before EOL */
599: BEGIN(INITIAL);
600: return END_OF_LINE;
601: }
602:
603: <*>[^] {
604: return 0;
605: }
606:
607: */
608: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>