Annotation of embedaddon/php/Zend/zend_language_scanner.l, revision 1.1.1.2
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | Zend Engine |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 1998-2012 Zend Technologies Ltd. (http://www.zend.com) |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 2.00 of the Zend license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.zend.com/license/2_00.txt. |
11: | If you did not receive a copy of the Zend license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@zend.com so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Authors: Marcus Boerger <helly@php.net> |
16: | Nuno Lopes <nlopess@php.net> |
17: | Scott MacVicar <scottmac@php.net> |
18: | Flex version authors: |
19: | Andi Gutmans <andi@zend.com> |
20: | Zeev Suraski <zeev@zend.com> |
21: +----------------------------------------------------------------------+
22: */
23:
1.1.1.2 ! misho 24: /* $Id$ */
1.1 misho 25:
26: #if 0
27: # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
28: #else
29: # define YYDEBUG(s, c)
30: #endif
31:
32: #include "zend_language_scanner_defs.h"
33:
34: #include <errno.h>
35: #include "zend.h"
36: #include "zend_alloc.h"
37: #include <zend_language_parser.h>
38: #include "zend_compile.h"
39: #include "zend_language_scanner.h"
40: #include "zend_highlight.h"
41: #include "zend_constants.h"
42: #include "zend_variables.h"
43: #include "zend_operators.h"
44: #include "zend_API.h"
45: #include "zend_strtod.h"
46: #include "zend_exceptions.h"
47: #include "tsrm_virtual_cwd.h"
48: #include "tsrm_config_common.h"
49:
/*
 * Glue between the generated scanner and the SCNG() scanner globals.
 * YYCURSOR/YYLIMIT/YYMARKER and the condition accessors are plain aliases
 * for fields of the scanner globals.
 */
50: #define YYCTYPE unsigned char
/* Makes the enclosing function return 0 once YYCURSOR + n reaches or passes YYLIMIT + ZEND_MMAP_AHEAD. */
51: #define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
52: #define YYCURSOR SCNG(yy_cursor)
53: #define YYLIMIT SCNG(yy_limit)
54: #define YYMARKER SCNG(yy_marker)
55:
/* NOTE: YYSETCONDITION expands to a bare assignment with an unparenthesized argument. */
56: #define YYGETCONDITION() SCNG(yy_state)
57: #define YYSETCONDITION(s) SCNG(yy_state) = s
58:
/* Maps a condition name to its yyc-prefixed identifier. */
59: #define STATE(name) yyc##name
60:
61: /* emulate flex constructs */
62: #define BEGIN(state) YYSETCONDITION(STATE(state))
63: #define YYSTATE YYGETCONDITION()
64: #define yytext ((char*)SCNG(yy_text))
65: #define yyleng SCNG(yy_leng)
/* yyless(x): move the cursor to yytext + x and set yyleng to x. The argument is not parenthesized. */
66: #define yyless(x) do { YYCURSOR = (unsigned char*)yytext + x; \
67: yyleng = (unsigned int)x; } while(0)
/* yymore(): jump to the yymore_restart label of the enclosing function. */
68: #define yymore() goto yymore_restart
69:
70: /* perform sanity check. If this message is triggered you should
71: increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
72: /*!max:re2c */
73: #if ZEND_MMAP_AHEAD < YYMAXFILL
74: # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
75: #endif
76:
77: #ifdef HAVE_STDARG_H
78: # include <stdarg.h>
79: #endif
80:
81: #ifdef HAVE_UNISTD_H
82: # include <unistd.h>
83: #endif
84:
85: /* Globals Macros */
/*
 * SCNG is shorthand for LANG_SCNG. Under ZTS only a resource id is defined
 * here; otherwise the scanner globals are a single global structure.
 */
86: #define SCNG LANG_SCNG
87: #ifdef ZTS
88: ZEND_API ts_rsrc_id language_scanner_globals_id;
89: #else
90: ZEND_API zend_php_scanner_globals language_scanner_globals;
91: #endif
92:
/*
 * HANDLE_NEWLINES(s, l): walk the l bytes starting at s and increment
 * CG(zend_lineno) for every '\n' and for every '\r' that is not directly
 * followed by '\n' (so "\r\n" counts once).
 * NOTE: the '\r' test reads p[1], which is the byte at the boundary when
 * '\r' is the last byte of the range.
 */
93: #define HANDLE_NEWLINES(s, l) \
94: do { \
95: char *p = (s), *boundary = p+(l); \
96: \
97: while (p<boundary) { \
98: if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \
99: CG(zend_lineno)++; \
100: } \
101: p++; \
102: } \
103: } while (0)
104:
/*
 * HANDLE_NEWLINE(c): increment CG(zend_lineno) when the single character c
 * is '\n' or '\r'.
 * NOTE(review): c is unparenthesized and may be evaluated twice, and the
 * expansion is a bare brace block rather than do { } while (0); pass only
 * side-effect-free expressions.
 */
105: #define HANDLE_NEWLINE(c) \
106: { \
107: if (c == '\n' || c == '\r') { \
108: CG(zend_lineno)++; \
109: } \
110: }
111:
112: /* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
113: #define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
114: #define GET_DOUBLE_QUOTES_SCANNED_LENGTH() CG(doc_comment_len)
115:
/* True for ASCII letters, '_' and any value >= 0x7F. The argument is evaluated several times. */
116: #define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F)
117:
/* Octal digit ('0'..'7') and hexadecimal digit (either case) tests; arguments are evaluated several times. */
118: #define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')
119: #define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
120:
121: BEGIN_EXTERN_C()
122:
1.1.1.2 ! misho 123: static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
! 124: {
! 125: const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
! 126: assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
! 127: return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
! 128: }
! 129:
! 130: static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
! 131: {
! 132: return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
! 133: }
! 134:
! 135: static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
! 136: {
! 137: return zend_multibyte_encoding_converter(to, to_length, from, from_length,
! 138: LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
! 139: }
! 140:
! 141: static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
! 142: {
! 143: const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
! 144: assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
! 145: return zend_multibyte_encoding_converter(to, to_length, from, from_length,
! 146: internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
! 147: }
! 148:
! 149:
1.1 misho 150: static void _yy_push_state(int new_state TSRMLS_DC)
151: {
152: zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
153: YYSETCONDITION(new_state);
154: }
155:
/*
 * yy_push_state(X): token-pastes "yyc" onto the macro argument and forwards
 * it to _yy_push_state(). The parameter name suggests callers pass the state
 * name together with the TSRM argument as one macro argument — confirm at
 * the call sites.
 */
156: #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
157:
158: static void yy_pop_state(TSRMLS_D)
159: {
160: int *stack_state;
161: zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
162: YYSETCONDITION(*stack_state);
163: zend_stack_del_top(&SCNG(state_stack));
164: }
165:
166: static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
167: {
168: YYCURSOR = (YYCTYPE*)str;
169: YYLIMIT = YYCURSOR + len;
170: if (!SCNG(yy_start)) {
171: SCNG(yy_start) = YYCURSOR;
172: }
173: }
174:
175: void startup_scanner(TSRMLS_D)
176: {
1.1.1.2 ! misho 177: CG(parse_error) = 0;
1.1 misho 178: CG(heredoc) = NULL;
179: CG(heredoc_len) = 0;
180: CG(doc_comment) = NULL;
181: CG(doc_comment_len) = 0;
182: zend_stack_init(&SCNG(state_stack));
183: }
184:
185: void shutdown_scanner(TSRMLS_D)
186: {
187: if (CG(heredoc)) {
188: efree(CG(heredoc));
189: CG(heredoc_len)=0;
190: }
1.1.1.2 ! misho 191: CG(parse_error) = 0;
1.1 misho 192: zend_stack_destroy(&SCNG(state_stack));
193: RESET_DOC_COMMENT();
194: }
195:
196: ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
197: {
198: lex_state->yy_leng = SCNG(yy_leng);
199: lex_state->yy_start = SCNG(yy_start);
200: lex_state->yy_text = SCNG(yy_text);
201: lex_state->yy_cursor = SCNG(yy_cursor);
202: lex_state->yy_marker = SCNG(yy_marker);
203: lex_state->yy_limit = SCNG(yy_limit);
204:
205: lex_state->state_stack = SCNG(state_stack);
206: zend_stack_init(&SCNG(state_stack));
207:
208: lex_state->in = SCNG(yy_in);
209: lex_state->yy_state = YYSTATE;
210: lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
211: lex_state->lineno = CG(zend_lineno);
212:
213: lex_state->script_org = SCNG(script_org);
214: lex_state->script_org_size = SCNG(script_org_size);
215: lex_state->script_filtered = SCNG(script_filtered);
216: lex_state->script_filtered_size = SCNG(script_filtered_size);
217: lex_state->input_filter = SCNG(input_filter);
218: lex_state->output_filter = SCNG(output_filter);
219: lex_state->script_encoding = SCNG(script_encoding);
220: }
221:
222: ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
223: {
224: SCNG(yy_leng) = lex_state->yy_leng;
225: SCNG(yy_start) = lex_state->yy_start;
226: SCNG(yy_text) = lex_state->yy_text;
227: SCNG(yy_cursor) = lex_state->yy_cursor;
228: SCNG(yy_marker) = lex_state->yy_marker;
229: SCNG(yy_limit) = lex_state->yy_limit;
230:
231: zend_stack_destroy(&SCNG(state_stack));
232: SCNG(state_stack) = lex_state->state_stack;
233:
234: SCNG(yy_in) = lex_state->in;
235: YYSETCONDITION(lex_state->yy_state);
236: CG(zend_lineno) = lex_state->lineno;
237: zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
1.1.1.2 ! misho 238:
1.1 misho 239: if (SCNG(script_filtered)) {
240: efree(SCNG(script_filtered));
241: SCNG(script_filtered) = NULL;
242: }
243: SCNG(script_org) = lex_state->script_org;
244: SCNG(script_org_size) = lex_state->script_org_size;
245: SCNG(script_filtered) = lex_state->script_filtered;
246: SCNG(script_filtered_size) = lex_state->script_filtered_size;
247: SCNG(input_filter) = lex_state->input_filter;
248: SCNG(output_filter) = lex_state->output_filter;
249: SCNG(script_encoding) = lex_state->script_encoding;
250:
251: if (CG(heredoc)) {
252: efree(CG(heredoc));
253: CG(heredoc) = NULL;
254: CG(heredoc_len) = 0;
255: }
256: }
257:
258: ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
259: {
260: zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
261: /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
262: file_handle->opened_path = NULL;
263: if (file_handle->free_filename) {
264: file_handle->filename = NULL;
265: }
266: }
267:
/*
 * Byte-order-mark signatures as string literals. Callers use sizeof(...)-1
 * as the length, so the embedded NUL bytes of the UTF-32 marks count and
 * the literal's terminating NUL does not.
 */
1.1.1.2 ! misho 268: #define BOM_UTF32_BE "\x00\x00\xfe\xff"
! 269: #define BOM_UTF32_LE "\xff\xfe\x00\x00"
! 270: #define BOM_UTF16_BE "\xfe\xff"
! 271: #define BOM_UTF16_LE "\xff\xfe"
! 272: #define BOM_UTF8 "\xef\xbb\xbf"
! 273:
! 274: static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
! 275: {
! 276: const unsigned char *p;
! 277: int wchar_size = 2;
! 278: int le = 0;
! 279:
! 280: /* utf-16 or utf-32? */
! 281: p = script;
! 282: while ((p-script) < script_size) {
! 283: p = memchr(p, 0, script_size-(p-script)-2);
! 284: if (!p) {
! 285: break;
! 286: }
! 287: if (*(p+1) == '\0' && *(p+2) == '\0') {
! 288: wchar_size = 4;
! 289: break;
! 290: }
! 291:
! 292: /* searching for UTF-32 specific byte orders, so this will do */
! 293: p += 4;
! 294: }
! 295:
! 296: /* BE or LE? */
! 297: p = script;
! 298: while ((p-script) < script_size) {
! 299: if (*p == '\0' && *(p+wchar_size-1) != '\0') {
! 300: /* BE */
! 301: le = 0;
! 302: break;
! 303: } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
! 304: /* LE* */
! 305: le = 1;
! 306: break;
! 307: }
! 308: p += wchar_size;
! 309: }
! 310:
! 311: if (wchar_size == 2) {
! 312: return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
! 313: } else {
! 314: return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
! 315: }
! 316:
! 317: return NULL;
! 318: }
! 319:
! 320: static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
! 321: {
! 322: const zend_encoding *script_encoding = NULL;
! 323: int bom_size;
! 324: unsigned char *pos1, *pos2;
! 325:
! 326: if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
! 327: return NULL;
! 328: }
! 329:
! 330: /* check out BOM */
! 331: if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
! 332: script_encoding = zend_multibyte_encoding_utf32be;
! 333: bom_size = sizeof(BOM_UTF32_BE)-1;
! 334: } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
! 335: script_encoding = zend_multibyte_encoding_utf32le;
! 336: bom_size = sizeof(BOM_UTF32_LE)-1;
! 337: } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
! 338: script_encoding = zend_multibyte_encoding_utf16be;
! 339: bom_size = sizeof(BOM_UTF16_BE)-1;
! 340: } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
! 341: script_encoding = zend_multibyte_encoding_utf16le;
! 342: bom_size = sizeof(BOM_UTF16_LE)-1;
! 343: } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
! 344: script_encoding = zend_multibyte_encoding_utf8;
! 345: bom_size = sizeof(BOM_UTF8)-1;
! 346: }
! 347:
! 348: if (script_encoding) {
! 349: /* remove BOM */
! 350: LANG_SCNG(script_org) += bom_size;
! 351: LANG_SCNG(script_org_size) -= bom_size;
! 352:
! 353: return script_encoding;
! 354: }
! 355:
! 356: /* script contains NULL bytes -> auto-detection */
! 357: if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
! 358: /* check if the NULL byte is after the __HALT_COMPILER(); */
! 359: pos2 = LANG_SCNG(script_org);
! 360:
! 361: while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
! 362: pos2 = memchr(pos2, '_', pos1 - pos2);
! 363: if (!pos2) break;
! 364: pos2++;
! 365: if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
! 366: pos2 += sizeof("_HALT_COMPILER")-1;
! 367: while (*pos2 == ' ' ||
! 368: *pos2 == '\t' ||
! 369: *pos2 == '\r' ||
! 370: *pos2 == '\n') {
! 371: pos2++;
! 372: }
! 373: if (*pos2 == '(') {
! 374: pos2++;
! 375: while (*pos2 == ' ' ||
! 376: *pos2 == '\t' ||
! 377: *pos2 == '\r' ||
! 378: *pos2 == '\n') {
! 379: pos2++;
! 380: }
! 381: if (*pos2 == ')') {
! 382: pos2++;
! 383: while (*pos2 == ' ' ||
! 384: *pos2 == '\t' ||
! 385: *pos2 == '\r' ||
! 386: *pos2 == '\n') {
! 387: pos2++;
! 388: }
! 389: if (*pos2 == ';') {
! 390: return NULL;
! 391: }
! 392: }
! 393: }
! 394: }
! 395: }
! 396: /* make best effort if BOM is missing */
! 397: return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
! 398: }
! 399:
! 400: return NULL;
! 401: }
! 402:
! 403: static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
! 404: {
! 405: const zend_encoding *script_encoding;
! 406:
! 407: if (CG(detect_unicode)) {
! 408: /* check out bom(byte order mark) and see if containing wchars */
! 409: script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
! 410: if (script_encoding != NULL) {
! 411: /* bom or wchar detection is prior to 'script_encoding' option */
! 412: return script_encoding;
! 413: }
! 414: }
! 415:
! 416: /* if no script_encoding specified, just leave alone */
! 417: if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
! 418: return NULL;
! 419: }
! 420:
! 421: /* if multiple encodings specified, detect automagically */
! 422: if (CG(script_encoding_list_size) > 1) {
! 423: return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
! 424: }
! 425:
! 426: return CG(script_encoding_list)[0];
! 427: }
! 428:
! 429: ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
! 430: {
! 431: const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
! 432: const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
! 433:
! 434: if (!script_encoding) {
! 435: return FAILURE;
! 436: }
! 437:
! 438: /* judge input/output filter */
! 439: LANG_SCNG(script_encoding) = script_encoding;
! 440: LANG_SCNG(input_filter) = NULL;
! 441: LANG_SCNG(output_filter) = NULL;
! 442:
! 443: if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
! 444: if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
! 445: /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
! 446: LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
! 447: LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
! 448: } else {
! 449: LANG_SCNG(input_filter) = NULL;
! 450: LANG_SCNG(output_filter) = NULL;
! 451: }
! 452: return SUCCESS;
! 453: }
! 454:
! 455: if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
! 456: LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
! 457: LANG_SCNG(output_filter) = NULL;
! 458: } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
! 459: LANG_SCNG(input_filter) = NULL;
! 460: LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
! 461: } else {
! 462: /* both script and internal encodings are incompatible w/ flex */
! 463: LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
! 464: LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
! 465: }
! 466:
! 467: return 0;
! 468: }
1.1 misho 469:
470: ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
471: {
1.1.1.2 ! misho 472: const char *file_path = NULL;
! 473: char *buf;
1.1 misho 474: size_t size, offset = 0;
1.1.1.2 ! misho 475:
1.1 misho 476: /* The shebang line was read, get the current position to obtain the buffer start */
477: if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
478: if ((offset = ftell(file_handle->handle.fp)) == -1) {
479: offset = 0;
480: }
481: }
482:
483: if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
484: return FAILURE;
485: }
486:
487: zend_llist_add_element(&CG(open_files), file_handle);
488: if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
489: zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
490: size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
491: fh->handle.stream.handle = (void*)(((char*)fh) + diff);
492: file_handle->handle.stream.handle = fh->handle.stream.handle;
493: }
494:
495: /* Reset the scanner for scanning the new file */
496: SCNG(yy_in) = file_handle;
497: SCNG(yy_start) = NULL;
498:
499: if (size != -1) {
1.1.1.2 ! misho 500: if (CG(multibyte)) {
! 501: SCNG(script_org) = (unsigned char*)buf;
! 502: SCNG(script_org_size) = size;
! 503: SCNG(script_filtered) = NULL;
1.1 misho 504:
1.1.1.2 ! misho 505: zend_multibyte_set_filter(NULL TSRMLS_CC);
1.1 misho 506:
1.1.1.2 ! misho 507: if (SCNG(input_filter)) {
! 508: if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
! 509: zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
! 510: "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
! 511: }
! 512: buf = (char*)SCNG(script_filtered);
! 513: size = SCNG(script_filtered_size);
1.1 misho 514: }
515: }
1.1.1.2 ! misho 516: SCNG(yy_start) = (unsigned char *)buf - offset;
1.1 misho 517: yy_scan_buffer(buf, size TSRMLS_CC);
518: } else {
519: zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
520: }
521:
522: BEGIN(INITIAL);
523:
524: if (file_handle->opened_path) {
525: file_path = file_handle->opened_path;
526: } else {
527: file_path = file_handle->filename;
528: }
529:
530: zend_set_compiled_filename(file_path TSRMLS_CC);
531:
532: if (CG(start_lineno)) {
533: CG(zend_lineno) = CG(start_lineno);
534: CG(start_lineno) = 0;
535: } else {
536: CG(zend_lineno) = 1;
537: }
538:
539: CG(increment_lineno) = 0;
540: return SUCCESS;
541: }
542: END_EXTERN_C()
543:
544:
545: ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
546: {
547: zend_lex_state original_lex_state;
548: zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
549: zend_op_array *original_active_op_array = CG(active_op_array);
550: zend_op_array *retval=NULL;
551: int compiler_result;
552: zend_bool compilation_successful=0;
553: znode retval_znode;
554: zend_bool original_in_compilation = CG(in_compilation);
555:
556: retval_znode.op_type = IS_CONST;
557: retval_znode.u.constant.type = IS_LONG;
558: retval_znode.u.constant.value.lval = 1;
559: Z_UNSET_ISREF(retval_znode.u.constant);
560: Z_SET_REFCOUNT(retval_znode.u.constant, 1);
561:
562: zend_save_lexical_state(&original_lex_state TSRMLS_CC);
563:
564: retval = op_array; /* success oriented */
565:
566: if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
567: if (type==ZEND_REQUIRE) {
568: zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
569: zend_bailout();
570: } else {
571: zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
572: }
573: compilation_successful=0;
574: } else {
575: init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
576: CG(in_compilation) = 1;
577: CG(active_op_array) = op_array;
1.1.1.2 ! misho 578: zend_init_compiler_context(TSRMLS_C);
1.1 misho 579: compiler_result = zendparse(TSRMLS_C);
580: zend_do_return(&retval_znode, 0 TSRMLS_CC);
581: CG(in_compilation) = original_in_compilation;
582: if (compiler_result==1) { /* parser error */
583: zend_bailout();
584: }
585: compilation_successful=1;
586: }
587:
588: if (retval) {
589: CG(active_op_array) = original_active_op_array;
590: if (compilation_successful) {
591: pass_two(op_array TSRMLS_CC);
592: zend_release_labels(TSRMLS_C);
593: } else {
594: efree(op_array);
595: retval = NULL;
596: }
597: }
598: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
599: return retval;
600: }
601:
602:
603: zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
604: {
605: zend_file_handle file_handle;
606: zval tmp;
607: zend_op_array *retval;
608: char *opened_path = NULL;
609:
610: if (filename->type != IS_STRING) {
611: tmp = *filename;
612: zval_copy_ctor(&tmp);
613: convert_to_string(&tmp);
614: filename = &tmp;
615: }
616: file_handle.filename = filename->value.str.val;
617: file_handle.free_filename = 0;
618: file_handle.type = ZEND_HANDLE_FILENAME;
619: file_handle.opened_path = NULL;
620: file_handle.handle.fp = NULL;
621:
622: retval = zend_compile_file(&file_handle, type TSRMLS_CC);
623: if (retval && file_handle.handle.stream.handle) {
624: int dummy = 1;
625:
626: if (!file_handle.opened_path) {
627: file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
628: }
629:
630: zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
631:
632: if (opened_path) {
633: efree(opened_path);
634: }
635: }
636: zend_destroy_file_handle(&file_handle TSRMLS_CC);
637:
638: if (filename==&tmp) {
639: zval_dtor(&tmp);
640: }
641: return retval;
642: }
643:
644: ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
645: {
1.1.1.2 ! misho 646: char *buf;
! 647: size_t size;
! 648:
1.1 misho 649: /* enforce two trailing NULLs for flex... */
1.1.1.2 ! misho 650: if (IS_INTERNED(str->value.str.val)) {
! 651: char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
! 652: memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
! 653: str->value.str.val = tmp;
! 654: } else {
! 655: str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
! 656: }
1.1 misho 657:
658: memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
659:
1.1.1.2 ! misho 660: SCNG(yy_in) = NULL;
1.1 misho 661: SCNG(yy_start) = NULL;
662:
1.1.1.2 ! misho 663: buf = str->value.str.val;
! 664: size = str->value.str.len;
! 665:
! 666: if (CG(multibyte)) {
! 667: SCNG(script_org) = (unsigned char*)buf;
! 668: SCNG(script_org_size) = size;
! 669: SCNG(script_filtered) = NULL;
1.1 misho 670:
1.1.1.2 ! misho 671: zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
1.1 misho 672:
1.1.1.2 ! misho 673: if (SCNG(input_filter)) {
! 674: if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
! 675: zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
! 676: "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
! 677: }
! 678: buf = (char*)SCNG(script_filtered);
! 679: size = SCNG(script_filtered_size);
! 680: }
1.1 misho 681: }
682:
1.1.1.2 ! misho 683: yy_scan_buffer(buf, size TSRMLS_CC);
1.1 misho 684:
685: zend_set_compiled_filename(filename TSRMLS_CC);
686: CG(zend_lineno) = 1;
687: CG(increment_lineno) = 0;
688: return SUCCESS;
689: }
690:
691:
692: ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
693: {
694: size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
695: if (SCNG(input_filter)) {
1.1.1.2 ! misho 696: size_t original_offset = offset, length = 0;
! 697: do {
1.1 misho 698: unsigned char *p = NULL;
1.1.1.2 ! misho 699: if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
! 700: return (size_t)-1;
1.1 misho 701: }
702: efree(p);
703: if (length > original_offset) {
704: offset--;
705: } else if (length < original_offset) {
706: offset++;
707: }
708: } while (original_offset != length);
709: }
710: return offset;
711: }
712:
713:
714: zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
715: {
716: zend_lex_state original_lex_state;
717: zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
718: zend_op_array *original_active_op_array = CG(active_op_array);
719: zend_op_array *retval;
720: zval tmp;
721: int compiler_result;
722: zend_bool original_in_compilation = CG(in_compilation);
723:
724: if (source_string->value.str.len==0) {
725: efree(op_array);
726: return NULL;
727: }
728:
729: CG(in_compilation) = 1;
730:
731: tmp = *source_string;
732: zval_copy_ctor(&tmp);
733: convert_to_string(&tmp);
734: source_string = &tmp;
735:
736: zend_save_lexical_state(&original_lex_state TSRMLS_CC);
737: if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
738: efree(op_array);
739: retval = NULL;
740: } else {
741: zend_bool orig_interactive = CG(interactive);
742:
743: CG(interactive) = 0;
744: init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
745: CG(interactive) = orig_interactive;
746: CG(active_op_array) = op_array;
1.1.1.2 ! misho 747: zend_init_compiler_context(TSRMLS_C);
1.1 misho 748: BEGIN(ST_IN_SCRIPTING);
749: compiler_result = zendparse(TSRMLS_C);
750:
751: if (SCNG(script_filtered)) {
752: efree(SCNG(script_filtered));
753: SCNG(script_filtered) = NULL;
754: }
755:
756: if (compiler_result==1) {
757: CG(active_op_array) = original_active_op_array;
758: CG(unclean_shutdown)=1;
759: destroy_op_array(op_array TSRMLS_CC);
760: efree(op_array);
761: retval = NULL;
762: } else {
763: zend_do_return(NULL, 0 TSRMLS_CC);
764: CG(active_op_array) = original_active_op_array;
765: pass_two(op_array TSRMLS_CC);
766: zend_release_labels(TSRMLS_C);
767: retval = op_array;
768: }
769: }
770: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
771: zval_dtor(&tmp);
772: CG(in_compilation) = original_in_compilation;
773: return retval;
774: }
775:
776:
777: BEGIN_EXTERN_C()
778: int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
779: {
780: zend_lex_state original_lex_state;
781: zend_file_handle file_handle;
782:
783: file_handle.type = ZEND_HANDLE_FILENAME;
784: file_handle.filename = filename;
785: file_handle.free_filename = 0;
786: file_handle.opened_path = NULL;
787: zend_save_lexical_state(&original_lex_state TSRMLS_CC);
788: if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
789: zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
790: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
791: return FAILURE;
792: }
793: zend_highlight(syntax_highlighter_ini TSRMLS_CC);
794: if (SCNG(script_filtered)) {
795: efree(SCNG(script_filtered));
796: SCNG(script_filtered) = NULL;
797: }
798: zend_destroy_file_handle(&file_handle TSRMLS_CC);
799: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
800: return SUCCESS;
801: }
802:
803: int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
804: {
805: zend_lex_state original_lex_state;
806: zval tmp = *str;
807:
808: str = &tmp;
809: zval_copy_ctor(str);
810: zend_save_lexical_state(&original_lex_state TSRMLS_CC);
811: if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
812: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
813: return FAILURE;
814: }
815: BEGIN(INITIAL);
816: zend_highlight(syntax_highlighter_ini TSRMLS_CC);
817: if (SCNG(script_filtered)) {
818: efree(SCNG(script_filtered));
819: SCNG(script_filtered) = NULL;
820: }
821: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
822: zval_dtor(str);
823: return SUCCESS;
824: }
825:
1.1.1.2 ! misho 826: ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
1.1 misho 827: {
1.1.1.2 ! misho 828: size_t length;
! 829: unsigned char *new_yy_start;
1.1 misho 830:
831: /* convert and set */
832: if (!SCNG(input_filter)) {
1.1.1.2 ! misho 833: if (SCNG(script_filtered)) {
! 834: efree(SCNG(script_filtered));
! 835: SCNG(script_filtered) = NULL;
1.1 misho 836: }
1.1.1.2 ! misho 837: SCNG(script_filtered_size) = 0;
! 838: length = SCNG(script_org_size);
! 839: new_yy_start = SCNG(script_org);
! 840: } else {
! 841: if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
! 842: zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
! 843: "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
! 844: }
! 845: SCNG(script_filtered) = new_yy_start;
! 846: SCNG(script_filtered_size) = length;
1.1 misho 847: }
848:
1.1.1.2 ! misho 849: SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
! 850: SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
! 851: SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
! 852: SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
1.1 misho 853:
1.1.1.2 ! misho 854: SCNG(yy_start) = new_yy_start;
1.1 misho 855: }
856:
857:
/*
 * zend_copy_value(zendlval, yytext, yyleng): store the yyleng bytes at
 * yytext as the string value of zendlval. With an output filter installed
 * the bytes are passed through it and the filter's output buffer and length
 * are stored; otherwise the bytes are duplicated with estrndup().
 * NOTE(review): the expansion is a bare if/else rather than
 * do { } while (0), and the filter's return value is not checked.
 */
858: # define zend_copy_value(zendlval, yytext, yyleng) \
859: if (SCNG(output_filter)) { \
860: size_t sz = 0; \
861: SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC); \
862: zendlval->value.str.len = sz; \
863: } else { \
864: zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \
865: zendlval->value.str.len = yyleng; \
866: }
867:
868: static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
869: {
870: register char *s, *t;
871: char *end;
872:
873: ZVAL_STRINGL(zendlval, str, len, 1);
874:
875: /* convert escape sequences */
876: s = t = zendlval->value.str.val;
877: end = s+zendlval->value.str.len;
878: while (s<end) {
879: if (*s=='\\') {
880: s++;
881: if (s >= end) {
882: *t++ = '\\';
883: break;
884: }
885:
886: switch(*s) {
887: case 'n':
888: *t++ = '\n';
889: zendlval->value.str.len--;
890: break;
891: case 'r':
892: *t++ = '\r';
893: zendlval->value.str.len--;
894: break;
895: case 't':
896: *t++ = '\t';
897: zendlval->value.str.len--;
898: break;
899: case 'f':
900: *t++ = '\f';
901: zendlval->value.str.len--;
902: break;
903: case 'v':
904: *t++ = '\v';
905: zendlval->value.str.len--;
906: break;
1.1.1.2 ! misho 907: case 'e':
! 908: *t++ = '\e';
! 909: zendlval->value.str.len--;
! 910: break;
1.1 misho 911: case '"':
912: case '`':
913: if (*s != quote_type) {
914: *t++ = '\\';
915: *t++ = *s;
916: break;
917: }
918: case '\\':
919: case '$':
920: *t++ = *s;
921: zendlval->value.str.len--;
922: break;
923: case 'x':
924: case 'X':
925: if (ZEND_IS_HEX(*(s+1))) {
926: char hex_buf[3] = { 0, 0, 0 };
927:
928: zendlval->value.str.len--; /* for the 'x' */
929:
930: hex_buf[0] = *(++s);
931: zendlval->value.str.len--;
932: if (ZEND_IS_HEX(*(s+1))) {
933: hex_buf[1] = *(++s);
934: zendlval->value.str.len--;
935: }
936: *t++ = (char) strtol(hex_buf, NULL, 16);
937: } else {
938: *t++ = '\\';
939: *t++ = *s;
940: }
941: break;
942: default:
943: /* check for an octal */
944: if (ZEND_IS_OCT(*s)) {
945: char octal_buf[4] = { 0, 0, 0, 0 };
946:
947: octal_buf[0] = *s;
948: zendlval->value.str.len--;
949: if (ZEND_IS_OCT(*(s+1))) {
950: octal_buf[1] = *(++s);
951: zendlval->value.str.len--;
952: if (ZEND_IS_OCT(*(s+1))) {
953: octal_buf[2] = *(++s);
954: zendlval->value.str.len--;
955: }
956: }
957: *t++ = (char) strtol(octal_buf, NULL, 8);
958: } else {
959: *t++ = '\\';
960: *t++ = *s;
961: }
962: break;
963: }
964: } else {
965: *t++ = *s;
966: }
967:
968: if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
969: CG(zend_lineno)++;
970: }
971: s++;
972: }
973: *t = 0;
974: if (SCNG(output_filter)) {
975: size_t sz = 0;
976: s = zendlval->value.str.val;
977: SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
978: zendlval->value.str.len = sz;
979: efree(s);
980: }
981: }
982:
983:
984: int lex_scan(zval *zendlval TSRMLS_DC)
985: {
986: restart:
987: SCNG(yy_text) = YYCURSOR;
988:
989: yymore_restart:
990:
991: /*!re2c
992: re2c:yyfill:check = 0;
993: LNUM [0-9]+
994: DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
995: EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
996: HNUM "0x"[0-9a-fA-F]+
1.1.1.2 ! misho 997: BNUM "0b"[01]+
1.1 misho 998: LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
999: WHITESPACE [ \n\r\t]+
1000: TABS_AND_SPACES [ \t]*
1001: TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1002: ANY_CHAR [^]
1003: NEWLINE ("\r"|"\n"|"\r\n")
1004:
1005: /* compute yyleng before each rule */
1006: <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1007:
1008:
1009: <ST_IN_SCRIPTING>"exit" {
1010: return T_EXIT;
1011: }
1012:
1013: <ST_IN_SCRIPTING>"die" {
1014: return T_EXIT;
1015: }
1016:
1017: <ST_IN_SCRIPTING>"function" {
1018: return T_FUNCTION;
1019: }
1020:
1021: <ST_IN_SCRIPTING>"const" {
1022: return T_CONST;
1023: }
1024:
1025: <ST_IN_SCRIPTING>"return" {
1026: return T_RETURN;
1027: }
1028:
1029: <ST_IN_SCRIPTING>"try" {
1030: return T_TRY;
1031: }
1032:
1033: <ST_IN_SCRIPTING>"catch" {
1034: return T_CATCH;
1035: }
1036:
1037: <ST_IN_SCRIPTING>"throw" {
1038: return T_THROW;
1039: }
1040:
1041: <ST_IN_SCRIPTING>"if" {
1042: return T_IF;
1043: }
1044:
1045: <ST_IN_SCRIPTING>"elseif" {
1046: return T_ELSEIF;
1047: }
1048:
1049: <ST_IN_SCRIPTING>"endif" {
1050: return T_ENDIF;
1051: }
1052:
1053: <ST_IN_SCRIPTING>"else" {
1054: return T_ELSE;
1055: }
1056:
1057: <ST_IN_SCRIPTING>"while" {
1058: return T_WHILE;
1059: }
1060:
1061: <ST_IN_SCRIPTING>"endwhile" {
1062: return T_ENDWHILE;
1063: }
1064:
1065: <ST_IN_SCRIPTING>"do" {
1066: return T_DO;
1067: }
1068:
1069: <ST_IN_SCRIPTING>"for" {
1070: return T_FOR;
1071: }
1072:
1073: <ST_IN_SCRIPTING>"endfor" {
1074: return T_ENDFOR;
1075: }
1076:
1077: <ST_IN_SCRIPTING>"foreach" {
1078: return T_FOREACH;
1079: }
1080:
1081: <ST_IN_SCRIPTING>"endforeach" {
1082: return T_ENDFOREACH;
1083: }
1084:
1085: <ST_IN_SCRIPTING>"declare" {
1086: return T_DECLARE;
1087: }
1088:
1089: <ST_IN_SCRIPTING>"enddeclare" {
1090: return T_ENDDECLARE;
1091: }
1092:
1093: <ST_IN_SCRIPTING>"instanceof" {
1094: return T_INSTANCEOF;
1095: }
1096:
1097: <ST_IN_SCRIPTING>"as" {
1098: return T_AS;
1099: }
1100:
1101: <ST_IN_SCRIPTING>"switch" {
1102: return T_SWITCH;
1103: }
1104:
1105: <ST_IN_SCRIPTING>"endswitch" {
1106: return T_ENDSWITCH;
1107: }
1108:
1109: <ST_IN_SCRIPTING>"case" {
1110: return T_CASE;
1111: }
1112:
1113: <ST_IN_SCRIPTING>"default" {
1114: return T_DEFAULT;
1115: }
1116:
1117: <ST_IN_SCRIPTING>"break" {
1118: return T_BREAK;
1119: }
1120:
1121: <ST_IN_SCRIPTING>"continue" {
1122: return T_CONTINUE;
1123: }
1124:
1125: <ST_IN_SCRIPTING>"goto" {
1126: return T_GOTO;
1127: }
1128:
1129: <ST_IN_SCRIPTING>"echo" {
1130: return T_ECHO;
1131: }
1132:
1133: <ST_IN_SCRIPTING>"print" {
1134: return T_PRINT;
1135: }
1136:
1137: <ST_IN_SCRIPTING>"class" {
1138: return T_CLASS;
1139: }
1140:
1141: <ST_IN_SCRIPTING>"interface" {
1142: return T_INTERFACE;
1143: }
1144:
1.1.1.2 ! misho 1145: <ST_IN_SCRIPTING>"trait" {
! 1146: return T_TRAIT;
! 1147: }
! 1148:
1.1 misho 1149: <ST_IN_SCRIPTING>"extends" {
1150: return T_EXTENDS;
1151: }
1152:
1153: <ST_IN_SCRIPTING>"implements" {
1154: return T_IMPLEMENTS;
1155: }
1156:
1157: <ST_IN_SCRIPTING>"->" {
1158: yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1159: return T_OBJECT_OPERATOR;
1160: }
1161:
1162: <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1163: zendlval->value.str.val = yytext; /* no copying - intentional */
1164: zendlval->value.str.len = yyleng;
1165: zendlval->type = IS_STRING;
1166: HANDLE_NEWLINES(yytext, yyleng);
1167: return T_WHITESPACE;
1168: }
1169:
1170: <ST_LOOKING_FOR_PROPERTY>"->" {
1171: return T_OBJECT_OPERATOR;
1172: }
1173:
1174: <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1175: yy_pop_state(TSRMLS_C);
1176: zend_copy_value(zendlval, yytext, yyleng);
1177: zendlval->type = IS_STRING;
1178: return T_STRING;
1179: }
1180:
1181: <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1182: yyless(0);
1183: yy_pop_state(TSRMLS_C);
1184: goto restart;
1185: }
1186:
1187: <ST_IN_SCRIPTING>"::" {
1188: return T_PAAMAYIM_NEKUDOTAYIM;
1189: }
1190:
1191: <ST_IN_SCRIPTING>"\\" {
1192: return T_NS_SEPARATOR;
1193: }
1194:
1195: <ST_IN_SCRIPTING>"new" {
1196: return T_NEW;
1197: }
1198:
1199: <ST_IN_SCRIPTING>"clone" {
1200: return T_CLONE;
1201: }
1202:
1203: <ST_IN_SCRIPTING>"var" {
1204: return T_VAR;
1205: }
1206:
1207: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1208: return T_INT_CAST;
1209: }
1210:
1211: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1212: return T_DOUBLE_CAST;
1213: }
1214:
1.1.1.2 ! misho 1215: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1.1 misho 1216: return T_STRING_CAST;
1217: }
1218:
1219: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1220: return T_ARRAY_CAST;
1221: }
1222:
1223: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1224: return T_OBJECT_CAST;
1225: }
1226:
1227: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1228: return T_BOOL_CAST;
1229: }
1230:
1231: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1232: return T_UNSET_CAST;
1233: }
1234:
1235: <ST_IN_SCRIPTING>"eval" {
1236: return T_EVAL;
1237: }
1238:
1239: <ST_IN_SCRIPTING>"include" {
1240: return T_INCLUDE;
1241: }
1242:
1243: <ST_IN_SCRIPTING>"include_once" {
1244: return T_INCLUDE_ONCE;
1245: }
1246:
1247: <ST_IN_SCRIPTING>"require" {
1248: return T_REQUIRE;
1249: }
1250:
1251: <ST_IN_SCRIPTING>"require_once" {
1252: return T_REQUIRE_ONCE;
1253: }
1254:
1255: <ST_IN_SCRIPTING>"namespace" {
1256: return T_NAMESPACE;
1257: }
1258:
1259: <ST_IN_SCRIPTING>"use" {
1260: return T_USE;
1261: }
1262:
1.1.1.2 ! misho 1263: <ST_IN_SCRIPTING>"insteadof" {
! 1264: return T_INSTEADOF;
! 1265: }
! 1266:
1.1 misho 1267: <ST_IN_SCRIPTING>"global" {
1268: return T_GLOBAL;
1269: }
1270:
1271: <ST_IN_SCRIPTING>"isset" {
1272: return T_ISSET;
1273: }
1274:
1275: <ST_IN_SCRIPTING>"empty" {
1276: return T_EMPTY;
1277: }
1278:
1279: <ST_IN_SCRIPTING>"__halt_compiler" {
1280: return T_HALT_COMPILER;
1281: }
1282:
1283: <ST_IN_SCRIPTING>"static" {
1284: return T_STATIC;
1285: }
1286:
1287: <ST_IN_SCRIPTING>"abstract" {
1288: return T_ABSTRACT;
1289: }
1290:
1291: <ST_IN_SCRIPTING>"final" {
1292: return T_FINAL;
1293: }
1294:
1295: <ST_IN_SCRIPTING>"private" {
1296: return T_PRIVATE;
1297: }
1298:
1299: <ST_IN_SCRIPTING>"protected" {
1300: return T_PROTECTED;
1301: }
1302:
1303: <ST_IN_SCRIPTING>"public" {
1304: return T_PUBLIC;
1305: }
1306:
1307: <ST_IN_SCRIPTING>"unset" {
1308: return T_UNSET;
1309: }
1310:
1311: <ST_IN_SCRIPTING>"=>" {
1312: return T_DOUBLE_ARROW;
1313: }
1314:
1315: <ST_IN_SCRIPTING>"list" {
1316: return T_LIST;
1317: }
1318:
1319: <ST_IN_SCRIPTING>"array" {
1320: return T_ARRAY;
1321: }
1322:
1.1.1.2 ! misho 1323: <ST_IN_SCRIPTING>"callable" {
! 1324: return T_CALLABLE;
! 1325: }
! 1326:
1.1 misho 1327: <ST_IN_SCRIPTING>"++" {
1328: return T_INC;
1329: }
1330:
1331: <ST_IN_SCRIPTING>"--" {
1332: return T_DEC;
1333: }
1334:
1335: <ST_IN_SCRIPTING>"===" {
1336: return T_IS_IDENTICAL;
1337: }
1338:
1339: <ST_IN_SCRIPTING>"!==" {
1340: return T_IS_NOT_IDENTICAL;
1341: }
1342:
1343: <ST_IN_SCRIPTING>"==" {
1344: return T_IS_EQUAL;
1345: }
1346:
1347: <ST_IN_SCRIPTING>"!="|"<>" {
1348: return T_IS_NOT_EQUAL;
1349: }
1350:
1351: <ST_IN_SCRIPTING>"<=" {
1352: return T_IS_SMALLER_OR_EQUAL;
1353: }
1354:
1355: <ST_IN_SCRIPTING>">=" {
1356: return T_IS_GREATER_OR_EQUAL;
1357: }
1358:
1359: <ST_IN_SCRIPTING>"+=" {
1360: return T_PLUS_EQUAL;
1361: }
1362:
1363: <ST_IN_SCRIPTING>"-=" {
1364: return T_MINUS_EQUAL;
1365: }
1366:
1367: <ST_IN_SCRIPTING>"*=" {
1368: return T_MUL_EQUAL;
1369: }
1370:
1371: <ST_IN_SCRIPTING>"/=" {
1372: return T_DIV_EQUAL;
1373: }
1374:
1375: <ST_IN_SCRIPTING>".=" {
1376: return T_CONCAT_EQUAL;
1377: }
1378:
1379: <ST_IN_SCRIPTING>"%=" {
1380: return T_MOD_EQUAL;
1381: }
1382:
1383: <ST_IN_SCRIPTING>"<<=" {
1384: return T_SL_EQUAL;
1385: }
1386:
1387: <ST_IN_SCRIPTING>">>=" {
1388: return T_SR_EQUAL;
1389: }
1390:
1391: <ST_IN_SCRIPTING>"&=" {
1392: return T_AND_EQUAL;
1393: }
1394:
1395: <ST_IN_SCRIPTING>"|=" {
1396: return T_OR_EQUAL;
1397: }
1398:
1399: <ST_IN_SCRIPTING>"^=" {
1400: return T_XOR_EQUAL;
1401: }
1402:
1403: <ST_IN_SCRIPTING>"||" {
1404: return T_BOOLEAN_OR;
1405: }
1406:
1407: <ST_IN_SCRIPTING>"&&" {
1408: return T_BOOLEAN_AND;
1409: }
1410:
1411: <ST_IN_SCRIPTING>"OR" {
1412: return T_LOGICAL_OR;
1413: }
1414:
1415: <ST_IN_SCRIPTING>"AND" {
1416: return T_LOGICAL_AND;
1417: }
1418:
1419: <ST_IN_SCRIPTING>"XOR" {
1420: return T_LOGICAL_XOR;
1421: }
1422:
1423: <ST_IN_SCRIPTING>"<<" {
1424: return T_SL;
1425: }
1426:
1427: <ST_IN_SCRIPTING>">>" {
1428: return T_SR;
1429: }
1430:
1431: <ST_IN_SCRIPTING>{TOKENS} {
1432: return yytext[0];
1433: }
1434:
1435:
1436: <ST_IN_SCRIPTING>"{" {
1437: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1438: return '{';
1439: }
1440:
1441:
1442: <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1443: yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1444: return T_DOLLAR_OPEN_CURLY_BRACES;
1445: }
1446:
1447:
1448: <ST_IN_SCRIPTING>"}" {
1449: RESET_DOC_COMMENT();
1450: if (!zend_stack_is_empty(&SCNG(state_stack))) {
1451: yy_pop_state(TSRMLS_C);
1452: }
1453: return '}';
1454: }
1455:
1456:
1457: <ST_LOOKING_FOR_VARNAME>{LABEL} {
1458: zend_copy_value(zendlval, yytext, yyleng);
1459: zendlval->type = IS_STRING;
1460: yy_pop_state(TSRMLS_C);
1461: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1462: return T_STRING_VARNAME;
1463: }
1464:
1465:
1466: <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1467: yyless(0);
1468: yy_pop_state(TSRMLS_C);
1469: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1470: goto restart;
1471: }
1472:
1.1.1.2 ! misho 1473: <ST_IN_SCRIPTING>{BNUM} {
! 1474: char *bin = yytext + 2; /* Skip "0b" */
! 1475: int len = yyleng - 2;
! 1476:
! 1477: /* Skip any leading 0s */
! 1478: while (*bin == '0') {
! 1479: ++bin;
! 1480: --len;
! 1481: }
! 1482:
! 1483: if (len < SIZEOF_LONG * 8) {
! 1484: if (len == 0) {
! 1485: zendlval->value.lval = 0;
! 1486: } else {
! 1487: zendlval->value.lval = strtol(bin, NULL, 2);
! 1488: }
! 1489: zendlval->type = IS_LONG;
! 1490: return T_LNUMBER;
! 1491: } else {
! 1492: zendlval->value.dval = zend_bin_strtod(bin, NULL);
! 1493: zendlval->type = IS_DOUBLE;
! 1494: return T_DNUMBER;
! 1495: }
! 1496: }
1.1 misho 1497:
1498: <ST_IN_SCRIPTING>{LNUM} {
1499: if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1500: zendlval->value.lval = strtol(yytext, NULL, 0);
1501: } else {
1502: errno = 0;
1503: zendlval->value.lval = strtol(yytext, NULL, 0);
1504: if (errno == ERANGE) { /* Overflow */
1505: if (yytext[0] == '0') { /* octal overflow */
1506: zendlval->value.dval = zend_oct_strtod(yytext, NULL);
1507: } else {
1508: zendlval->value.dval = zend_strtod(yytext, NULL);
1509: }
1510: zendlval->type = IS_DOUBLE;
1511: return T_DNUMBER;
1512: }
1513: }
1514:
1515: zendlval->type = IS_LONG;
1516: return T_LNUMBER;
1517: }
1518:
1519: <ST_IN_SCRIPTING>{HNUM} {
1520: char *hex = yytext + 2; /* Skip "0x" */
1521: int len = yyleng - 2;
1522:
1523: /* Skip any leading 0s */
1524: while (*hex == '0') {
1525: hex++;
1526: len--;
1527: }
1528:
1529: if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1.1.1.2 ! misho 1530: if (len == 0) {
! 1531: zendlval->value.lval = 0;
! 1532: } else {
! 1533: zendlval->value.lval = strtol(hex, NULL, 16);
! 1534: }
1.1 misho 1535: zendlval->type = IS_LONG;
1536: return T_LNUMBER;
1537: } else {
1538: zendlval->value.dval = zend_hex_strtod(hex, NULL);
1539: zendlval->type = IS_DOUBLE;
1540: return T_DNUMBER;
1541: }
1542: }
1543:
1544: <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1545: if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1546: zendlval->value.lval = strtol(yytext, NULL, 10);
1547: zendlval->type = IS_LONG;
1548: } else {
1549: zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1550: zendlval->value.str.len = yyleng;
1551: zendlval->type = IS_STRING;
1552: }
1553: return T_NUM_STRING;
1554: }
1555:
1.1.1.2 ! misho 1556: <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1.1 misho 1557: zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1558: zendlval->value.str.len = yyleng;
1559: zendlval->type = IS_STRING;
1560: return T_NUM_STRING;
1561: }
1562:
1563: <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1564: zendlval->value.dval = zend_strtod(yytext, NULL);
1565: zendlval->type = IS_DOUBLE;
1566: return T_DNUMBER;
1567: }
1568:
1569: <ST_IN_SCRIPTING>"__CLASS__" {
1.1.1.2 ! misho 1570: const char *class_name = NULL;
! 1571:
! 1572: if (CG(active_class_entry)
! 1573: && (ZEND_ACC_TRAIT ==
! 1574: (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
! 1575: /* We create a special __CLASS__ constant that is going to be resolved
! 1576: at run-time */
! 1577: zendlval->value.str.len = sizeof("__CLASS__")-1;
! 1578: zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len);
! 1579: zendlval->type = IS_CONSTANT;
! 1580: } else {
! 1581: if (CG(active_class_entry)) {
! 1582: class_name = CG(active_class_entry)->name;
! 1583: }
! 1584:
! 1585: if (!class_name) {
! 1586: class_name = "";
! 1587: }
! 1588:
! 1589: zendlval->value.str.len = strlen(class_name);
! 1590: zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
! 1591: zendlval->type = IS_STRING;
1.1 misho 1592: }
1.1.1.2 ! misho 1593: return T_CLASS_C;
! 1594: }
1.1 misho 1595:
1.1.1.2 ! misho 1596: <ST_IN_SCRIPTING>"__TRAIT__" {
! 1597: const char *trait_name = NULL;
! 1598:
! 1599: if (CG(active_class_entry)
! 1600: && (ZEND_ACC_TRAIT ==
! 1601: (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
! 1602: trait_name = CG(active_class_entry)->name;
! 1603: }
! 1604:
! 1605: if (!trait_name) {
! 1606: trait_name = "";
1.1 misho 1607: }
1.1.1.2 ! misho 1608:
! 1609: zendlval->value.str.len = strlen(trait_name);
! 1610: zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
1.1 misho 1611: zendlval->type = IS_STRING;
1.1.1.2 ! misho 1612:
! 1613: return T_TRAIT_C;
1.1 misho 1614: }
1615:
1616: <ST_IN_SCRIPTING>"__FUNCTION__" {
1.1.1.2 ! misho 1617: const char *func_name = NULL;
1.1 misho 1618:
1619: if (CG(active_op_array)) {
1620: func_name = CG(active_op_array)->function_name;
1621: }
1622:
1623: if (!func_name) {
1624: func_name = "";
1625: }
1626: zendlval->value.str.len = strlen(func_name);
1627: zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
1628: zendlval->type = IS_STRING;
1629: return T_FUNC_C;
1630: }
1631:
1632: <ST_IN_SCRIPTING>"__METHOD__" {
1.1.1.2 ! misho 1633: const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
! 1634: const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1.1 misho 1635: size_t len = 0;
1636:
1637: if (class_name) {
1638: len += strlen(class_name) + 2;
1639: }
1640: if (func_name) {
1641: len += strlen(func_name);
1642: }
1643:
1.1.1.2 ! misho 1644: zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
1.1 misho 1645: class_name ? class_name : "",
1646: class_name && func_name ? "::" : "",
1647: func_name ? func_name : ""
1648: );
1649: zendlval->type = IS_STRING;
1650: return T_METHOD_C;
1651: }
1652:
1653: <ST_IN_SCRIPTING>"__LINE__" {
1654: zendlval->value.lval = CG(zend_lineno);
1655: zendlval->type = IS_LONG;
1656: return T_LINE;
1657: }
1658:
1659: <ST_IN_SCRIPTING>"__FILE__" {
1660: char *filename = zend_get_compiled_filename(TSRMLS_C);
1661:
1662: if (!filename) {
1663: filename = "";
1664: }
1665: zendlval->value.str.len = strlen(filename);
1666: zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
1667: zendlval->type = IS_STRING;
1668: return T_FILE;
1669: }
1670:
1671: <ST_IN_SCRIPTING>"__DIR__" {
1672: char *filename = zend_get_compiled_filename(TSRMLS_C);
1673: const size_t filename_len = strlen(filename);
1674: char *dirname;
1675:
1676: if (!filename) {
1677: filename = "";
1678: }
1679:
1680: dirname = estrndup(filename, filename_len);
1681: zend_dirname(dirname, filename_len);
1682:
1683: if (strcmp(dirname, ".") == 0) {
1684: dirname = erealloc(dirname, MAXPATHLEN);
1685: #if HAVE_GETCWD
1686: VCWD_GETCWD(dirname, MAXPATHLEN);
1687: #elif HAVE_GETWD
1688: VCWD_GETWD(dirname);
1689: #endif
1690: }
1691:
1692: zendlval->value.str.len = strlen(dirname);
1693: zendlval->value.str.val = dirname;
1694: zendlval->type = IS_STRING;
1695: return T_DIR;
1696: }
1697:
1698: <ST_IN_SCRIPTING>"__NAMESPACE__" {
1699: if (CG(current_namespace)) {
1700: *zendlval = *CG(current_namespace);
1701: zval_copy_ctor(zendlval);
1702: } else {
1703: ZVAL_EMPTY_STRING(zendlval);
1704: }
1705: return T_NS_C;
1706: }
1707:
1708: <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1.1.1.2 ! misho 1709: YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1.1 misho 1710:
1711: if (bracket != SCNG(yy_text)) {
1712: /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1713: YYCURSOR = bracket;
1714: goto inline_html;
1715: }
1716:
1717: HANDLE_NEWLINES(yytext, yyleng);
1718: zendlval->value.str.val = yytext; /* no copying - intentional */
1719: zendlval->value.str.len = yyleng;
1720: zendlval->type = IS_STRING;
1721: BEGIN(ST_IN_SCRIPTING);
1722: return T_OPEN_TAG;
1723: }
1724:
1725:
1726: <INITIAL>"<%=" {
1727: if (CG(asp_tags)) {
1728: zendlval->value.str.val = yytext; /* no copying - intentional */
1729: zendlval->value.str.len = yyleng;
1730: zendlval->type = IS_STRING;
1731: BEGIN(ST_IN_SCRIPTING);
1732: return T_OPEN_TAG_WITH_ECHO;
1733: } else {
1734: goto inline_char_handler;
1735: }
1736: }
1737:
1738:
1739: <INITIAL>"<?=" {
1.1.1.2 ! misho 1740: zendlval->value.str.val = yytext; /* no copying - intentional */
! 1741: zendlval->value.str.len = yyleng;
! 1742: zendlval->type = IS_STRING;
! 1743: BEGIN(ST_IN_SCRIPTING);
! 1744: return T_OPEN_TAG_WITH_ECHO;
1.1 misho 1745: }
1746:
1747:
1748: <INITIAL>"<%" {
1749: if (CG(asp_tags)) {
1750: zendlval->value.str.val = yytext; /* no copying - intentional */
1751: zendlval->value.str.len = yyleng;
1752: zendlval->type = IS_STRING;
1753: BEGIN(ST_IN_SCRIPTING);
1754: return T_OPEN_TAG;
1755: } else {
1756: goto inline_char_handler;
1757: }
1758: }
1759:
1760:
1761: <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1762: zendlval->value.str.val = yytext; /* no copying - intentional */
1763: zendlval->value.str.len = yyleng;
1764: zendlval->type = IS_STRING;
1765: HANDLE_NEWLINE(yytext[yyleng-1]);
1766: BEGIN(ST_IN_SCRIPTING);
1767: return T_OPEN_TAG;
1768: }
1769:
1770:
1771: <INITIAL>"<?" {
1772: if (CG(short_tags)) {
1773: zendlval->value.str.val = yytext; /* no copying - intentional */
1774: zendlval->value.str.len = yyleng;
1775: zendlval->type = IS_STRING;
1776: BEGIN(ST_IN_SCRIPTING);
1777: return T_OPEN_TAG;
1778: } else {
1779: goto inline_char_handler;
1780: }
1781: }
1782:
1783: <INITIAL>{ANY_CHAR} {
1784: if (YYCURSOR > YYLIMIT) {
1785: return 0;
1786: }
1787:
1788: inline_char_handler:
1789:
1790: while (1) {
1791: YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1792:
1793: YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1794:
1795: if (YYCURSOR < YYLIMIT) {
1796: switch (*YYCURSOR) {
1797: case '?':
1.1.1.2 ! misho 1798: if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1.1 misho 1799: break;
1800: }
1801: continue;
1802: case '%':
1803: if (CG(asp_tags)) {
1804: break;
1805: }
1806: continue;
1807: case 's':
1808: case 'S':
1809: /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1810: * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1811: YYCURSOR--;
1812: yymore();
1813: default:
1814: continue;
1815: }
1816:
1817: YYCURSOR--;
1818: }
1819:
1820: break;
1821: }
1822:
1823: inline_html:
1824: yyleng = YYCURSOR - SCNG(yy_text);
1825:
1826: if (SCNG(output_filter)) {
1827: int readsize;
1828: size_t sz = 0;
1829: readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1830: zendlval->value.str.len = sz;
1831: if (readsize < yyleng) {
1832: yyless(readsize);
1833: }
1834: } else {
1835: zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
1836: zendlval->value.str.len = yyleng;
1837: }
1838: zendlval->type = IS_STRING;
1839: HANDLE_NEWLINES(yytext, yyleng);
1840: return T_INLINE_HTML;
1841: }
1842:
1843:
1844: /* Make sure a label character follows "->", otherwise there is no property
1845: * and "->" will be taken literally
1846: */
1847: <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1848: yyless(yyleng - 3);
1849: yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1850: zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1851: zendlval->type = IS_STRING;
1852: return T_VARIABLE;
1853: }
1854:
1855: /* A [ always designates a variable offset, regardless of what follows
1856: */
1857: <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1858: yyless(yyleng - 1);
1859: yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1860: zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1861: zendlval->type = IS_STRING;
1862: return T_VARIABLE;
1863: }
1864:
1865: <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1866: zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1867: zendlval->type = IS_STRING;
1868: return T_VARIABLE;
1869: }
1870:
1871: <ST_VAR_OFFSET>"]" {
1872: yy_pop_state(TSRMLS_C);
1873: return ']';
1874: }
1875:
1876: <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1877: /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1878: return yytext[0];
1879: }
1880:
1881: <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1882: /* Invalid rule to return a more explicit parse error with proper line number */
1883: yyless(0);
1884: yy_pop_state(TSRMLS_C);
1885: return T_ENCAPSED_AND_WHITESPACE;
1886: }
1887:
1888: <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1889: zend_copy_value(zendlval, yytext, yyleng);
1890: zendlval->type = IS_STRING;
1891: return T_STRING;
1892: }
1893:
1894:
1895: <ST_IN_SCRIPTING>"#"|"//" {
1896: while (YYCURSOR < YYLIMIT) {
1897: switch (*YYCURSOR++) {
1898: case '\r':
1899: if (*YYCURSOR == '\n') {
1900: YYCURSOR++;
1901: }
1902: /* fall through */
1903: case '\n':
1904: CG(zend_lineno)++;
1905: break;
1906: case '%':
1907: if (!CG(asp_tags)) {
1908: continue;
1909: }
1910: /* fall through */
1911: case '?':
1912: if (*YYCURSOR == '>') {
1913: YYCURSOR--;
1914: break;
1915: }
1916: /* fall through */
1917: default:
1918: continue;
1919: }
1920:
1921: break;
1922: }
1923:
1924: yyleng = YYCURSOR - SCNG(yy_text);
1925:
1926: return T_COMMENT;
1927: }
1928:
1929: <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1930: int doc_com;
1931:
1932: if (yyleng > 2) {
1933: doc_com = 1;
1934: RESET_DOC_COMMENT();
1935: } else {
1936: doc_com = 0;
1937: }
1938:
1939: while (YYCURSOR < YYLIMIT) {
1940: if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1941: break;
1942: }
1943: }
1944:
1945: if (YYCURSOR < YYLIMIT) {
1946: YYCURSOR++;
1947: } else {
1948: zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1949: }
1950:
1951: yyleng = YYCURSOR - SCNG(yy_text);
1952: HANDLE_NEWLINES(yytext, yyleng);
1953:
1954: if (doc_com) {
1955: CG(doc_comment) = estrndup(yytext, yyleng);
1956: CG(doc_comment_len) = yyleng;
1957: return T_DOC_COMMENT;
1958: }
1959:
1960: return T_COMMENT;
1961: }
1962:
1963: <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1964: zendlval->value.str.val = yytext; /* no copying - intentional */
1965: zendlval->value.str.len = yyleng;
1966: zendlval->type = IS_STRING;
1967: BEGIN(INITIAL);
1968: return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1969: }
1970:
1971:
1972: <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1973: if (CG(asp_tags)) {
1974: BEGIN(INITIAL);
1975: zendlval->value.str.len = yyleng;
1976: zendlval->type = IS_STRING;
1977: zendlval->value.str.val = yytext; /* no copying - intentional */
1978: return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1979: } else {
1980: yyless(1);
1981: return yytext[0];
1982: }
1983: }
1984:
1985:
1986: <ST_IN_SCRIPTING>b?['] {
1987: register char *s, *t;
1988: char *end;
1989: int bprefix = (yytext[0] != '\'') ? 1 : 0;
1990:
1991: while (1) {
1992: if (YYCURSOR < YYLIMIT) {
1993: if (*YYCURSOR == '\'') {
1994: YYCURSOR++;
1995: yyleng = YYCURSOR - SCNG(yy_text);
1996:
1997: break;
1998: } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1999: YYCURSOR++;
2000: }
2001: } else {
2002: yyleng = YYLIMIT - SCNG(yy_text);
2003:
2004: /* Unclosed single quotes; treat similar to double quotes, but without a separate token
2005: * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2006: * rule, which continued in ST_IN_SCRIPTING state after the quote */
2007: return T_ENCAPSED_AND_WHITESPACE;
2008: }
2009: }
2010:
2011: zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
2012: zendlval->value.str.len = yyleng-bprefix-2;
2013: zendlval->type = IS_STRING;
2014:
2015: /* convert escape sequences */
2016: s = t = zendlval->value.str.val;
2017: end = s+zendlval->value.str.len;
2018: while (s<end) {
2019: if (*s=='\\') {
2020: s++;
2021:
2022: switch(*s) {
2023: case '\\':
2024: case '\'':
2025: *t++ = *s;
2026: zendlval->value.str.len--;
2027: break;
2028: default:
2029: *t++ = '\\';
2030: *t++ = *s;
2031: break;
2032: }
2033: } else {
2034: *t++ = *s;
2035: }
2036:
2037: if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2038: CG(zend_lineno)++;
2039: }
2040: s++;
2041: }
2042: *t = 0;
2043:
2044: if (SCNG(output_filter)) {
2045: size_t sz = 0;
2046: s = zendlval->value.str.val;
2047: SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
2048: zendlval->value.str.len = sz;
2049: efree(s);
2050: }
2051: return T_CONSTANT_ENCAPSED_STRING;
2052: }
2053:
2054:
2055: <ST_IN_SCRIPTING>b?["] {
2056: int bprefix = (yytext[0] != '"') ? 1 : 0;
2057:
2058: while (YYCURSOR < YYLIMIT) {
2059: switch (*YYCURSOR++) {
2060: case '"':
2061: yyleng = YYCURSOR - SCNG(yy_text);
2062: zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2063: return T_CONSTANT_ENCAPSED_STRING;
2064: case '$':
2065: if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2066: break;
2067: }
2068: continue;
2069: case '{':
2070: if (*YYCURSOR == '$') {
2071: break;
2072: }
2073: continue;
2074: case '\\':
2075: if (YYCURSOR < YYLIMIT) {
2076: YYCURSOR++;
2077: }
2078: /* fall through */
2079: default:
2080: continue;
2081: }
2082:
2083: YYCURSOR--;
2084: break;
2085: }
2086:
2087: /* Remember how much was scanned to save rescanning */
2088: SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2089:
2090: YYCURSOR = SCNG(yy_text) + yyleng;
2091:
2092: BEGIN(ST_DOUBLE_QUOTES);
2093: return '"';
2094: }
2095:
2096:
2097: <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2098: char *s;
2099: int bprefix = (yytext[0] != '<') ? 1 : 0;
2100:
2101: /* save old heredoc label */
2102: Z_STRVAL_P(zendlval) = CG(heredoc);
2103: Z_STRLEN_P(zendlval) = CG(heredoc_len);
2104:
2105: CG(zend_lineno)++;
2106: CG(heredoc_len) = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2107: s = yytext+bprefix+3;
2108: while ((*s == ' ') || (*s == '\t')) {
2109: s++;
2110: CG(heredoc_len)--;
2111: }
2112:
2113: if (*s == '\'') {
2114: s++;
2115: CG(heredoc_len) -= 2;
2116:
2117: BEGIN(ST_NOWDOC);
2118: } else {
2119: if (*s == '"') {
2120: s++;
2121: CG(heredoc_len) -= 2;
2122: }
2123:
2124: BEGIN(ST_HEREDOC);
2125: }
2126:
2127: CG(heredoc) = estrndup(s, CG(heredoc_len));
2128:
2129: /* Check for ending label on the next line */
2130: if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) {
2131: YYCTYPE *end = YYCURSOR + CG(heredoc_len);
2132:
2133: if (*end == ';') {
2134: end++;
2135: }
2136:
2137: if (*end == '\n' || *end == '\r') {
2138: BEGIN(ST_END_HEREDOC);
2139: }
2140: }
2141:
2142: return T_START_HEREDOC;
2143: }
2144:
2145:
2146: <ST_IN_SCRIPTING>[`] {
2147: BEGIN(ST_BACKQUOTE);
2148: return '`';
2149: }
2150:
2151:
2152: <ST_END_HEREDOC>{ANY_CHAR} {
2153: YYCURSOR += CG(heredoc_len) - 1;
2154: yyleng = CG(heredoc_len);
2155:
2156: Z_STRVAL_P(zendlval) = CG(heredoc);
2157: Z_STRLEN_P(zendlval) = CG(heredoc_len);
2158: CG(heredoc) = NULL;
2159: CG(heredoc_len) = 0;
2160: BEGIN(ST_IN_SCRIPTING);
2161: return T_END_HEREDOC;
2162: }
2163:
2164:
2165: <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2166: zendlval->value.lval = (long) '{';
2167: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2168: yyless(1);
2169: return T_CURLY_OPEN;
2170: }
2171:
2172:
2173: <ST_DOUBLE_QUOTES>["] {
2174: BEGIN(ST_IN_SCRIPTING);
2175: return '"';
2176: }
2177:
2178: <ST_BACKQUOTE>[`] {
2179: BEGIN(ST_IN_SCRIPTING);
2180: return '`';
2181: }
2182:
2183:
2184: <ST_DOUBLE_QUOTES>{ANY_CHAR} { /* Collect literal text in ST_DOUBLE_QUOTES up to a double quote, a dollar sign that starts a variable or curly expression, or an open curly followed by a dollar sign, and return it as T_ENCAPSED_AND_WHITESPACE. */
2185: if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) { /* a nonzero stored length replaces the scan below; where it gets stored is outside this chunk */
2186: YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1; /* jump by the stored length; the minus one presumably accounts for the byte ANY_CHAR already matched */
2187: SET_DOUBLE_QUOTES_SCANNED_LENGTH(0); /* reset so the shortcut is taken only once */
2188:
2189: goto double_quotes_scan_done;
2190: }
2191:
2192: if (YYCURSOR > YYLIMIT) { /* cursor is already past the buffer limit: return 0 without producing text */
2193: return 0;
2194: }
2195: if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { /* the matched byte is a backslash: also take the byte after it so it cannot end the scan */
2196: YYCURSOR++;
2197: }
2198:
2199: while (YYCURSOR < YYLIMIT) {
2200: switch (*YYCURSOR++) { /* read one byte and advance; inside the cases YYCURSOR points at the byte after it */
2201: case '"': /* closing quote ends the literal run */
2202: break;
2203: case '$': /* ends the run only when a label start or an open curly follows */
2204: if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2205: break;
2206: }
2207: continue;
2208: case '{': /* ends the run only when a dollar sign follows */
2209: if (*YYCURSOR == '$') {
2210: break;
2211: }
2212: continue;
2213: case '\\': /* backslash: skip the byte after it, when there is one before the limit */
2214: if (YYCURSOR < YYLIMIT) {
2215: YYCURSOR++;
2216: }
2217: /* fall through */
2218: default:
2219: continue;
2220: }
2221:
2222: YYCURSOR--; /* reached only via break from the switch: step back so the terminating byte is not part of this token */
2223: break;
2224: }
2225:
2226: double_quotes_scan_done:
2227: yyleng = YYCURSOR - SCNG(yy_text); /* token length is the distance from the token start to the cursor */
2228:
2229: zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC); /* fills zendlval from the scanned text; the double quote char is passed as the quote type (helper defined outside this chunk) */
2230: return T_ENCAPSED_AND_WHITESPACE;
2231: }
2232:
2233:
2234: <ST_BACKQUOTE>{ANY_CHAR} { /* Collect literal text in ST_BACKQUOTE up to a backtick, a dollar sign that starts a variable or curly expression, or an open curly followed by a dollar sign, and return it as T_ENCAPSED_AND_WHITESPACE. */
2235: if (YYCURSOR > YYLIMIT) { /* cursor is already past the buffer limit: return 0 without producing text */
2236: return 0;
2237: }
2238: if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { /* the matched byte is a backslash: also take the byte after it so it cannot end the scan */
2239: YYCURSOR++;
2240: }
2241:
2242: while (YYCURSOR < YYLIMIT) {
2243: switch (*YYCURSOR++) { /* read one byte and advance; inside the cases YYCURSOR points at the byte after it */
2244: case '`': /* closing backtick ends the literal run */
2245: break;
2246: case '$': /* ends the run only when a label start or an open curly follows */
2247: if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2248: break;
2249: }
2250: continue;
2251: case '{': /* ends the run only when a dollar sign follows */
2252: if (*YYCURSOR == '$') {
2253: break;
2254: }
2255: continue;
2256: case '\\': /* backslash: skip the byte after it, when there is one before the limit */
2257: if (YYCURSOR < YYLIMIT) {
2258: YYCURSOR++;
2259: }
2260: /* fall through */
2261: default:
2262: continue;
2263: }
2264:
2265: YYCURSOR--; /* reached only via break from the switch: step back so the terminating byte is not part of this token */
2266: break;
2267: }
2268:
2269: yyleng = YYCURSOR - SCNG(yy_text); /* token length is the distance from the token start to the cursor */
2270:
2271: zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC); /* fills zendlval from the scanned text; the backtick char is passed as the quote type (helper defined outside this chunk) */
2272: return T_ENCAPSED_AND_WHITESPACE;
2273: }
2274:
2275:
2276: <ST_HEREDOC>{ANY_CHAR} { /* Collect heredoc body text up to an interpolation start or up to the line holding the closing label, and return it as T_ENCAPSED_AND_WHITESPACE. */
2277: int newline = 0; /* byte count of the newline right before the closing label; left out of the returned text */
2278:
2279: if (YYCURSOR > YYLIMIT) { /* cursor is already past the buffer limit: return 0 without producing text */
2280: return 0;
2281: }
2282:
2283: YYCURSOR--; /* step back so the loop below also examines the byte the rule matched */
2284:
2285: while (YYCURSOR < YYLIMIT) {
2286: switch (*YYCURSOR++) { /* read one byte and advance; inside the cases YYCURSOR points at the byte after it */
2287: case '\r':
2288: if (*YYCURSOR == '\n') { /* CR followed by LF is consumed as a single line break */
2289: YYCURSOR++;
2290: }
2291: /* fall through */
2292: case '\n':
2293: /* Check for ending label on the next line */
2294: if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) { /* label start char, more than heredoc_len bytes left, and those bytes equal the stored label */
2295: YYCTYPE *end = YYCURSOR + CG(heredoc_len); /* first byte after the candidate label */
2296:
2297: if (*end == ';') { /* one semicolon is allowed between the label and the line end */
2298: end++;
2299: }
2300:
2301: if (*end == '\n' || *end == '\r') { /* accepted as the closing label only when a line break follows */
2302: /* newline before label will be subtracted from returned text, but
2303: * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2304: if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2305: newline = 2; /* Windows newline */
2306: } else {
2307: newline = 1;
2308: }
2309:
2310: CG(increment_lineno) = 1; /* For newline before label */
2311: BEGIN(ST_END_HEREDOC); /* the label itself is turned into a token by the ST_END_HEREDOC rule */
2312:
2313: goto heredoc_scan_done;
2314: }
2315: }
2316: continue;
2317: case '$': /* ends the run only when a label start or an open curly follows */
2318: if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2319: break;
2320: }
2321: continue;
2322: case '{': /* ends the run only when a dollar sign follows */
2323: if (*YYCURSOR == '$') {
2324: break;
2325: }
2326: continue;
2327: case '\\':
2328: if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') { /* skip the escaped byte, but never a line break, so the label check above still sees it */
2329: YYCURSOR++;
2330: }
2331: /* fall through */
2332: default:
2333: continue;
2334: }
2335:
2336: YYCURSOR--; /* reached only via break from the switch: step back so the terminating byte is not part of this token */
2337: break;
2338: }
2339:
2340: heredoc_scan_done:
2341: yyleng = YYCURSOR - SCNG(yy_text); /* token length is the distance from the token start to the cursor; includes the newline before the label */
2342:
2343: zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC); /* fills zendlval from the text without the trailing newline; 0 is passed as the quote type (helper defined outside this chunk) */
2344: return T_ENCAPSED_AND_WHITESPACE;
2345: }
2346:
2347:
2348: <ST_NOWDOC>{ANY_CHAR} { /* Collect nowdoc body text up to the line holding the closing label and return it as T_ENCAPSED_AND_WHITESPACE; unlike the ST_HEREDOC rule, dollar signs, curlies and backslashes get no special handling here. */
2349: int newline = 0; /* byte count of the newline right before the closing label; left out of the returned text */
2350:
2351: if (YYCURSOR > YYLIMIT) { /* cursor is already past the buffer limit: return 0 without producing text */
2352: return 0;
2353: }
2354:
2355: YYCURSOR--; /* step back so the loop below also examines the byte the rule matched */
2356:
2357: while (YYCURSOR < YYLIMIT) { /* with no closing label found the loop simply runs to the limit */
2358: switch (*YYCURSOR++) { /* read one byte and advance; inside the cases YYCURSOR points at the byte after it */
2359: case '\r':
2360: if (*YYCURSOR == '\n') { /* CR followed by LF is consumed as a single line break */
2361: YYCURSOR++;
2362: }
2363: /* fall through */
2364: case '\n':
2365: /* Check for ending label on the next line */
2366: if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) { /* label start char, more than heredoc_len bytes left, and those bytes equal the stored label */
2367: YYCTYPE *end = YYCURSOR + CG(heredoc_len); /* first byte after the candidate label */
2368:
2369: if (*end == ';') { /* one semicolon is allowed between the label and the line end */
2370: end++;
2371: }
2372:
2373: if (*end == '\n' || *end == '\r') { /* accepted as the closing label only when a line break follows */
2374: /* newline before label will be subtracted from returned text, but
2375: * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2376: if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2377: newline = 2; /* Windows newline */
2378: } else {
2379: newline = 1;
2380: }
2381:
2382: CG(increment_lineno) = 1; /* For newline before label */
2383: BEGIN(ST_END_HEREDOC); /* the label itself is turned into a token by the ST_END_HEREDOC rule */
2384:
2385: goto nowdoc_scan_done;
2386: }
2387: }
2388: /* fall through */
2389: default:
2390: continue;
2391: }
2392: }
2393:
2394: nowdoc_scan_done:
2395: yyleng = YYCURSOR - SCNG(yy_text); /* token length is the distance from the token start to the cursor; includes the newline before the label */
2396:
2397: zend_copy_value(zendlval, yytext, yyleng - newline); /* text goes through zend_copy_value, not zend_scan_escape_string as in the heredoc rule (both defined outside this chunk) */
2398: zendlval->type = IS_STRING;
2399: HANDLE_NEWLINES(yytext, yyleng - newline); /* NOTE(review): presumably line number accounting for the copied text; macro is defined outside this chunk */
2400: return T_ENCAPSED_AND_WHITESPACE;
2401: }
2402:
2403:
2404: <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { /* A byte reached this ANY_CHAR rule in ST_IN_SCRIPTING or ST_VAR_OFFSET: report it with a compile warning and jump to the restart label. */
2405: if (YYCURSOR > YYLIMIT) { /* cursor is already past the buffer limit: return 0 instead of warning */
2406: return 0;
2407: }
2408:
2409: zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); /* the offending byte is printed both as a character and as a number, followed by the current scanner state */
2410: goto restart; /* the restart label is outside this chunk; no token is returned for the offending byte from this rule */
2411: }
2412:
2413: */
2414: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>