Annotation of embedaddon/php/Zend/zend_language_scanner.l, revision 1.1.1.3
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | Zend Engine |
4: +----------------------------------------------------------------------+
1.1.1.3 ! misho 5: | Copyright (c) 1998-2013 Zend Technologies Ltd. (http://www.zend.com) |
1.1 misho 6: +----------------------------------------------------------------------+
7: | This source file is subject to version 2.00 of the Zend license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.zend.com/license/2_00.txt. |
11: | If you did not receive a copy of the Zend license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@zend.com so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Authors: Marcus Boerger <helly@php.net> |
16: | Nuno Lopes <nlopess@php.net> |
17: | Scott MacVicar <scottmac@php.net> |
18: | Flex version authors: |
19: | Andi Gutmans <andi@zend.com> |
20: | Zeev Suraski <zeev@zend.com> |
21: +----------------------------------------------------------------------+
22: */
23:
1.1.1.2 misho 24: /* $Id$ */
1.1 misho 25:
/* Scanner trace hook: change the condition to 1 to print every state/char pair. */
#if 0
# define YYDEBUG(state, ch) printf("state: %d char: %c\n", state, ch)
#else
# define YYDEBUG(state, ch)
#endif
31:
32: #include "zend_language_scanner_defs.h"
33:
34: #include <errno.h>
35: #include "zend.h"
1.1.1.3 ! misho 36: #ifdef PHP_WIN32
! 37: # include <Winuser.h>
! 38: #endif
1.1 misho 39: #include "zend_alloc.h"
40: #include <zend_language_parser.h>
41: #include "zend_compile.h"
42: #include "zend_language_scanner.h"
43: #include "zend_highlight.h"
44: #include "zend_constants.h"
45: #include "zend_variables.h"
46: #include "zend_operators.h"
47: #include "zend_API.h"
48: #include "zend_strtod.h"
49: #include "zend_exceptions.h"
50: #include "tsrm_virtual_cwd.h"
51: #include "tsrm_config_common.h"
52:
/* re2c interface: the cursor, limit and marker live in the scanner globals. */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 once cursor + n reaches the padded end of input. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Arguments are parenthesized so that expressions such as `a | b` or `c ? 1 : 2` expand correctly. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart

/* perform sanity check. If this message is triggered you should
   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
/*!max:re2c */
#if ZEND_MMAP_AHEAD < YYMAXFILL
# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
#endif
79:
80: #ifdef HAVE_STDARG_H
81: # include <stdarg.h>
82: #endif
83:
84: #ifdef HAVE_UNISTD_H
85: # include <unistd.h>
86: #endif
87:
88: /* Globals Macros */
89: #define SCNG LANG_SCNG
90: #ifdef ZTS
91: ZEND_API ts_rsrc_id language_scanner_globals_id;
92: #else
93: ZEND_API zend_php_scanner_globals language_scanner_globals;
94: #endif
95:
/* Bump CG(zend_lineno) once per line break in the l bytes starting at s.
 * A "\r\n" pair counts once: the '\r' is skipped when a '\n' follows it. */
#define HANDLE_NEWLINES(s, l)													\
do {																			\
	char *p = (s), *boundary = p+(l);											\
																				\
	while (p<boundary) {														\
		if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) {					\
			CG(zend_lineno)++;													\
		}																		\
		p++;																	\
	}																			\
} while (0)

/* Bump CG(zend_lineno) if the single character c is '\n' or '\r'.
 * c is parenthesized so expression arguments parse as intended; it is still
 * evaluated twice, so it must be free of side effects. */
#define HANDLE_NEWLINE(c) \
{ \
	if ((c) == '\n' || (c) == '\r') { \
		CG(zend_lineno)++; \
	} \
}

/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    CG(doc_comment_len)

/* True for ASCII letters, '_' and any value >= 0x7F. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F)

#define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c)  (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123:
124: BEGIN_EXTERN_C()
125:
1.1.1.2 misho 126: static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127: {
128: const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129: assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130: return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131: }
132:
133: static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
134: {
135: return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
136: }
137:
138: static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
139: {
140: return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141: LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
142: }
143:
144: static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
145: {
146: const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
147: assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
148: return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149: internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
150: }
151:
152:
1.1 misho 153: static void _yy_push_state(int new_state TSRMLS_DC)
154: {
155: zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
156: YYSETCONDITION(new_state);
157: }
158:
159: #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160:
161: static void yy_pop_state(TSRMLS_D)
162: {
163: int *stack_state;
164: zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165: YYSETCONDITION(*stack_state);
166: zend_stack_del_top(&SCNG(state_stack));
167: }
168:
169: static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
170: {
171: YYCURSOR = (YYCTYPE*)str;
172: YYLIMIT = YYCURSOR + len;
173: if (!SCNG(yy_start)) {
174: SCNG(yy_start) = YYCURSOR;
175: }
176: }
177:
178: void startup_scanner(TSRMLS_D)
179: {
1.1.1.2 misho 180: CG(parse_error) = 0;
1.1 misho 181: CG(heredoc) = NULL;
182: CG(heredoc_len) = 0;
183: CG(doc_comment) = NULL;
184: CG(doc_comment_len) = 0;
185: zend_stack_init(&SCNG(state_stack));
186: }
187:
188: void shutdown_scanner(TSRMLS_D)
189: {
190: if (CG(heredoc)) {
191: efree(CG(heredoc));
192: CG(heredoc_len)=0;
193: }
1.1.1.2 misho 194: CG(parse_error) = 0;
1.1 misho 195: zend_stack_destroy(&SCNG(state_stack));
196: RESET_DOC_COMMENT();
197: }
198:
199: ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
200: {
201: lex_state->yy_leng = SCNG(yy_leng);
202: lex_state->yy_start = SCNG(yy_start);
203: lex_state->yy_text = SCNG(yy_text);
204: lex_state->yy_cursor = SCNG(yy_cursor);
205: lex_state->yy_marker = SCNG(yy_marker);
206: lex_state->yy_limit = SCNG(yy_limit);
207:
208: lex_state->state_stack = SCNG(state_stack);
209: zend_stack_init(&SCNG(state_stack));
210:
211: lex_state->in = SCNG(yy_in);
212: lex_state->yy_state = YYSTATE;
213: lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
214: lex_state->lineno = CG(zend_lineno);
215:
216: lex_state->script_org = SCNG(script_org);
217: lex_state->script_org_size = SCNG(script_org_size);
218: lex_state->script_filtered = SCNG(script_filtered);
219: lex_state->script_filtered_size = SCNG(script_filtered_size);
220: lex_state->input_filter = SCNG(input_filter);
221: lex_state->output_filter = SCNG(output_filter);
222: lex_state->script_encoding = SCNG(script_encoding);
223: }
224:
225: ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
226: {
227: SCNG(yy_leng) = lex_state->yy_leng;
228: SCNG(yy_start) = lex_state->yy_start;
229: SCNG(yy_text) = lex_state->yy_text;
230: SCNG(yy_cursor) = lex_state->yy_cursor;
231: SCNG(yy_marker) = lex_state->yy_marker;
232: SCNG(yy_limit) = lex_state->yy_limit;
233:
234: zend_stack_destroy(&SCNG(state_stack));
235: SCNG(state_stack) = lex_state->state_stack;
236:
237: SCNG(yy_in) = lex_state->in;
238: YYSETCONDITION(lex_state->yy_state);
239: CG(zend_lineno) = lex_state->lineno;
240: zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
1.1.1.2 misho 241:
1.1 misho 242: if (SCNG(script_filtered)) {
243: efree(SCNG(script_filtered));
244: SCNG(script_filtered) = NULL;
245: }
246: SCNG(script_org) = lex_state->script_org;
247: SCNG(script_org_size) = lex_state->script_org_size;
248: SCNG(script_filtered) = lex_state->script_filtered;
249: SCNG(script_filtered_size) = lex_state->script_filtered_size;
250: SCNG(input_filter) = lex_state->input_filter;
251: SCNG(output_filter) = lex_state->output_filter;
252: SCNG(script_encoding) = lex_state->script_encoding;
253:
254: if (CG(heredoc)) {
255: efree(CG(heredoc));
256: CG(heredoc) = NULL;
257: CG(heredoc_len) = 0;
258: }
1.1.1.3 ! misho 259: RESET_DOC_COMMENT();
1.1 misho 260: }
261:
262: ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
263: {
264: zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
265: /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
266: file_handle->opened_path = NULL;
267: if (file_handle->free_filename) {
268: file_handle->filename = NULL;
269: }
270: }
271:
1.1.1.2 misho 272: #define BOM_UTF32_BE "\x00\x00\xfe\xff"
273: #define BOM_UTF32_LE "\xff\xfe\x00\x00"
274: #define BOM_UTF16_BE "\xfe\xff"
275: #define BOM_UTF16_LE "\xff\xfe"
276: #define BOM_UTF8 "\xef\xbb\xbf"
277:
278: static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
279: {
280: const unsigned char *p;
281: int wchar_size = 2;
282: int le = 0;
283:
284: /* utf-16 or utf-32? */
285: p = script;
286: while ((p-script) < script_size) {
287: p = memchr(p, 0, script_size-(p-script)-2);
288: if (!p) {
289: break;
290: }
291: if (*(p+1) == '\0' && *(p+2) == '\0') {
292: wchar_size = 4;
293: break;
294: }
295:
296: /* searching for UTF-32 specific byte orders, so this will do */
297: p += 4;
298: }
299:
300: /* BE or LE? */
301: p = script;
302: while ((p-script) < script_size) {
303: if (*p == '\0' && *(p+wchar_size-1) != '\0') {
304: /* BE */
305: le = 0;
306: break;
307: } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
308: /* LE* */
309: le = 1;
310: break;
311: }
312: p += wchar_size;
313: }
314:
315: if (wchar_size == 2) {
316: return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
317: } else {
318: return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
319: }
320:
321: return NULL;
322: }
323:
324: static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
325: {
326: const zend_encoding *script_encoding = NULL;
327: int bom_size;
328: unsigned char *pos1, *pos2;
329:
330: if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
331: return NULL;
332: }
333:
334: /* check out BOM */
335: if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
336: script_encoding = zend_multibyte_encoding_utf32be;
337: bom_size = sizeof(BOM_UTF32_BE)-1;
338: } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
339: script_encoding = zend_multibyte_encoding_utf32le;
340: bom_size = sizeof(BOM_UTF32_LE)-1;
341: } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
342: script_encoding = zend_multibyte_encoding_utf16be;
343: bom_size = sizeof(BOM_UTF16_BE)-1;
344: } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
345: script_encoding = zend_multibyte_encoding_utf16le;
346: bom_size = sizeof(BOM_UTF16_LE)-1;
347: } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
348: script_encoding = zend_multibyte_encoding_utf8;
349: bom_size = sizeof(BOM_UTF8)-1;
350: }
351:
352: if (script_encoding) {
353: /* remove BOM */
354: LANG_SCNG(script_org) += bom_size;
355: LANG_SCNG(script_org_size) -= bom_size;
356:
357: return script_encoding;
358: }
359:
360: /* script contains NULL bytes -> auto-detection */
361: if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
362: /* check if the NULL byte is after the __HALT_COMPILER(); */
363: pos2 = LANG_SCNG(script_org);
364:
365: while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
366: pos2 = memchr(pos2, '_', pos1 - pos2);
367: if (!pos2) break;
368: pos2++;
369: if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
370: pos2 += sizeof("_HALT_COMPILER")-1;
371: while (*pos2 == ' ' ||
372: *pos2 == '\t' ||
373: *pos2 == '\r' ||
374: *pos2 == '\n') {
375: pos2++;
376: }
377: if (*pos2 == '(') {
378: pos2++;
379: while (*pos2 == ' ' ||
380: *pos2 == '\t' ||
381: *pos2 == '\r' ||
382: *pos2 == '\n') {
383: pos2++;
384: }
385: if (*pos2 == ')') {
386: pos2++;
387: while (*pos2 == ' ' ||
388: *pos2 == '\t' ||
389: *pos2 == '\r' ||
390: *pos2 == '\n') {
391: pos2++;
392: }
393: if (*pos2 == ';') {
394: return NULL;
395: }
396: }
397: }
398: }
399: }
400: /* make best effort if BOM is missing */
401: return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
402: }
403:
404: return NULL;
405: }
406:
407: static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
408: {
409: const zend_encoding *script_encoding;
410:
411: if (CG(detect_unicode)) {
412: /* check out bom(byte order mark) and see if containing wchars */
413: script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
414: if (script_encoding != NULL) {
415: /* bom or wchar detection is prior to 'script_encoding' option */
416: return script_encoding;
417: }
418: }
419:
420: /* if no script_encoding specified, just leave alone */
421: if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
422: return NULL;
423: }
424:
425: /* if multiple encodings specified, detect automagically */
426: if (CG(script_encoding_list_size) > 1) {
427: return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
428: }
429:
430: return CG(script_encoding_list)[0];
431: }
432:
433: ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
434: {
435: const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
436: const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
437:
438: if (!script_encoding) {
439: return FAILURE;
440: }
441:
442: /* judge input/output filter */
443: LANG_SCNG(script_encoding) = script_encoding;
444: LANG_SCNG(input_filter) = NULL;
445: LANG_SCNG(output_filter) = NULL;
446:
447: if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
448: if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
449: /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
450: LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
451: LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
452: } else {
453: LANG_SCNG(input_filter) = NULL;
454: LANG_SCNG(output_filter) = NULL;
455: }
456: return SUCCESS;
457: }
458:
459: if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
460: LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
461: LANG_SCNG(output_filter) = NULL;
462: } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
463: LANG_SCNG(input_filter) = NULL;
464: LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
465: } else {
466: /* both script and internal encodings are incompatible w/ flex */
467: LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
468: LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
469: }
470:
471: return 0;
472: }
1.1 misho 473:
474: ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
475: {
1.1.1.2 misho 476: const char *file_path = NULL;
477: char *buf;
1.1 misho 478: size_t size, offset = 0;
1.1.1.2 misho 479:
1.1 misho 480: /* The shebang line was read, get the current position to obtain the buffer start */
481: if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
482: if ((offset = ftell(file_handle->handle.fp)) == -1) {
483: offset = 0;
484: }
485: }
486:
487: if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
488: return FAILURE;
489: }
490:
491: zend_llist_add_element(&CG(open_files), file_handle);
492: if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
493: zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
494: size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
495: fh->handle.stream.handle = (void*)(((char*)fh) + diff);
496: file_handle->handle.stream.handle = fh->handle.stream.handle;
497: }
498:
499: /* Reset the scanner for scanning the new file */
500: SCNG(yy_in) = file_handle;
501: SCNG(yy_start) = NULL;
502:
503: if (size != -1) {
1.1.1.2 misho 504: if (CG(multibyte)) {
505: SCNG(script_org) = (unsigned char*)buf;
506: SCNG(script_org_size) = size;
507: SCNG(script_filtered) = NULL;
1.1 misho 508:
1.1.1.2 misho 509: zend_multibyte_set_filter(NULL TSRMLS_CC);
1.1 misho 510:
1.1.1.2 misho 511: if (SCNG(input_filter)) {
512: if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
513: zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
514: "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
515: }
516: buf = (char*)SCNG(script_filtered);
517: size = SCNG(script_filtered_size);
1.1 misho 518: }
519: }
1.1.1.2 misho 520: SCNG(yy_start) = (unsigned char *)buf - offset;
1.1 misho 521: yy_scan_buffer(buf, size TSRMLS_CC);
522: } else {
523: zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
524: }
525:
526: BEGIN(INITIAL);
527:
528: if (file_handle->opened_path) {
529: file_path = file_handle->opened_path;
530: } else {
531: file_path = file_handle->filename;
532: }
533:
534: zend_set_compiled_filename(file_path TSRMLS_CC);
535:
536: if (CG(start_lineno)) {
537: CG(zend_lineno) = CG(start_lineno);
538: CG(start_lineno) = 0;
539: } else {
540: CG(zend_lineno) = 1;
541: }
542:
1.1.1.3 ! misho 543: RESET_DOC_COMMENT();
1.1 misho 544: CG(increment_lineno) = 0;
545: return SUCCESS;
546: }
547: END_EXTERN_C()
548:
549:
550: ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
551: {
552: zend_lex_state original_lex_state;
553: zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
554: zend_op_array *original_active_op_array = CG(active_op_array);
555: zend_op_array *retval=NULL;
556: int compiler_result;
557: zend_bool compilation_successful=0;
558: znode retval_znode;
559: zend_bool original_in_compilation = CG(in_compilation);
560:
561: retval_znode.op_type = IS_CONST;
562: retval_znode.u.constant.type = IS_LONG;
563: retval_znode.u.constant.value.lval = 1;
564: Z_UNSET_ISREF(retval_znode.u.constant);
565: Z_SET_REFCOUNT(retval_znode.u.constant, 1);
566:
567: zend_save_lexical_state(&original_lex_state TSRMLS_CC);
568:
569: retval = op_array; /* success oriented */
570:
571: if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
572: if (type==ZEND_REQUIRE) {
573: zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
574: zend_bailout();
575: } else {
576: zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
577: }
578: compilation_successful=0;
579: } else {
580: init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
581: CG(in_compilation) = 1;
582: CG(active_op_array) = op_array;
1.1.1.3 ! misho 583: zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
1.1.1.2 misho 584: zend_init_compiler_context(TSRMLS_C);
1.1 misho 585: compiler_result = zendparse(TSRMLS_C);
586: zend_do_return(&retval_znode, 0 TSRMLS_CC);
587: CG(in_compilation) = original_in_compilation;
1.1.1.3 ! misho 588: if (compiler_result != 0) { /* parser error */
1.1 misho 589: zend_bailout();
590: }
591: compilation_successful=1;
592: }
593:
594: if (retval) {
595: CG(active_op_array) = original_active_op_array;
596: if (compilation_successful) {
597: pass_two(op_array TSRMLS_CC);
1.1.1.3 ! misho 598: zend_release_labels(0 TSRMLS_CC);
1.1 misho 599: } else {
600: efree(op_array);
601: retval = NULL;
602: }
603: }
604: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
605: return retval;
606: }
607:
608:
609: zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
610: {
611: zend_file_handle file_handle;
612: zval tmp;
613: zend_op_array *retval;
614: char *opened_path = NULL;
615:
616: if (filename->type != IS_STRING) {
617: tmp = *filename;
618: zval_copy_ctor(&tmp);
619: convert_to_string(&tmp);
620: filename = &tmp;
621: }
622: file_handle.filename = filename->value.str.val;
623: file_handle.free_filename = 0;
624: file_handle.type = ZEND_HANDLE_FILENAME;
625: file_handle.opened_path = NULL;
626: file_handle.handle.fp = NULL;
627:
628: retval = zend_compile_file(&file_handle, type TSRMLS_CC);
629: if (retval && file_handle.handle.stream.handle) {
630: int dummy = 1;
631:
632: if (!file_handle.opened_path) {
633: file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
634: }
635:
636: zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
637:
638: if (opened_path) {
639: efree(opened_path);
640: }
641: }
642: zend_destroy_file_handle(&file_handle TSRMLS_CC);
643:
644: if (filename==&tmp) {
645: zval_dtor(&tmp);
646: }
647: return retval;
648: }
649:
650: ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
651: {
1.1.1.2 misho 652: char *buf;
653: size_t size;
654:
1.1 misho 655: /* enforce two trailing NULLs for flex... */
1.1.1.2 misho 656: if (IS_INTERNED(str->value.str.val)) {
657: char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
658: memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
659: str->value.str.val = tmp;
660: } else {
661: str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
662: }
1.1 misho 663:
664: memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
665:
1.1.1.2 misho 666: SCNG(yy_in) = NULL;
1.1 misho 667: SCNG(yy_start) = NULL;
668:
1.1.1.2 misho 669: buf = str->value.str.val;
670: size = str->value.str.len;
671:
672: if (CG(multibyte)) {
673: SCNG(script_org) = (unsigned char*)buf;
674: SCNG(script_org_size) = size;
675: SCNG(script_filtered) = NULL;
1.1 misho 676:
1.1.1.2 misho 677: zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
1.1 misho 678:
1.1.1.2 misho 679: if (SCNG(input_filter)) {
680: if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
681: zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
682: "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
683: }
684: buf = (char*)SCNG(script_filtered);
685: size = SCNG(script_filtered_size);
686: }
1.1 misho 687: }
688:
1.1.1.2 misho 689: yy_scan_buffer(buf, size TSRMLS_CC);
1.1 misho 690:
691: zend_set_compiled_filename(filename TSRMLS_CC);
692: CG(zend_lineno) = 1;
693: CG(increment_lineno) = 0;
1.1.1.3 ! misho 694: RESET_DOC_COMMENT();
1.1 misho 695: return SUCCESS;
696: }
697:
698:
699: ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
700: {
701: size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
702: if (SCNG(input_filter)) {
1.1.1.2 misho 703: size_t original_offset = offset, length = 0;
704: do {
1.1 misho 705: unsigned char *p = NULL;
1.1.1.2 misho 706: if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
707: return (size_t)-1;
1.1 misho 708: }
709: efree(p);
710: if (length > original_offset) {
711: offset--;
712: } else if (length < original_offset) {
713: offset++;
714: }
715: } while (original_offset != length);
716: }
717: return offset;
718: }
719:
720:
721: zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
722: {
723: zend_lex_state original_lex_state;
724: zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
725: zend_op_array *original_active_op_array = CG(active_op_array);
726: zend_op_array *retval;
727: zval tmp;
728: int compiler_result;
729: zend_bool original_in_compilation = CG(in_compilation);
730:
731: if (source_string->value.str.len==0) {
732: efree(op_array);
733: return NULL;
734: }
735:
736: CG(in_compilation) = 1;
737:
738: tmp = *source_string;
739: zval_copy_ctor(&tmp);
740: convert_to_string(&tmp);
741: source_string = &tmp;
742:
743: zend_save_lexical_state(&original_lex_state TSRMLS_CC);
744: if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
745: efree(op_array);
746: retval = NULL;
747: } else {
748: zend_bool orig_interactive = CG(interactive);
749:
750: CG(interactive) = 0;
751: init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
752: CG(interactive) = orig_interactive;
753: CG(active_op_array) = op_array;
1.1.1.3 ! misho 754: zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
1.1.1.2 misho 755: zend_init_compiler_context(TSRMLS_C);
1.1 misho 756: BEGIN(ST_IN_SCRIPTING);
757: compiler_result = zendparse(TSRMLS_C);
758:
759: if (SCNG(script_filtered)) {
760: efree(SCNG(script_filtered));
761: SCNG(script_filtered) = NULL;
762: }
763:
1.1.1.3 ! misho 764: if (compiler_result != 0) {
1.1 misho 765: CG(active_op_array) = original_active_op_array;
766: CG(unclean_shutdown)=1;
767: destroy_op_array(op_array TSRMLS_CC);
768: efree(op_array);
769: retval = NULL;
770: } else {
771: zend_do_return(NULL, 0 TSRMLS_CC);
772: CG(active_op_array) = original_active_op_array;
773: pass_two(op_array TSRMLS_CC);
1.1.1.3 ! misho 774: zend_release_labels(0 TSRMLS_CC);
1.1 misho 775: retval = op_array;
776: }
777: }
778: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
779: zval_dtor(&tmp);
780: CG(in_compilation) = original_in_compilation;
781: return retval;
782: }
783:
784:
785: BEGIN_EXTERN_C()
786: int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
787: {
788: zend_lex_state original_lex_state;
789: zend_file_handle file_handle;
790:
791: file_handle.type = ZEND_HANDLE_FILENAME;
792: file_handle.filename = filename;
793: file_handle.free_filename = 0;
794: file_handle.opened_path = NULL;
795: zend_save_lexical_state(&original_lex_state TSRMLS_CC);
796: if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
797: zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
798: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
799: return FAILURE;
800: }
801: zend_highlight(syntax_highlighter_ini TSRMLS_CC);
802: if (SCNG(script_filtered)) {
803: efree(SCNG(script_filtered));
804: SCNG(script_filtered) = NULL;
805: }
806: zend_destroy_file_handle(&file_handle TSRMLS_CC);
807: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
808: return SUCCESS;
809: }
810:
811: int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
812: {
813: zend_lex_state original_lex_state;
814: zval tmp = *str;
815:
816: str = &tmp;
817: zval_copy_ctor(str);
818: zend_save_lexical_state(&original_lex_state TSRMLS_CC);
819: if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
820: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
821: return FAILURE;
822: }
823: BEGIN(INITIAL);
824: zend_highlight(syntax_highlighter_ini TSRMLS_CC);
825: if (SCNG(script_filtered)) {
826: efree(SCNG(script_filtered));
827: SCNG(script_filtered) = NULL;
828: }
829: zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
830: zval_dtor(str);
831: return SUCCESS;
832: }
833:
1.1.1.2 misho 834: ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
1.1 misho 835: {
1.1.1.2 misho 836: size_t length;
837: unsigned char *new_yy_start;
1.1 misho 838:
839: /* convert and set */
840: if (!SCNG(input_filter)) {
1.1.1.2 misho 841: if (SCNG(script_filtered)) {
842: efree(SCNG(script_filtered));
843: SCNG(script_filtered) = NULL;
1.1 misho 844: }
1.1.1.2 misho 845: SCNG(script_filtered_size) = 0;
846: length = SCNG(script_org_size);
847: new_yy_start = SCNG(script_org);
848: } else {
849: if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
850: zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
851: "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
852: }
853: SCNG(script_filtered) = new_yy_start;
854: SCNG(script_filtered_size) = length;
1.1 misho 855: }
856:
1.1.1.2 misho 857: SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
858: SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
859: SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
860: SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
1.1 misho 861:
1.1.1.2 misho 862: SCNG(yy_start) = new_yy_start;
1.1 misho 863: }
864:
865:
/* Store a copy of the text[0..len) token text in dst->value.str, passing it
 * through the output filter when one is installed.
 * Parameters are parenthesized and no longer named after the yytext/yyleng
 * macros. The expansion is still a bare if/else, so use it as a statement of
 * its own. */
# define zend_copy_value(dst, text, len) \
	if (SCNG(output_filter)) { \
		size_t sz = 0; \
		SCNG(output_filter)((unsigned char **)&((dst)->value.str.val), &sz, (unsigned char *)(text), (size_t)(len) TSRMLS_CC); \
		(dst)->value.str.len = sz; \
	} else { \
		(dst)->value.str.val = (char *) estrndup((text), (len)); \
		(dst)->value.str.len = (len); \
	}
875:
876: static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
877: {
878: register char *s, *t;
879: char *end;
880:
881: ZVAL_STRINGL(zendlval, str, len, 1);
882:
883: /* convert escape sequences */
884: s = t = zendlval->value.str.val;
885: end = s+zendlval->value.str.len;
886: while (s<end) {
887: if (*s=='\\') {
888: s++;
889: if (s >= end) {
890: *t++ = '\\';
891: break;
892: }
893:
894: switch(*s) {
895: case 'n':
896: *t++ = '\n';
897: zendlval->value.str.len--;
898: break;
899: case 'r':
900: *t++ = '\r';
901: zendlval->value.str.len--;
902: break;
903: case 't':
904: *t++ = '\t';
905: zendlval->value.str.len--;
906: break;
907: case 'f':
908: *t++ = '\f';
909: zendlval->value.str.len--;
910: break;
911: case 'v':
912: *t++ = '\v';
913: zendlval->value.str.len--;
914: break;
1.1.1.2 misho 915: case 'e':
1.1.1.3 ! misho 916: #ifdef PHP_WIN32
! 917: *t++ = VK_ESCAPE;
! 918: #else
1.1.1.2 misho 919: *t++ = '\e';
1.1.1.3 ! misho 920: #endif
1.1.1.2 misho 921: zendlval->value.str.len--;
922: break;
1.1 misho 923: case '"':
924: case '`':
925: if (*s != quote_type) {
926: *t++ = '\\';
927: *t++ = *s;
928: break;
929: }
930: case '\\':
931: case '$':
932: *t++ = *s;
933: zendlval->value.str.len--;
934: break;
935: case 'x':
936: case 'X':
937: if (ZEND_IS_HEX(*(s+1))) {
938: char hex_buf[3] = { 0, 0, 0 };
939:
940: zendlval->value.str.len--; /* for the 'x' */
941:
942: hex_buf[0] = *(++s);
943: zendlval->value.str.len--;
944: if (ZEND_IS_HEX(*(s+1))) {
945: hex_buf[1] = *(++s);
946: zendlval->value.str.len--;
947: }
948: *t++ = (char) strtol(hex_buf, NULL, 16);
949: } else {
950: *t++ = '\\';
951: *t++ = *s;
952: }
953: break;
954: default:
955: /* check for an octal */
956: if (ZEND_IS_OCT(*s)) {
957: char octal_buf[4] = { 0, 0, 0, 0 };
958:
959: octal_buf[0] = *s;
960: zendlval->value.str.len--;
961: if (ZEND_IS_OCT(*(s+1))) {
962: octal_buf[1] = *(++s);
963: zendlval->value.str.len--;
964: if (ZEND_IS_OCT(*(s+1))) {
965: octal_buf[2] = *(++s);
966: zendlval->value.str.len--;
967: }
968: }
969: *t++ = (char) strtol(octal_buf, NULL, 8);
970: } else {
971: *t++ = '\\';
972: *t++ = *s;
973: }
974: break;
975: }
976: } else {
977: *t++ = *s;
978: }
979:
980: if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
981: CG(zend_lineno)++;
982: }
983: s++;
984: }
985: *t = 0;
986: if (SCNG(output_filter)) {
987: size_t sz = 0;
988: s = zendlval->value.str.val;
989: SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
990: zendlval->value.str.len = sz;
991: efree(s);
992: }
993: }
994:
995:
996: int lex_scan(zval *zendlval TSRMLS_DC)
997: {
998: restart:
999: SCNG(yy_text) = YYCURSOR;
1000:
1001: yymore_restart:
1002:
1003: /*!re2c
1004: re2c:yyfill:check = 0;
1005: LNUM [0-9]+
1006: DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1007: EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1008: HNUM "0x"[0-9a-fA-F]+
1.1.1.2 misho 1009: BNUM "0b"[01]+
1.1 misho 1010: LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1011: WHITESPACE [ \n\r\t]+
1012: TABS_AND_SPACES [ \t]*
1013: TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1014: ANY_CHAR [^]
1015: NEWLINE ("\r"|"\n"|"\r\n")
1016:
1017: /* compute yyleng before each rule */
1018: <!*> := yyleng = YYCURSOR - SCNG(yy_text);
1019:
1020:
1021: <ST_IN_SCRIPTING>"exit" {
1022: return T_EXIT;
1023: }
1024:
1025: <ST_IN_SCRIPTING>"die" {
1026: return T_EXIT;
1027: }
1028:
1029: <ST_IN_SCRIPTING>"function" {
1030: return T_FUNCTION;
1031: }
1032:
1033: <ST_IN_SCRIPTING>"const" {
1034: return T_CONST;
1035: }
1036:
1037: <ST_IN_SCRIPTING>"return" {
1038: return T_RETURN;
1039: }
1040:
1041: <ST_IN_SCRIPTING>"try" {
1042: return T_TRY;
1043: }
1044:
1045: <ST_IN_SCRIPTING>"catch" {
1046: return T_CATCH;
1047: }
1048:
1049: <ST_IN_SCRIPTING>"throw" {
1050: return T_THROW;
1051: }
1052:
1053: <ST_IN_SCRIPTING>"if" {
1054: return T_IF;
1055: }
1056:
1057: <ST_IN_SCRIPTING>"elseif" {
1058: return T_ELSEIF;
1059: }
1060:
1061: <ST_IN_SCRIPTING>"endif" {
1062: return T_ENDIF;
1063: }
1064:
1065: <ST_IN_SCRIPTING>"else" {
1066: return T_ELSE;
1067: }
1068:
1069: <ST_IN_SCRIPTING>"while" {
1070: return T_WHILE;
1071: }
1072:
1073: <ST_IN_SCRIPTING>"endwhile" {
1074: return T_ENDWHILE;
1075: }
1076:
1077: <ST_IN_SCRIPTING>"do" {
1078: return T_DO;
1079: }
1080:
1081: <ST_IN_SCRIPTING>"for" {
1082: return T_FOR;
1083: }
1084:
1085: <ST_IN_SCRIPTING>"endfor" {
1086: return T_ENDFOR;
1087: }
1088:
1089: <ST_IN_SCRIPTING>"foreach" {
1090: return T_FOREACH;
1091: }
1092:
1093: <ST_IN_SCRIPTING>"endforeach" {
1094: return T_ENDFOREACH;
1095: }
1096:
1097: <ST_IN_SCRIPTING>"declare" {
1098: return T_DECLARE;
1099: }
1100:
1101: <ST_IN_SCRIPTING>"enddeclare" {
1102: return T_ENDDECLARE;
1103: }
1104:
1105: <ST_IN_SCRIPTING>"instanceof" {
1106: return T_INSTANCEOF;
1107: }
1108:
1109: <ST_IN_SCRIPTING>"as" {
1110: return T_AS;
1111: }
1112:
1113: <ST_IN_SCRIPTING>"switch" {
1114: return T_SWITCH;
1115: }
1116:
1117: <ST_IN_SCRIPTING>"endswitch" {
1118: return T_ENDSWITCH;
1119: }
1120:
1121: <ST_IN_SCRIPTING>"case" {
1122: return T_CASE;
1123: }
1124:
1125: <ST_IN_SCRIPTING>"default" {
1126: return T_DEFAULT;
1127: }
1128:
1129: <ST_IN_SCRIPTING>"break" {
1130: return T_BREAK;
1131: }
1132:
1133: <ST_IN_SCRIPTING>"continue" {
1134: return T_CONTINUE;
1135: }
1136:
1137: <ST_IN_SCRIPTING>"goto" {
1138: return T_GOTO;
1139: }
1140:
1141: <ST_IN_SCRIPTING>"echo" {
1142: return T_ECHO;
1143: }
1144:
1145: <ST_IN_SCRIPTING>"print" {
1146: return T_PRINT;
1147: }
1148:
1149: <ST_IN_SCRIPTING>"class" {
1150: return T_CLASS;
1151: }
1152:
1153: <ST_IN_SCRIPTING>"interface" {
1154: return T_INTERFACE;
1155: }
1156:
1.1.1.2 misho 1157: <ST_IN_SCRIPTING>"trait" {
1158: return T_TRAIT;
1159: }
1160:
1.1 misho 1161: <ST_IN_SCRIPTING>"extends" {
1162: return T_EXTENDS;
1163: }
1164:
1165: <ST_IN_SCRIPTING>"implements" {
1166: return T_IMPLEMENTS;
1167: }
1168:
1169: <ST_IN_SCRIPTING>"->" {
1170: yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1171: return T_OBJECT_OPERATOR;
1172: }
1173:
1174: <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1175: zendlval->value.str.val = yytext; /* no copying - intentional */
1176: zendlval->value.str.len = yyleng;
1177: zendlval->type = IS_STRING;
1178: HANDLE_NEWLINES(yytext, yyleng);
1179: return T_WHITESPACE;
1180: }
1181:
1182: <ST_LOOKING_FOR_PROPERTY>"->" {
1183: return T_OBJECT_OPERATOR;
1184: }
1185:
1186: <ST_LOOKING_FOR_PROPERTY>{LABEL} {
1187: yy_pop_state(TSRMLS_C);
1188: zend_copy_value(zendlval, yytext, yyleng);
1189: zendlval->type = IS_STRING;
1190: return T_STRING;
1191: }
1192:
1193: <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1194: yyless(0);
1195: yy_pop_state(TSRMLS_C);
1196: goto restart;
1197: }
1198:
1199: <ST_IN_SCRIPTING>"::" {
1200: return T_PAAMAYIM_NEKUDOTAYIM;
1201: }
1202:
1203: <ST_IN_SCRIPTING>"\\" {
1204: return T_NS_SEPARATOR;
1205: }
1206:
1207: <ST_IN_SCRIPTING>"new" {
1208: return T_NEW;
1209: }
1210:
1211: <ST_IN_SCRIPTING>"clone" {
1212: return T_CLONE;
1213: }
1214:
1215: <ST_IN_SCRIPTING>"var" {
1216: return T_VAR;
1217: }
1218:
1219: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1220: return T_INT_CAST;
1221: }
1222:
1223: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1224: return T_DOUBLE_CAST;
1225: }
1226:
1.1.1.2 misho 1227: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1.1 misho 1228: return T_STRING_CAST;
1229: }
1230:
1231: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1232: return T_ARRAY_CAST;
1233: }
1234:
1235: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1236: return T_OBJECT_CAST;
1237: }
1238:
1239: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1240: return T_BOOL_CAST;
1241: }
1242:
1243: <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1244: return T_UNSET_CAST;
1245: }
1246:
1247: <ST_IN_SCRIPTING>"eval" {
1248: return T_EVAL;
1249: }
1250:
1251: <ST_IN_SCRIPTING>"include" {
1252: return T_INCLUDE;
1253: }
1254:
1255: <ST_IN_SCRIPTING>"include_once" {
1256: return T_INCLUDE_ONCE;
1257: }
1258:
1259: <ST_IN_SCRIPTING>"require" {
1260: return T_REQUIRE;
1261: }
1262:
1263: <ST_IN_SCRIPTING>"require_once" {
1264: return T_REQUIRE_ONCE;
1265: }
1266:
1267: <ST_IN_SCRIPTING>"namespace" {
1268: return T_NAMESPACE;
1269: }
1270:
1271: <ST_IN_SCRIPTING>"use" {
1272: return T_USE;
1273: }
1274:
1.1.1.2 misho 1275: <ST_IN_SCRIPTING>"insteadof" {
1276: return T_INSTEADOF;
1277: }
1278:
1.1 misho 1279: <ST_IN_SCRIPTING>"global" {
1280: return T_GLOBAL;
1281: }
1282:
1283: <ST_IN_SCRIPTING>"isset" {
1284: return T_ISSET;
1285: }
1286:
1287: <ST_IN_SCRIPTING>"empty" {
1288: return T_EMPTY;
1289: }
1290:
1291: <ST_IN_SCRIPTING>"__halt_compiler" {
1292: return T_HALT_COMPILER;
1293: }
1294:
1295: <ST_IN_SCRIPTING>"static" {
1296: return T_STATIC;
1297: }
1298:
1299: <ST_IN_SCRIPTING>"abstract" {
1300: return T_ABSTRACT;
1301: }
1302:
1303: <ST_IN_SCRIPTING>"final" {
1304: return T_FINAL;
1305: }
1306:
1307: <ST_IN_SCRIPTING>"private" {
1308: return T_PRIVATE;
1309: }
1310:
1311: <ST_IN_SCRIPTING>"protected" {
1312: return T_PROTECTED;
1313: }
1314:
1315: <ST_IN_SCRIPTING>"public" {
1316: return T_PUBLIC;
1317: }
1318:
1319: <ST_IN_SCRIPTING>"unset" {
1320: return T_UNSET;
1321: }
1322:
1323: <ST_IN_SCRIPTING>"=>" {
1324: return T_DOUBLE_ARROW;
1325: }
1326:
1327: <ST_IN_SCRIPTING>"list" {
1328: return T_LIST;
1329: }
1330:
1331: <ST_IN_SCRIPTING>"array" {
1332: return T_ARRAY;
1333: }
1334:
1.1.1.2 misho 1335: <ST_IN_SCRIPTING>"callable" {
1336: return T_CALLABLE;
1337: }
1338:
1.1 misho 1339: <ST_IN_SCRIPTING>"++" {
1340: return T_INC;
1341: }
1342:
1343: <ST_IN_SCRIPTING>"--" {
1344: return T_DEC;
1345: }
1346:
1347: <ST_IN_SCRIPTING>"===" {
1348: return T_IS_IDENTICAL;
1349: }
1350:
1351: <ST_IN_SCRIPTING>"!==" {
1352: return T_IS_NOT_IDENTICAL;
1353: }
1354:
1355: <ST_IN_SCRIPTING>"==" {
1356: return T_IS_EQUAL;
1357: }
1358:
1359: <ST_IN_SCRIPTING>"!="|"<>" {
1360: return T_IS_NOT_EQUAL;
1361: }
1362:
1363: <ST_IN_SCRIPTING>"<=" {
1364: return T_IS_SMALLER_OR_EQUAL;
1365: }
1366:
1367: <ST_IN_SCRIPTING>">=" {
1368: return T_IS_GREATER_OR_EQUAL;
1369: }
1370:
1371: <ST_IN_SCRIPTING>"+=" {
1372: return T_PLUS_EQUAL;
1373: }
1374:
1375: <ST_IN_SCRIPTING>"-=" {
1376: return T_MINUS_EQUAL;
1377: }
1378:
1379: <ST_IN_SCRIPTING>"*=" {
1380: return T_MUL_EQUAL;
1381: }
1382:
1383: <ST_IN_SCRIPTING>"/=" {
1384: return T_DIV_EQUAL;
1385: }
1386:
1387: <ST_IN_SCRIPTING>".=" {
1388: return T_CONCAT_EQUAL;
1389: }
1390:
1391: <ST_IN_SCRIPTING>"%=" {
1392: return T_MOD_EQUAL;
1393: }
1394:
1395: <ST_IN_SCRIPTING>"<<=" {
1396: return T_SL_EQUAL;
1397: }
1398:
1399: <ST_IN_SCRIPTING>">>=" {
1400: return T_SR_EQUAL;
1401: }
1402:
1403: <ST_IN_SCRIPTING>"&=" {
1404: return T_AND_EQUAL;
1405: }
1406:
1407: <ST_IN_SCRIPTING>"|=" {
1408: return T_OR_EQUAL;
1409: }
1410:
1411: <ST_IN_SCRIPTING>"^=" {
1412: return T_XOR_EQUAL;
1413: }
1414:
1415: <ST_IN_SCRIPTING>"||" {
1416: return T_BOOLEAN_OR;
1417: }
1418:
1419: <ST_IN_SCRIPTING>"&&" {
1420: return T_BOOLEAN_AND;
1421: }
1422:
1423: <ST_IN_SCRIPTING>"OR" {
1424: return T_LOGICAL_OR;
1425: }
1426:
1427: <ST_IN_SCRIPTING>"AND" {
1428: return T_LOGICAL_AND;
1429: }
1430:
1431: <ST_IN_SCRIPTING>"XOR" {
1432: return T_LOGICAL_XOR;
1433: }
1434:
1435: <ST_IN_SCRIPTING>"<<" {
1436: return T_SL;
1437: }
1438:
1439: <ST_IN_SCRIPTING>">>" {
1440: return T_SR;
1441: }
1442:
1443: <ST_IN_SCRIPTING>{TOKENS} {
1444: return yytext[0];
1445: }
1446:
1447:
1448: <ST_IN_SCRIPTING>"{" {
1449: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1450: return '{';
1451: }
1452:
1453:
1454: <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1455: yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1456: return T_DOLLAR_OPEN_CURLY_BRACES;
1457: }
1458:
1459:
1460: <ST_IN_SCRIPTING>"}" {
1461: RESET_DOC_COMMENT();
1462: if (!zend_stack_is_empty(&SCNG(state_stack))) {
1463: yy_pop_state(TSRMLS_C);
1464: }
1465: return '}';
1466: }
1467:
1468:
1469: <ST_LOOKING_FOR_VARNAME>{LABEL} {
1470: zend_copy_value(zendlval, yytext, yyleng);
1471: zendlval->type = IS_STRING;
1472: yy_pop_state(TSRMLS_C);
1473: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1474: return T_STRING_VARNAME;
1475: }
1476:
1477:
1478: <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1479: yyless(0);
1480: yy_pop_state(TSRMLS_C);
1481: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1482: goto restart;
1483: }
1484:
1.1.1.2 misho 1485: <ST_IN_SCRIPTING>{BNUM} {
1486: char *bin = yytext + 2; /* Skip "0b" */
1487: int len = yyleng - 2;
1488:
1489: /* Skip any leading 0s */
1490: while (*bin == '0') {
1491: ++bin;
1492: --len;
1493: }
1494:
1495: if (len < SIZEOF_LONG * 8) {
1496: if (len == 0) {
1497: zendlval->value.lval = 0;
1498: } else {
1499: zendlval->value.lval = strtol(bin, NULL, 2);
1500: }
1501: zendlval->type = IS_LONG;
1502: return T_LNUMBER;
1503: } else {
1504: zendlval->value.dval = zend_bin_strtod(bin, NULL);
1505: zendlval->type = IS_DOUBLE;
1506: return T_DNUMBER;
1507: }
1508: }
1.1 misho 1509:
1510: <ST_IN_SCRIPTING>{LNUM} {
1511: if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1512: zendlval->value.lval = strtol(yytext, NULL, 0);
1513: } else {
1514: errno = 0;
1515: zendlval->value.lval = strtol(yytext, NULL, 0);
1516: if (errno == ERANGE) { /* Overflow */
1517: if (yytext[0] == '0') { /* octal overflow */
1518: zendlval->value.dval = zend_oct_strtod(yytext, NULL);
1519: } else {
1520: zendlval->value.dval = zend_strtod(yytext, NULL);
1521: }
1522: zendlval->type = IS_DOUBLE;
1523: return T_DNUMBER;
1524: }
1525: }
1526:
1527: zendlval->type = IS_LONG;
1528: return T_LNUMBER;
1529: }
1530:
1531: <ST_IN_SCRIPTING>{HNUM} {
1532: char *hex = yytext + 2; /* Skip "0x" */
1533: int len = yyleng - 2;
1534:
1535: /* Skip any leading 0s */
1536: while (*hex == '0') {
1537: hex++;
1538: len--;
1539: }
1540:
1541: if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1.1.1.2 misho 1542: if (len == 0) {
1543: zendlval->value.lval = 0;
1544: } else {
1545: zendlval->value.lval = strtol(hex, NULL, 16);
1546: }
1.1 misho 1547: zendlval->type = IS_LONG;
1548: return T_LNUMBER;
1549: } else {
1550: zendlval->value.dval = zend_hex_strtod(hex, NULL);
1551: zendlval->type = IS_DOUBLE;
1552: return T_DNUMBER;
1553: }
1554: }
1555:
1556: <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1557: if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1558: zendlval->value.lval = strtol(yytext, NULL, 10);
1559: zendlval->type = IS_LONG;
1560: } else {
1561: zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1562: zendlval->value.str.len = yyleng;
1563: zendlval->type = IS_STRING;
1564: }
1565: return T_NUM_STRING;
1566: }
1567:
1.1.1.2 misho 1568: <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1.1 misho 1569: zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1570: zendlval->value.str.len = yyleng;
1571: zendlval->type = IS_STRING;
1572: return T_NUM_STRING;
1573: }
1574:
1575: <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1576: zendlval->value.dval = zend_strtod(yytext, NULL);
1577: zendlval->type = IS_DOUBLE;
1578: return T_DNUMBER;
1579: }
1580:
1581: <ST_IN_SCRIPTING>"__CLASS__" {
1.1.1.2 misho 1582: const char *class_name = NULL;
1583:
1584: if (CG(active_class_entry)
1585: && (ZEND_ACC_TRAIT ==
1586: (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1587: /* We create a special __CLASS__ constant that is going to be resolved
1588: at run-time */
1589: zendlval->value.str.len = sizeof("__CLASS__")-1;
1590: zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len);
1591: zendlval->type = IS_CONSTANT;
1592: } else {
1593: if (CG(active_class_entry)) {
1594: class_name = CG(active_class_entry)->name;
1595: }
1596:
1597: if (!class_name) {
1598: class_name = "";
1599: }
1600:
1601: zendlval->value.str.len = strlen(class_name);
1602: zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
1603: zendlval->type = IS_STRING;
1.1 misho 1604: }
1.1.1.2 misho 1605: return T_CLASS_C;
1606: }
1.1 misho 1607:
1.1.1.2 misho 1608: <ST_IN_SCRIPTING>"__TRAIT__" {
1609: const char *trait_name = NULL;
1610:
1611: if (CG(active_class_entry)
1612: && (ZEND_ACC_TRAIT ==
1613: (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1614: trait_name = CG(active_class_entry)->name;
1615: }
1616:
1617: if (!trait_name) {
1618: trait_name = "";
1.1 misho 1619: }
1.1.1.2 misho 1620:
1621: zendlval->value.str.len = strlen(trait_name);
1622: zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
1.1 misho 1623: zendlval->type = IS_STRING;
1.1.1.2 misho 1624:
1625: return T_TRAIT_C;
1.1 misho 1626: }
1627:
1628: <ST_IN_SCRIPTING>"__FUNCTION__" {
1.1.1.2 misho 1629: const char *func_name = NULL;
1.1 misho 1630:
1631: if (CG(active_op_array)) {
1632: func_name = CG(active_op_array)->function_name;
1633: }
1634:
1635: if (!func_name) {
1636: func_name = "";
1637: }
1638: zendlval->value.str.len = strlen(func_name);
1639: zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
1640: zendlval->type = IS_STRING;
1641: return T_FUNC_C;
1642: }
1643:
1644: <ST_IN_SCRIPTING>"__METHOD__" {
1.1.1.2 misho 1645: const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1646: const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1.1 misho 1647: size_t len = 0;
1648:
1649: if (class_name) {
1650: len += strlen(class_name) + 2;
1651: }
1652: if (func_name) {
1653: len += strlen(func_name);
1654: }
1655:
1.1.1.2 misho 1656: zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
1.1 misho 1657: class_name ? class_name : "",
1658: class_name && func_name ? "::" : "",
1659: func_name ? func_name : ""
1660: );
1661: zendlval->type = IS_STRING;
1662: return T_METHOD_C;
1663: }
1664:
1665: <ST_IN_SCRIPTING>"__LINE__" {
1666: zendlval->value.lval = CG(zend_lineno);
1667: zendlval->type = IS_LONG;
1668: return T_LINE;
1669: }
1670:
1671: <ST_IN_SCRIPTING>"__FILE__" {
1672: char *filename = zend_get_compiled_filename(TSRMLS_C);
1673:
1674: if (!filename) {
1675: filename = "";
1676: }
1677: zendlval->value.str.len = strlen(filename);
1678: zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
1679: zendlval->type = IS_STRING;
1680: return T_FILE;
1681: }
1682:
1683: <ST_IN_SCRIPTING>"__DIR__" {
1684: char *filename = zend_get_compiled_filename(TSRMLS_C);
1685: const size_t filename_len = strlen(filename);
1686: char *dirname;
1687:
1688: if (!filename) {
1689: filename = "";
1690: }
1691:
1692: dirname = estrndup(filename, filename_len);
1693: zend_dirname(dirname, filename_len);
1694:
1695: if (strcmp(dirname, ".") == 0) {
1696: dirname = erealloc(dirname, MAXPATHLEN);
1697: #if HAVE_GETCWD
1698: VCWD_GETCWD(dirname, MAXPATHLEN);
1699: #elif HAVE_GETWD
1700: VCWD_GETWD(dirname);
1701: #endif
1702: }
1703:
1704: zendlval->value.str.len = strlen(dirname);
1705: zendlval->value.str.val = dirname;
1706: zendlval->type = IS_STRING;
1707: return T_DIR;
1708: }
1709:
1710: <ST_IN_SCRIPTING>"__NAMESPACE__" {
1711: if (CG(current_namespace)) {
1712: *zendlval = *CG(current_namespace);
1713: zval_copy_ctor(zendlval);
1714: } else {
1715: ZVAL_EMPTY_STRING(zendlval);
1716: }
1717: return T_NS_C;
1718: }
1719:
1720: <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1.1.1.2 misho 1721: YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1.1 misho 1722:
1723: if (bracket != SCNG(yy_text)) {
1724: /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1725: YYCURSOR = bracket;
1726: goto inline_html;
1727: }
1728:
1729: HANDLE_NEWLINES(yytext, yyleng);
1730: zendlval->value.str.val = yytext; /* no copying - intentional */
1731: zendlval->value.str.len = yyleng;
1732: zendlval->type = IS_STRING;
1733: BEGIN(ST_IN_SCRIPTING);
1734: return T_OPEN_TAG;
1735: }
1736:
1737:
1738: <INITIAL>"<%=" {
1739: if (CG(asp_tags)) {
1740: zendlval->value.str.val = yytext; /* no copying - intentional */
1741: zendlval->value.str.len = yyleng;
1742: zendlval->type = IS_STRING;
1743: BEGIN(ST_IN_SCRIPTING);
1744: return T_OPEN_TAG_WITH_ECHO;
1745: } else {
1746: goto inline_char_handler;
1747: }
1748: }
1749:
1750:
1751: <INITIAL>"<?=" {
1.1.1.2 misho 1752: zendlval->value.str.val = yytext; /* no copying - intentional */
1753: zendlval->value.str.len = yyleng;
1754: zendlval->type = IS_STRING;
1755: BEGIN(ST_IN_SCRIPTING);
1756: return T_OPEN_TAG_WITH_ECHO;
1.1 misho 1757: }
1758:
1759:
1760: <INITIAL>"<%" {
1761: if (CG(asp_tags)) {
1762: zendlval->value.str.val = yytext; /* no copying - intentional */
1763: zendlval->value.str.len = yyleng;
1764: zendlval->type = IS_STRING;
1765: BEGIN(ST_IN_SCRIPTING);
1766: return T_OPEN_TAG;
1767: } else {
1768: goto inline_char_handler;
1769: }
1770: }
1771:
1772:
1773: <INITIAL>"<?php"([ \t]|{NEWLINE}) {
1774: zendlval->value.str.val = yytext; /* no copying - intentional */
1775: zendlval->value.str.len = yyleng;
1776: zendlval->type = IS_STRING;
1777: HANDLE_NEWLINE(yytext[yyleng-1]);
1778: BEGIN(ST_IN_SCRIPTING);
1779: return T_OPEN_TAG;
1780: }
1781:
1782:
1783: <INITIAL>"<?" {
1784: if (CG(short_tags)) {
1785: zendlval->value.str.val = yytext; /* no copying - intentional */
1786: zendlval->value.str.len = yyleng;
1787: zendlval->type = IS_STRING;
1788: BEGIN(ST_IN_SCRIPTING);
1789: return T_OPEN_TAG;
1790: } else {
1791: goto inline_char_handler;
1792: }
1793: }
1794:
1795: <INITIAL>{ANY_CHAR} {
1796: if (YYCURSOR > YYLIMIT) {
1797: return 0;
1798: }
1799:
1800: inline_char_handler:
1801:
1802: while (1) {
1803: YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1804:
1805: YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1806:
1807: if (YYCURSOR < YYLIMIT) {
1808: switch (*YYCURSOR) {
1809: case '?':
1.1.1.2 misho 1810: if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1.1 misho 1811: break;
1812: }
1813: continue;
1814: case '%':
1815: if (CG(asp_tags)) {
1816: break;
1817: }
1818: continue;
1819: case 's':
1820: case 'S':
1821: /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1822: * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1823: YYCURSOR--;
1824: yymore();
1825: default:
1826: continue;
1827: }
1828:
1829: YYCURSOR--;
1830: }
1831:
1832: break;
1833: }
1834:
1835: inline_html:
1836: yyleng = YYCURSOR - SCNG(yy_text);
1837:
1838: if (SCNG(output_filter)) {
1839: int readsize;
1840: size_t sz = 0;
1841: readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1842: zendlval->value.str.len = sz;
1843: if (readsize < yyleng) {
1844: yyless(readsize);
1845: }
1846: } else {
1847: zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
1848: zendlval->value.str.len = yyleng;
1849: }
1850: zendlval->type = IS_STRING;
1851: HANDLE_NEWLINES(yytext, yyleng);
1852: return T_INLINE_HTML;
1853: }
1854:
1855:
1856: /* Make sure a label character follows "->", otherwise there is no property
1857: * and "->" will be taken literally
1858: */
1859: <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1860: yyless(yyleng - 3);
1861: yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1862: zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1863: zendlval->type = IS_STRING;
1864: return T_VARIABLE;
1865: }
1866:
1867: /* A [ always designates a variable offset, regardless of what follows
1868: */
1869: <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1870: yyless(yyleng - 1);
1871: yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1872: zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1873: zendlval->type = IS_STRING;
1874: return T_VARIABLE;
1875: }
1876:
1877: <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1878: zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1879: zendlval->type = IS_STRING;
1880: return T_VARIABLE;
1881: }
1882:
1883: <ST_VAR_OFFSET>"]" {
1884: yy_pop_state(TSRMLS_C);
1885: return ']';
1886: }
1887:
1888: <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1889: /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1890: return yytext[0];
1891: }
1892:
1893: <ST_VAR_OFFSET>[ \n\r\t\\'#] {
1894: /* Invalid rule to return a more explicit parse error with proper line number */
1895: yyless(0);
1896: yy_pop_state(TSRMLS_C);
1897: return T_ENCAPSED_AND_WHITESPACE;
1898: }
1899:
1900: <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1901: zend_copy_value(zendlval, yytext, yyleng);
1902: zendlval->type = IS_STRING;
1903: return T_STRING;
1904: }
1905:
1906:
1907: <ST_IN_SCRIPTING>"#"|"//" {
1908: while (YYCURSOR < YYLIMIT) {
1909: switch (*YYCURSOR++) {
1910: case '\r':
1911: if (*YYCURSOR == '\n') {
1912: YYCURSOR++;
1913: }
1914: /* fall through */
1915: case '\n':
1916: CG(zend_lineno)++;
1917: break;
1918: case '%':
1919: if (!CG(asp_tags)) {
1920: continue;
1921: }
1922: /* fall through */
1923: case '?':
1924: if (*YYCURSOR == '>') {
1925: YYCURSOR--;
1926: break;
1927: }
1928: /* fall through */
1929: default:
1930: continue;
1931: }
1932:
1933: break;
1934: }
1935:
1936: yyleng = YYCURSOR - SCNG(yy_text);
1937:
1938: return T_COMMENT;
1939: }
1940:
1941: <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1942: int doc_com;
1943:
1944: if (yyleng > 2) {
1945: doc_com = 1;
1946: RESET_DOC_COMMENT();
1947: } else {
1948: doc_com = 0;
1949: }
1950:
1951: while (YYCURSOR < YYLIMIT) {
1952: if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1953: break;
1954: }
1955: }
1956:
1957: if (YYCURSOR < YYLIMIT) {
1958: YYCURSOR++;
1959: } else {
1960: zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1961: }
1962:
1963: yyleng = YYCURSOR - SCNG(yy_text);
1964: HANDLE_NEWLINES(yytext, yyleng);
1965:
1966: if (doc_com) {
1967: CG(doc_comment) = estrndup(yytext, yyleng);
1968: CG(doc_comment_len) = yyleng;
1969: return T_DOC_COMMENT;
1970: }
1971:
1972: return T_COMMENT;
1973: }
1974:
1975: <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1976: zendlval->value.str.val = yytext; /* no copying - intentional */
1977: zendlval->value.str.len = yyleng;
1978: zendlval->type = IS_STRING;
1979: BEGIN(INITIAL);
1980: return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1981: }
1982:
1983:
1984: <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1985: if (CG(asp_tags)) {
1986: BEGIN(INITIAL);
1987: zendlval->value.str.len = yyleng;
1988: zendlval->type = IS_STRING;
1989: zendlval->value.str.val = yytext; /* no copying - intentional */
1990: return T_CLOSE_TAG; /* implicit ';' at php-end tag */
1991: } else {
1992: yyless(1);
1993: return yytext[0];
1994: }
1995: }
1996:
1997:
1998: <ST_IN_SCRIPTING>b?['] {
1999: register char *s, *t;
2000: char *end;
2001: int bprefix = (yytext[0] != '\'') ? 1 : 0;
2002:
2003: while (1) {
2004: if (YYCURSOR < YYLIMIT) {
2005: if (*YYCURSOR == '\'') {
2006: YYCURSOR++;
2007: yyleng = YYCURSOR - SCNG(yy_text);
2008:
2009: break;
2010: } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
2011: YYCURSOR++;
2012: }
2013: } else {
2014: yyleng = YYLIMIT - SCNG(yy_text);
2015:
2016: /* Unclosed single quotes; treat similar to double quotes, but without a separate token
2017: * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2018: * rule, which continued in ST_IN_SCRIPTING state after the quote */
2019: return T_ENCAPSED_AND_WHITESPACE;
2020: }
2021: }
2022:
2023: zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
2024: zendlval->value.str.len = yyleng-bprefix-2;
2025: zendlval->type = IS_STRING;
2026:
2027: /* convert escape sequences */
2028: s = t = zendlval->value.str.val;
2029: end = s+zendlval->value.str.len;
2030: while (s<end) {
2031: if (*s=='\\') {
2032: s++;
2033:
2034: switch(*s) {
2035: case '\\':
2036: case '\'':
2037: *t++ = *s;
2038: zendlval->value.str.len--;
2039: break;
2040: default:
2041: *t++ = '\\';
2042: *t++ = *s;
2043: break;
2044: }
2045: } else {
2046: *t++ = *s;
2047: }
2048:
2049: if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2050: CG(zend_lineno)++;
2051: }
2052: s++;
2053: }
2054: *t = 0;
2055:
2056: if (SCNG(output_filter)) {
2057: size_t sz = 0;
2058: s = zendlval->value.str.val;
2059: SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
2060: zendlval->value.str.len = sz;
2061: efree(s);
2062: }
2063: return T_CONSTANT_ENCAPSED_STRING;
2064: }
2065:
2066:
2067: <ST_IN_SCRIPTING>b?["] {
2068: int bprefix = (yytext[0] != '"') ? 1 : 0;
2069:
2070: while (YYCURSOR < YYLIMIT) {
2071: switch (*YYCURSOR++) {
2072: case '"':
2073: yyleng = YYCURSOR - SCNG(yy_text);
2074: zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2075: return T_CONSTANT_ENCAPSED_STRING;
2076: case '$':
2077: if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2078: break;
2079: }
2080: continue;
2081: case '{':
2082: if (*YYCURSOR == '$') {
2083: break;
2084: }
2085: continue;
2086: case '\\':
2087: if (YYCURSOR < YYLIMIT) {
2088: YYCURSOR++;
2089: }
2090: /* fall through */
2091: default:
2092: continue;
2093: }
2094:
2095: YYCURSOR--;
2096: break;
2097: }
2098:
2099: /* Remember how much was scanned to save rescanning */
2100: SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2101:
2102: YYCURSOR = SCNG(yy_text) + yyleng;
2103:
2104: BEGIN(ST_DOUBLE_QUOTES);
2105: return '"';
2106: }
2107:
2108:
2109: <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2110: char *s;
2111: int bprefix = (yytext[0] != '<') ? 1 : 0;
2112:
2113: /* save old heredoc label */
2114: Z_STRVAL_P(zendlval) = CG(heredoc);
2115: Z_STRLEN_P(zendlval) = CG(heredoc_len);
2116:
2117: CG(zend_lineno)++;
2118: CG(heredoc_len) = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2119: s = yytext+bprefix+3;
2120: while ((*s == ' ') || (*s == '\t')) {
2121: s++;
2122: CG(heredoc_len)--;
2123: }
2124:
2125: if (*s == '\'') {
2126: s++;
2127: CG(heredoc_len) -= 2;
2128:
2129: BEGIN(ST_NOWDOC);
2130: } else {
2131: if (*s == '"') {
2132: s++;
2133: CG(heredoc_len) -= 2;
2134: }
2135:
2136: BEGIN(ST_HEREDOC);
2137: }
2138:
2139: CG(heredoc) = estrndup(s, CG(heredoc_len));
2140:
2141: /* Check for ending label on the next line */
2142: if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) {
2143: YYCTYPE *end = YYCURSOR + CG(heredoc_len);
2144:
2145: if (*end == ';') {
2146: end++;
2147: }
2148:
2149: if (*end == '\n' || *end == '\r') {
2150: BEGIN(ST_END_HEREDOC);
2151: }
2152: }
2153:
2154: return T_START_HEREDOC;
2155: }
2156:
2157:
2158: <ST_IN_SCRIPTING>[`] {
2159: BEGIN(ST_BACKQUOTE);
2160: return '`';
2161: }
2162:
2163:
2164: <ST_END_HEREDOC>{ANY_CHAR} {
2165: YYCURSOR += CG(heredoc_len) - 1;
2166: yyleng = CG(heredoc_len);
2167:
2168: Z_STRVAL_P(zendlval) = CG(heredoc);
2169: Z_STRLEN_P(zendlval) = CG(heredoc_len);
2170: CG(heredoc) = NULL;
2171: CG(heredoc_len) = 0;
2172: BEGIN(ST_IN_SCRIPTING);
2173: return T_END_HEREDOC;
2174: }
2175:
2176:
2177: <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2178: zendlval->value.lval = (long) '{';
2179: yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2180: yyless(1);
2181: return T_CURLY_OPEN;
2182: }
2183:
2184:
2185: <ST_DOUBLE_QUOTES>["] {
2186: BEGIN(ST_IN_SCRIPTING);
2187: return '"';
2188: }
2189:
2190: <ST_BACKQUOTE>[`] {
2191: BEGIN(ST_IN_SCRIPTING);
2192: return '`';
2193: }
2194:
2195:
/* Literal text inside a double-quoted string: consume up to (not including)
 * the closing quote or the start of an interpolation ("$label", "${", "{$")
 * and return it as one T_ENCAPSED_AND_WHITESPACE token. */
<ST_DOUBLE_QUOTES>{ANY_CHAR} {
	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
		/* A segment length was recorded beforehand: jump straight to its
		 * end and reset the record so it is only used once. */
		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);

		goto double_quotes_scan_done;
	}

	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

	/* If the matched character is a backslash, the character after it is
	 * escaped and must not be examined as a delimiter. */
	if (YYCURSOR < YYLIMIT && yytext[0] == '\\') {
		YYCURSOR++;
	}

	while (YYCURSOR < YYLIMIT) {
		YYCTYPE ch = *YYCURSOR++;

		if (ch == '\\') {
			/* Skip the escaped character, if there is one. */
			if (YYCURSOR < YYLIMIT) {
				YYCURSOR++;
			}
			continue;
		}

		if (ch == '"'
		 || (ch == '$' && (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{'))
		 || (ch == '{' && *YYCURSOR == '$')) {
			/* Leave the terminating character for the next rule. */
			YYCURSOR--;
			break;
		}
	}

double_quotes_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
	return T_ENCAPSED_AND_WHITESPACE;
}
2244:
2245:
/* Literal text inside a backquoted string: consume up to (not including)
 * the closing backtick or the start of an interpolation ("$label", "${",
 * "{$") and return it as one T_ENCAPSED_AND_WHITESPACE token. */
<ST_BACKQUOTE>{ANY_CHAR} {
	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

	/* If the matched character is a backslash, the character after it is
	 * escaped and must not be examined as a delimiter. */
	if (YYCURSOR < YYLIMIT && yytext[0] == '\\') {
		YYCURSOR++;
	}

	while (YYCURSOR < YYLIMIT) {
		YYCTYPE ch = *YYCURSOR++;

		if (ch == '\\') {
			/* Skip the escaped character, if there is one. */
			if (YYCURSOR < YYLIMIT) {
				YYCURSOR++;
			}
			continue;
		}

		if (ch == '`'
		 || (ch == '$' && (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{'))
		 || (ch == '{' && *YYCURSOR == '$')) {
			/* Leave the terminating character for the next rule. */
			YYCURSOR--;
			break;
		}
	}

	yyleng = YYCURSOR - SCNG(yy_text);

	zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
	return T_ENCAPSED_AND_WHITESPACE;
}
2286:
2287:
/* Literal text inside a heredoc body: consume up to the start of an
 * interpolation ("$label", "${", "{$") or up to the line holding the
 * closing label, and return it as one T_ENCAPSED_AND_WHITESPACE token. */
<ST_HEREDOC>{ANY_CHAR} {
	/* Bytes of trailing newline to leave out of the token value. */
	int newline = 0;

	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

	/* Step back so the loop also examines the character this rule matched. */
	YYCURSOR--;

	while (YYCURSOR < YYLIMIT) {
		YYCTYPE ch = *YYCURSOR++;

		if (ch == '\r' || ch == '\n') {
			YYCTYPE *end;

			/* Treat "\r\n" as a single line break. */
			if (ch == '\r' && *YYCURSOR == '\n') {
				YYCURSOR++;
			}

			/* Check for ending label on the next line */
			if (!IS_LABEL_START(*YYCURSOR)
			 || CG(heredoc_len) >= YYLIMIT - YYCURSOR
			 || memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
				continue;
			}

			/* The label may be followed by one ';' and must then be
			 * followed by a line break to count as the terminator. */
			end = YYCURSOR + CG(heredoc_len);
			if (*end == ';') {
				end++;
			}
			if (*end != '\n' && *end != '\r') {
				continue;
			}

			/* newline before label will be subtracted from returned text, but
			 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
			newline = (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n')
				? 2 /* Windows newline */
				: 1;

			CG(increment_lineno) = 1; /* For newline before label */
			BEGIN(ST_END_HEREDOC);

			goto heredoc_scan_done;
		}

		if (ch == '\\') {
			/* Skip the escaped character, unless it is a line break, which
			 * must still go through the ending-label check above. */
			if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
				YYCURSOR++;
			}
			continue;
		}

		if ((ch == '$' && (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{'))
		 || (ch == '{' && *YYCURSOR == '$')) {
			/* Leave the interpolation start for the next rule. */
			YYCURSOR--;
			break;
		}
	}

heredoc_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
	return T_ENCAPSED_AND_WHITESPACE;
}
2358:
2359:
/* Nowdoc body: consume raw text up to the line holding the closing label
 * (or up to YYLIMIT) and return it as one T_ENCAPSED_AND_WHITESPACE token.
 * No '$', '{' or backslash handling is done here. */
<ST_NOWDOC>{ANY_CHAR} {
	/* Bytes of trailing newline to leave out of the token value. */
	int newline = 0;

	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

	/* Step back so the loop also examines the character this rule matched. */
	YYCURSOR--;

	while (YYCURSOR < YYLIMIT) {
		YYCTYPE ch = *YYCURSOR++;
		YYCTYPE *end;

		/* Only line breaks are of interest. */
		if (ch != '\r' && ch != '\n') {
			continue;
		}

		/* Treat "\r\n" as a single line break. */
		if (ch == '\r' && *YYCURSOR == '\n') {
			YYCURSOR++;
		}

		/* Check for ending label on the next line */
		if (!IS_LABEL_START(*YYCURSOR)
		 || CG(heredoc_len) >= YYLIMIT - YYCURSOR
		 || memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
			continue;
		}

		/* The label may be followed by one ';' and must then be followed
		 * by a line break to count as the terminator. */
		end = YYCURSOR + CG(heredoc_len);
		if (*end == ';') {
			end++;
		}
		if (*end != '\n' && *end != '\r') {
			continue;
		}

		/* newline before label will be subtracted from returned text, but
		 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
		newline = (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n')
			? 2 /* Windows newline */
			: 1;

		CG(increment_lineno) = 1; /* For newline before label */
		BEGIN(ST_END_HEREDOC);

		goto nowdoc_scan_done;
	}

nowdoc_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	/* The text is copied verbatim, without the newline before the label. */
	zend_copy_value(zendlval, yytext, yyleng - newline);
	zendlval->type = IS_STRING;
	HANDLE_NEWLINES(yytext, yyleng - newline);
	return T_ENCAPSED_AND_WHITESPACE;
}
2414:
2415:
/* Catch-all for a character matched by ANY_CHAR in script code or inside a
 * variable offset. */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
	if (YYCURSOR <= YYLIMIT) {
		/* Warn about the character, then resume scanning at the `restart`
		 * label; no token is returned for it. */
		zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
		goto restart;
	}

	/* Cursor is past the limit: return 0. */
	return 0;
}
2424:
2425: */
2426: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>