--- embedaddon/php/Zend/zend_language_scanner.l 2012/02/21 23:47:52 1.1.1.1 +++ embedaddon/php/Zend/zend_language_scanner.l 2014/06/15 20:04:04 1.1.1.4 @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | Zend Engine | +----------------------------------------------------------------------+ - | Copyright (c) 1998-2012 Zend Technologies Ltd. (http://www.zend.com) | + | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) | +----------------------------------------------------------------------+ | This source file is subject to version 2.00 of the Zend license, | | that is bundled with this package in the file LICENSE, and is | @@ -21,7 +21,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_language_scanner.l,v 1.1.1.1 2012/02/21 23:47:52 misho Exp $ */ +/* $Id: zend_language_scanner.l,v 1.1.1.4 2014/06/15 20:04:04 misho Exp $ */ #if 0 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) @@ -33,6 +33,9 @@ #include #include "zend.h" +#ifdef PHP_WIN32 +# include +#endif #include "zend_alloc.h" #include #include "zend_compile.h" @@ -120,6 +123,33 @@ do { \ BEGIN_EXTERN_C() +static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); + return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC); +} + +static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC); +} + +static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + return zend_multibyte_encoding_converter(to, to_length, from, from_length, +LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC); +} + +static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); + return zend_multibyte_encoding_converter(to, to_length, from, from_length, +internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC); +} + + static void _yy_push_state(int new_state TSRMLS_DC) { zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); @@ -147,6 +177,7 @@ static void yy_scan_buffer(char *str, unsigned int len void startup_scanner(TSRMLS_D) { + CG(parse_error) = 0; CG(heredoc) = NULL; CG(heredoc_len) = 0; CG(doc_comment) = NULL; @@ -160,6 +191,7 @@ void shutdown_scanner(TSRMLS_D) efree(CG(heredoc)); CG(heredoc_len)=0; } + CG(parse_error) = 0; zend_stack_destroy(&SCNG(state_stack)); RESET_DOC_COMMENT(); } @@ -181,7 +213,6 @@ ZEND_API void zend_save_lexical_state(zend_lex_state * lex_state->filename = zend_get_compiled_filename(TSRMLS_C); lex_state->lineno = CG(zend_lineno); -#ifdef ZEND_MULTIBYTE lex_state->script_org = SCNG(script_org); lex_state->script_org_size = SCNG(script_org_size); lex_state->script_filtered = SCNG(script_filtered); @@ -189,8 +220,6 @@ ZEND_API void zend_save_lexical_state(zend_lex_state * lex_state->input_filter = SCNG(input_filter); lex_state->output_filter = SCNG(output_filter); lex_state->script_encoding = SCNG(script_encoding); - lex_state->internal_encoding = SCNG(internal_encoding); -#endif /* ZEND_MULTIBYTE */ } ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) @@ -209,11 +238,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_stat YYSETCONDITION(lex_state->yy_state); CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } + if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -225,14 +250,13 @@ ZEND_API void zend_restore_lexical_state(zend_lex_stat SCNG(input_filter) = lex_state->input_filter; SCNG(output_filter) = lex_state->output_filter; SCNG(script_encoding) = lex_state->script_encoding; - SCNG(internal_encoding) = lex_state->internal_encoding; -#endif /* ZEND_MULTIBYTE */ if (CG(heredoc)) { efree(CG(heredoc)); CG(heredoc) = NULL; CG(heredoc_len) = 0; } + RESET_DOC_COMMENT(); } ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) @@ -245,12 +269,214 @@ ZEND_API void zend_destroy_file_handle(zend_file_handl } } +#define BOM_UTF32_BE "\x00\x00\xfe\xff" +#define BOM_UTF32_LE "\xff\xfe\x00\x00" +#define BOM_UTF16_BE "\xfe\xff" +#define BOM_UTF16_LE "\xff\xfe" +#define BOM_UTF8 "\xef\xbb\xbf" +static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) +{ + const unsigned char *p; + int wchar_size = 2; + int le = 0; + + /* utf-16 or utf-32? */ + p = script; + while ((p-script) < script_size) { + p = memchr(p, 0, script_size-(p-script)-2); + if (!p) { + break; + } + if (*(p+1) == '\0' && *(p+2) == '\0') { + wchar_size = 4; + break; + } + + /* searching for UTF-32 specific byte orders, so this will do */ + p += 4; + } + + /* BE or LE? */ + p = script; + while ((p-script) < script_size) { + if (*p == '\0' && *(p+wchar_size-1) != '\0') { + /* BE */ + le = 0; + break; + } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { + /* LE* */ + le = 1; + break; + } + p += wchar_size; + } + + if (wchar_size == 2) { + return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be; + } else { + return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be; + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) +{ + const zend_encoding *script_encoding = NULL; + int bom_size; + unsigned char *pos1, *pos2; + + if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { + return NULL; + } + + /* check out BOM */ + if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf32be; + bom_size = sizeof(BOM_UTF32_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf32le; + bom_size = sizeof(BOM_UTF32_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf16be; + bom_size = sizeof(BOM_UTF16_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf16le; + bom_size = sizeof(BOM_UTF16_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { + script_encoding = zend_multibyte_encoding_utf8; + bom_size = sizeof(BOM_UTF8)-1; + } + + if (script_encoding) { + /* remove BOM */ + LANG_SCNG(script_org) += bom_size; + LANG_SCNG(script_org_size) -= bom_size; + + return script_encoding; + } + + /* script contains NULL bytes -> auto-detection */ + if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { + /* check if the NULL byte is after the __HALT_COMPILER(); */ + pos2 = LANG_SCNG(script_org); + + while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { + pos2 = memchr(pos2, '_', pos1 - pos2); + if (!pos2) break; + pos2++; + if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { + pos2 += sizeof("_HALT_COMPILER")-1; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == '(') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ')') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ';') { + return NULL; + } + } + } + } + } + /* make best effort if BOM is missing */ + return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D) +{ + const zend_encoding *script_encoding; + + if (CG(detect_unicode)) { + /* check out bom(byte order mark) and see if containing wchars */ + script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); + if (script_encoding != NULL) { + /* bom or wchar detection is prior to 'script_encoding' option */ + return script_encoding; + } + } + + /* if no script_encoding specified, just leave alone */ + if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { + return NULL; + } + + /* if multiple encodings specified, detect automagically */ + if (CG(script_encoding_list_size) > 1) { + return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC); + } + + return CG(script_encoding_list)[0]; +} + +ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C); + + if (!script_encoding) { + return FAILURE; + } + + /* judge input/output filter */ + LANG_SCNG(script_encoding) = script_encoding; + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + + if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) { + if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { + /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */ + LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; + LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script; + } else { + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + } + return SUCCESS; + } + + if (zend_multibyte_check_lexer_compatibility(internal_encoding)) { + LANG_SCNG(input_filter) = encoding_filter_script_to_internal; + LANG_SCNG(output_filter) = NULL; + } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = encoding_filter_script_to_internal; + } else { + /* both script and internal encodings are incompatible w/ flex */ + LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; + LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal; + } + + return 0; +} + ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) { - char *file_path = NULL, *buf; + const char *file_path = NULL; + char *buf; size_t size, offset = 0; - + /* The shebang line was read, get the current position to obtain the buffer start */ if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) { if ((offset = ftell(file_handle->handle.fp)) == -1) { @@ -275,32 +501,24 @@ ZEND_API int open_file_for_scanning(zend_file_handle * SCNG(yy_start) = NULL; if (size != -1) { -#ifdef ZEND_MULTIBYTE - if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) { - return FAILURE; - } + if (CG(multibyte)) { + SCNG(script_org) = (unsigned char*)buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; - SCNG(yy_in) = NULL; + zend_multibyte_set_filter(NULL TSRMLS_CC); - zend_multibyte_set_filter(NULL TSRMLS_CC); - - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); - if (SCNG(script_filtered) == NULL) { - zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " - "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name); + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + buf = (char*)SCNG(script_filtered); + size = SCNG(script_filtered_size); } } - SCNG(yy_start) = SCNG(script_filtered) - offset; - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); -#else /* !ZEND_MULTIBYTE */ - SCNG(yy_start) = buf - offset; + SCNG(yy_start) = (unsigned char *)buf - offset; yy_scan_buffer(buf, size TSRMLS_CC); -#endif /* ZEND_MULTIBYTE */ } else { zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); } @@ -322,6 +540,7 @@ ZEND_API int open_file_for_scanning(zend_file_handle * CG(zend_lineno) = 1; } + RESET_DOC_COMMENT(); CG(increment_lineno) = 0; return SUCCESS; } @@ -361,10 +580,12 @@ ZEND_API zend_op_array *compile_file(zend_file_handle init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); CG(in_compilation) = 1; CG(active_op_array) = op_array; + zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); + zend_init_compiler_context(TSRMLS_C); compiler_result = zendparse(TSRMLS_C); zend_do_return(&retval_znode, 0 TSRMLS_CC); CG(in_compilation) = original_in_compilation; - if (compiler_result==1) { /* parser error */ + if (compiler_result != 0) { /* parser error */ zend_bailout(); } compilation_successful=1; @@ -374,7 +595,7 @@ ZEND_API zend_op_array *compile_file(zend_file_handle CG(active_op_array) = original_active_op_array; if (compilation_successful) { pass_two(op_array TSRMLS_CC); - zend_release_labels(TSRMLS_C); + zend_release_labels(0 TSRMLS_CC); } else { efree(op_array); retval = NULL; @@ -428,36 +649,49 @@ zend_op_array *compile_filename(int type, zval *filena ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) { + char *buf; + size_t size; + /* enforce two trailing NULLs for flex... */ - str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD); + if (IS_INTERNED(str->value.str.val)) { + char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD); + memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD); + str->value.str.val = tmp; + } else { + str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD); + } memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD); - SCNG(yy_in)=NULL; + SCNG(yy_in) = NULL; SCNG(yy_start) = NULL; -#ifdef ZEND_MULTIBYTE - SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); - SCNG(script_org_size) = str->value.str.len; + buf = str->value.str.val; + size = str->value.str.len; - zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); + if (CG(multibyte)) { + SCNG(script_org) = (unsigned char*)buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); + + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + buf = (char*)SCNG(script_filtered); + size = SCNG(script_filtered_size); + } } - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); -#else /* !ZEND_MULTIBYTE */ - yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); -#endif /* ZEND_MULTIBYTE */ + yy_scan_buffer(buf, size TSRMLS_CC); zend_set_compiled_filename(filename TSRMLS_CC); CG(zend_lineno) = 1; CG(increment_lineno) = 0; + RESET_DOC_COMMENT(); return SUCCESS; } @@ -465,13 +699,12 @@ ZEND_API int zend_prepare_string_for_scanning(zval *st ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) { size_t offset = SCNG(yy_cursor) - SCNG(yy_start); -#ifdef ZEND_MULTIBYTE if (SCNG(input_filter)) { - size_t original_offset = offset, length = 0; do { + size_t original_offset = offset, length = 0; + do { unsigned char *p = NULL; - SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC); - if (!p) { - break; + if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) { + return (size_t)-1; } efree(p); if (length > original_offset) { @@ -481,7 +714,6 @@ ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) } } while (original_offset != length); } -#endif return offset; } @@ -519,21 +751,17 @@ zend_op_array *compile_string(zval *source_string, cha init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); CG(interactive) = orig_interactive; CG(active_op_array) = op_array; + zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); + zend_init_compiler_context(TSRMLS_C); BEGIN(ST_IN_SCRIPTING); compiler_result = zendparse(TSRMLS_C); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ - if (compiler_result==1) { + if (compiler_result != 0) { CG(active_op_array) = original_active_op_array; CG(unclean_shutdown)=1; destroy_op_array(op_array TSRMLS_CC); @@ -543,7 +771,7 @@ zend_op_array *compile_string(zval *source_string, cha zend_do_return(NULL, 0 TSRMLS_CC); CG(active_op_array) = original_active_op_array; pass_two(op_array TSRMLS_CC); - zend_release_labels(TSRMLS_C); + zend_release_labels(0 TSRMLS_CC); retval = op_array; } } @@ -571,16 +799,10 @@ int highlight_file(char *filename, zend_syntax_highlig return FAILURE; } zend_highlight(syntax_highlighter_ini TSRMLS_CC); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ zend_destroy_file_handle(&file_handle TSRMLS_CC); zend_restore_lexical_state(&original_lex_state TSRMLS_CC); return SUCCESS; @@ -600,125 +822,47 @@ int highlight_string(zval *str, zend_syntax_highlighte } BEGIN(INITIAL); zend_highlight(syntax_highlighter_ini TSRMLS_CC); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ zend_restore_lexical_state(&original_lex_state TSRMLS_CC); zval_dtor(str); return SUCCESS; } -END_EXTERN_C() -#ifdef ZEND_MULTIBYTE - -BEGIN_EXTERN_C() -ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC) +ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC) { - size_t original_offset, offset, free_flag, new_len, length; - unsigned char *p; + size_t length; + unsigned char *new_yy_start; - /* calculate current position */ - offset = original_offset = YYCURSOR - SCNG(yy_start); - if (old_input_filter && offset > 0) { - zend_encoding *new_encoding = SCNG(script_encoding); - zend_encoding_filter new_filter = SCNG(input_filter); - SCNG(script_encoding) = old_encoding; - SCNG(input_filter) = old_input_filter; - offset = zend_get_scanned_file_offset(TSRMLS_C); - SCNG(script_encoding) = new_encoding; - SCNG(input_filter) = new_filter; - } - /* convert and set */ if (!SCNG(input_filter)) { - length = SCNG(script_org_size) - offset; - p = SCNG(script_org) + offset; - free_flag = 0; + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + SCNG(script_filtered_size) = 0; + length = SCNG(script_org_size); + new_yy_start = SCNG(script_org); } else { - SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC); - free_flag = 1; - } - - new_len = original_offset + length; - - if (new_len > YYLIMIT - SCNG(yy_start)) { - unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len); - SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); - SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); - SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); - SCNG(yy_start) = new_yy_start; + if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } SCNG(script_filtered) = new_yy_start; - SCNG(script_filtered_size) = new_len; + SCNG(script_filtered_size) = length; } - SCNG(yy_limit) = SCNG(yy_start) + new_len; - memmove(SCNG(yy_start) + original_offset, p, length); + SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); + SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); + SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); + SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start)); - if (free_flag) { - efree(p); - } + SCNG(yy_start) = new_yy_start; } -ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC) -{ - size_t n; - - if (CG(interactive) == 0) { - if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { - return FAILURE; - } - n = len; - return n; - } - - /* interactive */ - if (SCNG(script_org)) { - efree(SCNG(script_org)); - } - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - } - SCNG(script_org) = NULL; - SCNG(script_org_size) = 0; - - /* TODO: support widechars */ - if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { - return FAILURE; - } - n = len; - - SCNG(script_org_size) = n; - SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); - memcpy(SCNG(script_org), buf, n); - - return n; -} - - -ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC) -{ - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } - SCNG(script_org_size) = n; - - SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); - memcpy(SCNG(script_org), buf, n); - *(SCNG(script_org)+SCNG(script_org_size)) = '\0'; - - return 0; -} - - # define zend_copy_value(zendlval, yytext, yyleng) \ if (SCNG(output_filter)) { \ size_t sz = 0; \ @@ -728,11 +872,6 @@ ZEND_API int zend_multibyte_read_script(unsigned char zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \ zendlval->value.str.len = yyleng; \ } -#else /* ZEND_MULTIBYTE */ -# define zend_copy_value(zendlval, yytext, yyleng) \ - zendlval->value.str.val = (char *)estrndup(yytext, yyleng); \ - zendlval->value.str.len = yyleng; -#endif /* ZEND_MULTIBYTE */ static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC) { @@ -773,6 +912,14 @@ static void zend_scan_escape_string(zval *zendlval, ch *t++ = '\v'; zendlval->value.str.len--; break; + case 'e': +#ifdef PHP_WIN32 + *t++ = VK_ESCAPE; +#else + *t++ = '\e'; +#endif + zendlval->value.str.len--; + break; case '"': case '`': if (*s != quote_type) { @@ -836,7 +983,6 @@ static void zend_scan_escape_string(zval *zendlval, ch s++; } *t = 0; -#ifdef ZEND_MULTIBYTE if (SCNG(output_filter)) { size_t sz = 0; s = zendlval->value.str.val; @@ -844,7 +990,6 @@ static void zend_scan_escape_string(zval *zendlval, ch zendlval->value.str.len = sz; efree(s); } -#endif /* ZEND_MULTIBYTE */ } @@ -861,6 +1006,7 @@ LNUM [0-9]+ DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*) EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM}) HNUM "0x"[0-9a-fA-F]+ +BNUM "0b"[01]+ LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]* WHITESPACE [ \n\r\t]+ TABS_AND_SPACES [ \t]* @@ -1008,6 +1154,10 @@ NEWLINE ("\r"|"\n"|"\r\n") return T_INTERFACE; } +"trait" { + return T_TRAIT; +} + "extends" { return T_EXTENDS; } @@ -1074,14 +1224,10 @@ NEWLINE ("\r"|"\n"|"\r\n") return T_DOUBLE_CAST; } -"("{TABS_AND_SPACES}"string"{TABS_AND_SPACES}")" { +"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" { return T_STRING_CAST; } -"("{TABS_AND_SPACES}"binary"{TABS_AND_SPACES}")" { - return T_STRING_CAST; -} - "("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" { return T_ARRAY_CAST; } @@ -1126,6 +1272,10 @@ NEWLINE ("\r"|"\n"|"\r\n") return T_USE; } +"insteadof" { + return T_INSTEADOF; +} + "global" { return T_GLOBAL; } @@ -1182,6 +1332,10 @@ NEWLINE ("\r"|"\n"|"\r\n") return T_ARRAY; } +"callable" { + return T_CALLABLE; +} + "++" { return T_INC; } @@ -1328,7 +1482,31 @@ NEWLINE ("\r"|"\n"|"\r\n") goto restart; } +{BNUM} { + char *bin = yytext + 2; /* Skip "0b" */ + int len = yyleng - 2; + /* Skip any leading 0s */ + while (*bin == '0') { + ++bin; + --len; + } + + if (len < SIZEOF_LONG * 8) { + if (len == 0) { + zendlval->value.lval = 0; + } else { + zendlval->value.lval = strtol(bin, NULL, 2); + } + zendlval->type = IS_LONG; + return T_LNUMBER; + } else { + zendlval->value.dval = zend_bin_strtod(bin, NULL); + zendlval->type = IS_DOUBLE; + return T_DNUMBER; + } +} + {LNUM} { if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */ zendlval->value.lval = strtol(yytext, NULL, 0); @@ -1361,7 +1539,11 @@ NEWLINE ("\r"|"\n"|"\r\n") } if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) { - zendlval->value.lval = strtol(hex, NULL, 16); + if (len == 0) { + zendlval->value.lval = 0; + } else { + zendlval->value.lval = strtol(hex, NULL, 16); + } zendlval->type = IS_LONG; return T_LNUMBER; } else { @@ -1383,7 +1565,7 @@ NEWLINE ("\r"|"\n"|"\r\n") return T_NUM_STRING; } -{LNUM}|{HNUM} { /* Offset must be treated as a string */ +{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */ zendlval->value.str.val = (char *)estrndup(yytext, yyleng); zendlval->value.str.len = yyleng; zendlval->type = IS_STRING; @@ -1397,23 +1579,54 @@ NEWLINE ("\r"|"\n"|"\r\n") } "__CLASS__" { - char *class_name = NULL; - - if (CG(active_class_entry)) { - class_name = CG(active_class_entry)->name; + const char *class_name = NULL; + + if (CG(active_class_entry) + && (ZEND_ACC_TRAIT == + (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) { + /* We create a special __CLASS__ constant that is going to be resolved + at run-time */ + zendlval->value.str.len = sizeof("__CLASS__")-1; + zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len); + zendlval->type = IS_CONSTANT; + } else { + if (CG(active_class_entry)) { + class_name = CG(active_class_entry)->name; + } + + if (!class_name) { + class_name = ""; + } + + zendlval->value.str.len = strlen(class_name); + zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len); + zendlval->type = IS_STRING; } + return T_CLASS_C; +} - if (!class_name) { - class_name = ""; +"__TRAIT__" { + const char *trait_name = NULL; + + if (CG(active_class_entry) + && (ZEND_ACC_TRAIT == + (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) { + trait_name = CG(active_class_entry)->name; } - zendlval->value.str.len = strlen(class_name); - zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len); + + if (!trait_name) { + trait_name = ""; + } + + zendlval->value.str.len = strlen(trait_name); + zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len); zendlval->type = IS_STRING; - return T_CLASS_C; + + return T_TRAIT_C; } "__FUNCTION__" { - char *func_name = NULL; + const char *func_name = NULL; if (CG(active_op_array)) { func_name = CG(active_op_array)->function_name; @@ -1429,8 +1642,8 @@ NEWLINE ("\r"|"\n"|"\r\n") } "__METHOD__" { - char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL; - char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL; + const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL; + const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL; size_t len = 0; if (class_name) { @@ -1440,7 +1653,7 @@ NEWLINE ("\r"|"\n"|"\r\n") len += strlen(func_name); } - zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s", + zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s", class_name ? class_name : "", class_name && func_name ? "::" : "", func_name ? func_name : "" @@ -1505,7 +1718,7 @@ NEWLINE ("\r"|"\n"|"\r\n") } "" { - YYCTYPE *bracket = zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); + YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); if (bracket != SCNG(yy_text)) { /* Handle previously scanned HTML, as possible