--- embedaddon/php/Zend/zend_language_scanner.c 2012/02/21 23:47:52 1.1.1.1 +++ embedaddon/php/Zend/zend_language_scanner.c 2013/07/22 01:32:16 1.1.1.3 @@ -1,10 +1,10 @@ -/* Generated by re2c 0.13.5 on Sun Jan 1 17:48:17 2012 */ +/* Generated by re2c 0.13.5 on Sun Jun 16 14:52:22 2013 */ #line 1 "Zend/zend_language_scanner.l" /* +----------------------------------------------------------------------+ | Zend Engine | +----------------------------------------------------------------------+ - | Copyright (c) 1998-2012 Zend Technologies Ltd. (http://www.zend.com) | + | Copyright (c) 1998-2013 Zend Technologies Ltd. (http://www.zend.com) | +----------------------------------------------------------------------+ | This source file is subject to version 2.00 of the Zend license, | | that is bundled with this package in the file LICENSE, and is | @@ -23,7 +23,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_language_scanner.c,v 1.1.1.1 2012/02/21 23:47:52 misho Exp $ */ +/* $Id: zend_language_scanner.c,v 1.1.1.3 2013/07/22 01:32:16 misho Exp $ */ #if 0 # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) @@ -35,6 +35,9 @@ #include #include "zend.h" +#ifdef PHP_WIN32 +# include +#endif #include "zend_alloc.h" #include #include "zend_compile.h" @@ -122,6 +125,33 @@ do { \ BEGIN_EXTERN_C() +static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); + return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC); +} + +static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC); +} + +static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + return zend_multibyte_encoding_converter(to, to_length, from, from_length, +LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC); +} + +static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); + return zend_multibyte_encoding_converter(to, to_length, from, from_length, +internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC); +} + + static void _yy_push_state(int new_state TSRMLS_DC) { zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); @@ -149,6 +179,7 @@ static void yy_scan_buffer(char *str, unsigned int len void startup_scanner(TSRMLS_D) { + CG(parse_error) = 0; CG(heredoc) = NULL; CG(heredoc_len) = 0; CG(doc_comment) = NULL; @@ -162,6 +193,7 @@ void shutdown_scanner(TSRMLS_D) efree(CG(heredoc)); CG(heredoc_len)=0; } + CG(parse_error) = 0; zend_stack_destroy(&SCNG(state_stack)); RESET_DOC_COMMENT(); } @@ -183,7 +215,6 @@ ZEND_API void zend_save_lexical_state(zend_lex_state * lex_state->filename = zend_get_compiled_filename(TSRMLS_C); lex_state->lineno = CG(zend_lineno); -#ifdef ZEND_MULTIBYTE lex_state->script_org = SCNG(script_org); lex_state->script_org_size = SCNG(script_org_size); lex_state->script_filtered = SCNG(script_filtered); @@ -191,8 +222,6 @@ ZEND_API void zend_save_lexical_state(zend_lex_state * lex_state->input_filter = SCNG(input_filter); lex_state->output_filter = SCNG(output_filter); lex_state->script_encoding = SCNG(script_encoding); - lex_state->internal_encoding = SCNG(internal_encoding); -#endif /* ZEND_MULTIBYTE */ } ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) @@ -211,11 +240,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_stat YYSETCONDITION(lex_state->yy_state); CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } + if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -227,14 +252,13 @@ ZEND_API void zend_restore_lexical_state(zend_lex_stat SCNG(input_filter) = lex_state->input_filter; SCNG(output_filter) = lex_state->output_filter; SCNG(script_encoding) = lex_state->script_encoding; - SCNG(internal_encoding) = lex_state->internal_encoding; -#endif /* ZEND_MULTIBYTE */ if (CG(heredoc)) { efree(CG(heredoc)); CG(heredoc) = NULL; CG(heredoc_len) = 0; } + RESET_DOC_COMMENT(); } ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) @@ -247,12 +271,214 @@ ZEND_API void zend_destroy_file_handle(zend_file_handl } } +#define BOM_UTF32_BE "\x00\x00\xfe\xff" +#define BOM_UTF32_LE "\xff\xfe\x00\x00" +#define BOM_UTF16_BE "\xfe\xff" +#define BOM_UTF16_LE "\xff\xfe" +#define BOM_UTF8 "\xef\xbb\xbf" +static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) +{ + const unsigned char *p; + int wchar_size = 2; + int le = 0; + + /* utf-16 or utf-32? */ + p = script; + while ((p-script) < script_size) { + p = memchr(p, 0, script_size-(p-script)-2); + if (!p) { + break; + } + if (*(p+1) == '\0' && *(p+2) == '\0') { + wchar_size = 4; + break; + } + + /* searching for UTF-32 specific byte orders, so this will do */ + p += 4; + } + + /* BE or LE? */ + p = script; + while ((p-script) < script_size) { + if (*p == '\0' && *(p+wchar_size-1) != '\0') { + /* BE */ + le = 0; + break; + } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { + /* LE* */ + le = 1; + break; + } + p += wchar_size; + } + + if (wchar_size == 2) { + return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be; + } else { + return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be; + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) +{ + const zend_encoding *script_encoding = NULL; + int bom_size; + unsigned char *pos1, *pos2; + + if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { + return NULL; + } + + /* check out BOM */ + if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf32be; + bom_size = sizeof(BOM_UTF32_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf32le; + bom_size = sizeof(BOM_UTF32_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { + script_encoding = zend_multibyte_encoding_utf16be; + bom_size = sizeof(BOM_UTF16_BE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { + script_encoding = zend_multibyte_encoding_utf16le; + bom_size = sizeof(BOM_UTF16_LE)-1; + } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { + script_encoding = zend_multibyte_encoding_utf8; + bom_size = sizeof(BOM_UTF8)-1; + } + + if (script_encoding) { + /* remove BOM */ + LANG_SCNG(script_org) += bom_size; + LANG_SCNG(script_org_size) -= bom_size; + + return script_encoding; + } + + /* script contains NULL bytes -> auto-detection */ + if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { + /* check if the NULL byte is after the __HALT_COMPILER(); */ + pos2 = LANG_SCNG(script_org); + + while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { + pos2 = memchr(pos2, '_', pos1 - pos2); + if (!pos2) break; + pos2++; + if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { + pos2 += sizeof("_HALT_COMPILER")-1; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == '(') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ')') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ';') { + return NULL; + } + } + } + } + } + /* make best effort if BOM is missing */ + return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); + } + + return NULL; +} + +static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D) +{ + const zend_encoding *script_encoding; + + if (CG(detect_unicode)) { + /* check out bom(byte order mark) and see if containing wchars */ + script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); + if (script_encoding != NULL) { + /* bom or wchar detection is prior to 'script_encoding' option */ + return script_encoding; + } + } + + /* if no script_encoding specified, just leave alone */ + if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { + return NULL; + } + + /* if multiple encodings specified, detect automagically */ + if (CG(script_encoding_list_size) > 1) { + return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC); + } + + return CG(script_encoding_list)[0]; +} + +ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC) +{ + const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); + const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C); + + if (!script_encoding) { + return FAILURE; + } + + /* judge input/output filter */ + LANG_SCNG(script_encoding) = script_encoding; + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + + if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) { + if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { + /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */ + LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; + LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script; + } else { + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = NULL; + } + return SUCCESS; + } + + if (zend_multibyte_check_lexer_compatibility(internal_encoding)) { + LANG_SCNG(input_filter) = encoding_filter_script_to_internal; + LANG_SCNG(output_filter) = NULL; + } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { + LANG_SCNG(input_filter) = NULL; + LANG_SCNG(output_filter) = encoding_filter_script_to_internal; + } else { + /* both script and internal encodings are incompatible w/ flex */ + LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; + LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal; + } + + return 0; +} + ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) { - char *file_path = NULL, *buf; + const char *file_path = NULL; + char *buf; size_t size, offset = 0; - + /* The shebang line was read, get the current position to obtain the buffer start */ if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) { if ((offset = ftell(file_handle->handle.fp)) == -1) { @@ -277,32 +503,24 @@ ZEND_API int open_file_for_scanning(zend_file_handle * SCNG(yy_start) = NULL; if (size != -1) { -#ifdef ZEND_MULTIBYTE - if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) { - return FAILURE; - } + if (CG(multibyte)) { + SCNG(script_org) = (unsigned char*)buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; - SCNG(yy_in) = NULL; + zend_multibyte_set_filter(NULL TSRMLS_CC); - zend_multibyte_set_filter(NULL TSRMLS_CC); - - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); - if (SCNG(script_filtered) == NULL) { - zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " - "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name); + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + buf = (char*)SCNG(script_filtered); + size = SCNG(script_filtered_size); } } - SCNG(yy_start) = SCNG(script_filtered) - offset; - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); -#else /* !ZEND_MULTIBYTE */ - SCNG(yy_start) = buf - offset; + SCNG(yy_start) = (unsigned char *)buf - offset; yy_scan_buffer(buf, size TSRMLS_CC); -#endif /* ZEND_MULTIBYTE */ } else { zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); } @@ -324,6 +542,7 @@ ZEND_API int open_file_for_scanning(zend_file_handle * CG(zend_lineno) = 1; } + RESET_DOC_COMMENT(); CG(increment_lineno) = 0; return SUCCESS; } @@ -363,10 +582,12 @@ ZEND_API zend_op_array *compile_file(zend_file_handle init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); CG(in_compilation) = 1; CG(active_op_array) = op_array; + zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); + zend_init_compiler_context(TSRMLS_C); compiler_result = zendparse(TSRMLS_C); zend_do_return(&retval_znode, 0 TSRMLS_CC); CG(in_compilation) = original_in_compilation; - if (compiler_result==1) { /* parser error */ + if (compiler_result != 0) { /* parser error */ zend_bailout(); } compilation_successful=1; @@ -376,7 +597,7 @@ ZEND_API zend_op_array *compile_file(zend_file_handle CG(active_op_array) = original_active_op_array; if (compilation_successful) { pass_two(op_array TSRMLS_CC); - zend_release_labels(TSRMLS_C); + zend_release_labels(0 TSRMLS_CC); } else { efree(op_array); retval = NULL; @@ -430,36 +651,49 @@ zend_op_array *compile_filename(int type, zval *filena ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) { + char *buf; + size_t size; + /* enforce two trailing NULLs for flex... */ - str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD); + if (IS_INTERNED(str->value.str.val)) { + char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD); + memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD); + str->value.str.val = tmp; + } else { + str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD); + } memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD); - SCNG(yy_in)=NULL; + SCNG(yy_in) = NULL; SCNG(yy_start) = NULL; -#ifdef ZEND_MULTIBYTE - SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); - SCNG(script_org_size) = str->value.str.len; + buf = str->value.str.val; + size = str->value.str.len; - zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); + if (CG(multibyte)) { + SCNG(script_org) = (unsigned char*)buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); + + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + buf = (char*)SCNG(script_filtered); + size = SCNG(script_filtered_size); + } } - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); -#else /* !ZEND_MULTIBYTE */ - yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); -#endif /* ZEND_MULTIBYTE */ + yy_scan_buffer(buf, size TSRMLS_CC); zend_set_compiled_filename(filename TSRMLS_CC); CG(zend_lineno) = 1; CG(increment_lineno) = 0; + RESET_DOC_COMMENT(); return SUCCESS; } @@ -467,13 +701,12 @@ ZEND_API int zend_prepare_string_for_scanning(zval *st ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) { size_t offset = SCNG(yy_cursor) - SCNG(yy_start); -#ifdef ZEND_MULTIBYTE if (SCNG(input_filter)) { - size_t original_offset = offset, length = 0; do { + size_t original_offset = offset, length = 0; + do { unsigned char *p = NULL; - SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC); - if (!p) { - break; + if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) { + return (size_t)-1; } efree(p); if (length > original_offset) { @@ -483,7 +716,6 @@ ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) } } while (original_offset != length); } -#endif return offset; } @@ -521,21 +753,17 @@ zend_op_array *compile_string(zval *source_string, cha init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); CG(interactive) = orig_interactive; CG(active_op_array) = op_array; + zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); + zend_init_compiler_context(TSRMLS_C); BEGIN(ST_IN_SCRIPTING); compiler_result = zendparse(TSRMLS_C); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ - if (compiler_result==1) { + if (compiler_result != 0) { CG(active_op_array) = original_active_op_array; CG(unclean_shutdown)=1; destroy_op_array(op_array TSRMLS_CC); @@ -545,7 +773,7 @@ zend_op_array *compile_string(zval *source_string, cha zend_do_return(NULL, 0 TSRMLS_CC); CG(active_op_array) = original_active_op_array; pass_two(op_array TSRMLS_CC); - zend_release_labels(TSRMLS_C); + zend_release_labels(0 TSRMLS_CC); retval = op_array; } } @@ -573,16 +801,10 @@ int highlight_file(char *filename, zend_syntax_highlig return FAILURE; } zend_highlight(syntax_highlighter_ini TSRMLS_CC); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ zend_destroy_file_handle(&file_handle TSRMLS_CC); zend_restore_lexical_state(&original_lex_state TSRMLS_CC); return SUCCESS; @@ -602,125 +824,47 @@ int highlight_string(zval *str, zend_syntax_highlighte } BEGIN(INITIAL); zend_highlight(syntax_highlighter_ini TSRMLS_CC); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ zend_restore_lexical_state(&original_lex_state TSRMLS_CC); zval_dtor(str); return SUCCESS; } -END_EXTERN_C() -#ifdef ZEND_MULTIBYTE - -BEGIN_EXTERN_C() -ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC) +ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC) { - size_t original_offset, offset, free_flag, new_len, length; - unsigned char *p; + size_t length; + unsigned char *new_yy_start; - /* calculate current position */ - offset = original_offset = YYCURSOR - SCNG(yy_start); - if (old_input_filter && offset > 0) { - zend_encoding *new_encoding = SCNG(script_encoding); - zend_encoding_filter new_filter = SCNG(input_filter); - SCNG(script_encoding) = old_encoding; - SCNG(input_filter) = old_input_filter; - offset = zend_get_scanned_file_offset(TSRMLS_C); - SCNG(script_encoding) = new_encoding; - SCNG(input_filter) = new_filter; - } - /* convert and set */ if (!SCNG(input_filter)) { - length = SCNG(script_org_size) - offset; - p = SCNG(script_org) + offset; - free_flag = 0; + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + SCNG(script_filtered_size) = 0; + length = SCNG(script_org_size); + new_yy_start = SCNG(script_org); } else { - SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC); - free_flag = 1; - } - - new_len = original_offset + length; - - if (new_len > YYLIMIT - SCNG(yy_start)) { - unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len); - SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); - SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); - SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); - SCNG(yy_start) = new_yy_start; + if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } SCNG(script_filtered) = new_yy_start; - SCNG(script_filtered_size) = new_len; + SCNG(script_filtered_size) = length; } - SCNG(yy_limit) = SCNG(yy_start) + new_len; - memmove(SCNG(yy_start) + original_offset, p, length); + SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); + SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); + SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); + SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start)); - if (free_flag) { - efree(p); - } + SCNG(yy_start) = new_yy_start; } -ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC) -{ - size_t n; - - if (CG(interactive) == 0) { - if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { - return FAILURE; - } - n = len; - return n; - } - - /* interactive */ - if (SCNG(script_org)) { - efree(SCNG(script_org)); - } - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - } - SCNG(script_org) = NULL; - SCNG(script_org_size) = 0; - - /* TODO: support widechars */ - if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { - return FAILURE; - } - n = len; - - SCNG(script_org_size) = n; - SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); - memcpy(SCNG(script_org), buf, n); - - return n; -} - - -ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC) -{ - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } - SCNG(script_org_size) = n; - - SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); - memcpy(SCNG(script_org), buf, n); - *(SCNG(script_org)+SCNG(script_org_size)) = '\0'; - - return 0; -} - - # define zend_copy_value(zendlval, yytext, yyleng) \ if (SCNG(output_filter)) { \ size_t sz = 0; \ @@ -730,11 +874,6 @@ ZEND_API int zend_multibyte_read_script(unsigned char zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \ zendlval->value.str.len = yyleng; \ } -#else /* ZEND_MULTIBYTE */ -# define zend_copy_value(zendlval, yytext, yyleng) \ - zendlval->value.str.val = (char *)estrndup(yytext, yyleng); \ - zendlval->value.str.len = yyleng; -#endif /* ZEND_MULTIBYTE */ static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC) { @@ -775,6 +914,14 @@ static void zend_scan_escape_string(zval *zendlval, ch *t++ = '\v'; zendlval->value.str.len--; break; + case 'e': +#ifdef PHP_WIN32 + *t++ = VK_ESCAPE; +#else + *t++ = '\e'; +#endif + zendlval->value.str.len--; + break; case '"': case '`': if (*s != quote_type) { @@ -838,7 +985,6 @@ static void zend_scan_escape_string(zval *zendlval, ch s++; } *t = 0; -#ifdef ZEND_MULTIBYTE if (SCNG(output_filter)) { size_t sz = 0; s = zendlval->value.str.val; @@ -846,7 +992,6 @@ static void zend_scan_escape_string(zval *zendlval, ch zendlval->value.str.len = sz; efree(s); } -#endif /* ZEND_MULTIBYTE */ } @@ -858,7 +1003,7 @@ restart: yymore_restart: -#line 862 "Zend/zend_language_scanner.c" +#line 1007 "Zend/zend_language_scanner.c" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -957,7 +1102,7 @@ yyc_INITIAL: yy3: YYDEBUG(3, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1586 "Zend/zend_language_scanner.l" +#line 1795 "Zend/zend_language_scanner.l" { if (YYCURSOR > YYLIMIT) { return 0; @@ -973,7 +1118,7 @@ inline_char_handler: if (YYCURSOR < YYLIMIT) { switch (*YYCURSOR) { case '?': - if (CG(short_tags) || !strncasecmp(YYCURSOR + 1, "php", 3)) { /* Assume [ \t\n\r] follows "php" */ + if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */ break; } continue; @@ -1001,7 +1146,6 @@ inline_char_handler: inline_html: yyleng = YYCURSOR - SCNG(yy_text); -#ifdef ZEND_MULTIBYTE if (SCNG(output_filter)) { int readsize; size_t sz = 0; @@ -1014,15 +1158,11 @@ inline_html: zendlval->value.str.val = (char *) estrndup(yytext, yyleng); zendlval->value.str.len = yyleng; } -#else /* !ZEND_MULTIBYTE */ - zendlval->value.str.val = (char *) estrndup(yytext, yyleng); - zendlval->value.str.len = yyleng; -#endif zendlval->type = IS_STRING; HANDLE_NEWLINES(yytext, yyleng); return T_INLINE_HTML; } -#line 1026 "Zend/zend_language_scanner.c" +#line 1166 "Zend/zend_language_scanner.c" yy4: YYDEBUG(4, *YYCURSOR); yych = *++YYCURSOR; @@ -1040,7 +1180,7 @@ yy5: yy6: YYDEBUG(6, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1574 "Zend/zend_language_scanner.l" +#line 1783 "Zend/zend_language_scanner.l" { if (CG(short_tags)) { zendlval->value.str.val = yytext; /* no copying - intentional */ @@ -1052,14 +1192,14 @@ yy6: goto inline_char_handler; } } -#line 1056 "Zend/zend_language_scanner.c" +#line 1196 "Zend/zend_language_scanner.c" yy7: YYDEBUG(7, *YYCURSOR); ++YYCURSOR; if ((yych = *YYCURSOR) == '=') goto yy43; YYDEBUG(8, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1551 "Zend/zend_language_scanner.l" +#line 1760 "Zend/zend_language_scanner.l" { if (CG(asp_tags)) { zendlval->value.str.val = yytext; /* no copying - intentional */ @@ -1071,7 +1211,7 @@ yy7: goto inline_char_handler; } } -#line 1075 "Zend/zend_language_scanner.c" +#line 1215 "Zend/zend_language_scanner.c" yy9: YYDEBUG(9, *YYCURSOR); yych = *++YYCURSOR; @@ -1257,9 +1397,9 @@ yy35: ++YYCURSOR; YYDEBUG(38, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1507 "Zend/zend_language_scanner.l" +#line 1720 "Zend/zend_language_scanner.l" { - YYCTYPE *bracket = zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); + YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); if (bracket != SCNG(yy_text)) { /* Handle previously scanned HTML, as possible