/* tokenizer.c -- ELWIX (Embedded LightWeight unIX) / embedaddon / php / ext / tokenizer */
1.1 ! misho 1: /* ! 2: +----------------------------------------------------------------------+ ! 3: | PHP Version 5 | ! 4: +----------------------------------------------------------------------+ ! 5: | Copyright (c) 1997-2012 The PHP Group | ! 6: +----------------------------------------------------------------------+ ! 7: | This source file is subject to version 3.01 of the PHP license, | ! 8: | that is bundled with this package in the file LICENSE, and is | ! 9: | available through the world-wide-web at the following url: | ! 10: | http://www.php.net/license/3_01.txt | ! 11: | If you did not receive a copy of the PHP license and are unable to | ! 12: | obtain it through the world-wide-web, please send a note to | ! 13: | license@php.net so we can mail you a copy immediately. | ! 14: +----------------------------------------------------------------------+ ! 15: | Author: Andrei Zmievski <andrei@php.net> | ! 16: +----------------------------------------------------------------------+ ! 17: */ ! 18: ! 19: /* $Id: tokenizer.c 321634 2012-01-01 13:15:04Z felipe $ */ ! 20: ! 21: #ifdef HAVE_CONFIG_H ! 22: #include "config.h" ! 23: #endif ! 24: ! 25: #include "php.h" ! 26: #include "php_ini.h" ! 27: #include "ext/standard/info.h" ! 28: #include "php_tokenizer.h" ! 29: ! 30: #include "zend.h" ! 31: #include "zend_language_scanner.h" ! 32: #include "zend_language_scanner_defs.h" ! 33: #include <zend_language_parser.h> ! 34: ! 35: #define zendtext LANG_SCNG(yy_text) ! 36: #define zendleng LANG_SCNG(yy_leng) ! 37: ! 38: /* {{{ arginfo */ ! 39: ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1) ! 40: ZEND_ARG_INFO(0, source) ! 41: ZEND_END_ARG_INFO() ! 42: ! 43: ZEND_BEGIN_ARG_INFO_EX(arginfo_token_name, 0, 0, 1) ! 44: ZEND_ARG_INFO(0, token) ! 45: ZEND_END_ARG_INFO() ! 46: /* }}} */ ! 47: ! 48: /* {{{ tokenizer_functions[] ! 49: * ! 50: * Every user visible function must have an entry in tokenizer_functions[]. ! 51: */ ! 
52: const zend_function_entry tokenizer_functions[] = { ! 53: PHP_FE(token_get_all, arginfo_token_get_all) ! 54: PHP_FE(token_name, arginfo_token_name) ! 55: PHP_FE_END ! 56: }; ! 57: /* }}} */ ! 58: ! 59: /* {{{ tokenizer_module_entry ! 60: */ ! 61: zend_module_entry tokenizer_module_entry = { ! 62: #if ZEND_MODULE_API_NO >= 20010901 ! 63: STANDARD_MODULE_HEADER, ! 64: #endif ! 65: "tokenizer", ! 66: tokenizer_functions, ! 67: PHP_MINIT(tokenizer), ! 68: NULL, ! 69: NULL, ! 70: NULL, ! 71: PHP_MINFO(tokenizer), ! 72: #if ZEND_MODULE_API_NO >= 20010901 ! 73: "0.1", /* Replace with version number for your extension */ ! 74: #endif ! 75: STANDARD_MODULE_PROPERTIES ! 76: }; ! 77: /* }}} */ ! 78: ! 79: #ifdef COMPILE_DL_TOKENIZER ! 80: ZEND_GET_MODULE(tokenizer) ! 81: #endif ! 82: ! 83: /* {{{ PHP_MINIT_FUNCTION ! 84: */ ! 85: PHP_MINIT_FUNCTION(tokenizer) ! 86: { ! 87: tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU); ! 88: return SUCCESS; ! 89: } ! 90: /* }}} */ ! 91: ! 92: /* {{{ PHP_MINFO_FUNCTION ! 93: */ ! 94: PHP_MINFO_FUNCTION(tokenizer) ! 95: { ! 96: php_info_print_table_start(); ! 97: php_info_print_table_row(2, "Tokenizer Support", "enabled"); ! 98: php_info_print_table_end(); ! 99: } ! 100: /* }}} */ ! 101: ! 102: static void tokenize(zval *return_value TSRMLS_DC) ! 103: { ! 104: zval token; ! 105: zval *keyword; ! 106: int token_type; ! 107: zend_bool destroy; ! 108: int token_line = 1; ! 109: ! 110: array_init(return_value); ! 111: ! 112: ZVAL_NULL(&token); ! 113: while ((token_type = lex_scan(&token TSRMLS_CC))) { ! 114: destroy = 1; ! 115: switch (token_type) { ! 116: case T_CLOSE_TAG: ! 117: if (zendtext[zendleng - 1] != '>') { ! 118: CG(zend_lineno)++; ! 119: } ! 120: case T_OPEN_TAG: ! 121: case T_OPEN_TAG_WITH_ECHO: ! 122: case T_WHITESPACE: ! 123: case T_COMMENT: ! 124: case T_DOC_COMMENT: ! 125: destroy = 0; ! 126: break; ! 127: } ! 128: ! 129: if (token_type >= 256) { ! 130: MAKE_STD_ZVAL(keyword); ! 131: array_init(keyword); ! 
132: add_next_index_long(keyword, token_type); ! 133: if (token_type == T_END_HEREDOC) { ! 134: if (CG(increment_lineno)) { ! 135: token_line = ++CG(zend_lineno); ! 136: CG(increment_lineno) = 0; ! 137: } ! 138: add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1); ! 139: efree(Z_STRVAL(token)); ! 140: } else { ! 141: add_next_index_stringl(keyword, (char *)zendtext, zendleng, 1); ! 142: } ! 143: add_next_index_long(keyword, token_line); ! 144: add_next_index_zval(return_value, keyword); ! 145: } else { ! 146: add_next_index_stringl(return_value, (char *)zendtext, zendleng, 1); ! 147: } ! 148: if (destroy && Z_TYPE(token) != IS_NULL) { ! 149: zval_dtor(&token); ! 150: } ! 151: ZVAL_NULL(&token); ! 152: ! 153: token_line = CG(zend_lineno); ! 154: ! 155: if (token_type == T_HALT_COMPILER) { ! 156: break; ! 157: } ! 158: } ! 159: } ! 160: ! 161: /* {{{ proto array token_get_all(string source) ! 162: */ ! 163: PHP_FUNCTION(token_get_all) ! 164: { ! 165: char *source = NULL; ! 166: int argc = ZEND_NUM_ARGS(); ! 167: int source_len; ! 168: zval source_z; ! 169: zend_lex_state original_lex_state; ! 170: ! 171: if (zend_parse_parameters(argc TSRMLS_CC, "s", &source, &source_len) == FAILURE) ! 172: return; ! 173: ! 174: ZVAL_STRINGL(&source_z, source, source_len, 1); ! 175: zend_save_lexical_state(&original_lex_state TSRMLS_CC); ! 176: ! 177: if (zend_prepare_string_for_scanning(&source_z, "" TSRMLS_CC) == FAILURE) { ! 178: zend_restore_lexical_state(&original_lex_state TSRMLS_CC); ! 179: RETURN_EMPTY_STRING(); ! 180: } ! 181: ! 182: LANG_SCNG(yy_state) = yycINITIAL; ! 183: ! 184: tokenize(return_value TSRMLS_CC); ! 185: ! 186: zend_restore_lexical_state(&original_lex_state TSRMLS_CC); ! 187: zval_dtor(&source_z); ! 188: } ! 189: /* }}} */ ! 190: ! 191: /* {{{ proto string token_name(int type) ! 192: */ ! 193: PHP_FUNCTION(token_name) ! 194: { ! 195: int argc = ZEND_NUM_ARGS(); ! 196: long type; ! 197: ! 
198: if (zend_parse_parameters(argc TSRMLS_CC, "l", &type) == FAILURE) { ! 199: return; ! 200: } ! 201: RETVAL_STRING(get_token_type_name(type), 1); ! 202: } ! 203: /* }}} */ ! 204: ! 205: /* ! 206: * Local variables: ! 207: * tab-width: 4 ! 208: * c-basic-offset: 4 ! 209: * End: ! 210: * vim600: noet sw=4 ts=4 fdm=marker ! 211: * vim<600: noet sw=4 ts=4 ! 212: */