File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard / url_scanner_ex.re
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 01:32:05 2013 UTC (11 years ago) by misho
Branches: php, MAIN
CVS tags: v5_4_17, HEAD
5.4.17

    1: /*
    2:   +----------------------------------------------------------------------+
    3:   | PHP Version 5                                                        |
    4:   +----------------------------------------------------------------------+
    5:   | Copyright (c) 1997-2013 The PHP Group                                |
    6:   +----------------------------------------------------------------------+
    7:   | This source file is subject to version 3.01 of the PHP license,      |
    8:   | that is bundled with this package in the file LICENSE, and is        |
    9:   | available through the world-wide-web at the following url:           |
   10:   | http://www.php.net/license/3_01.txt                                  |
   11:   | If you did not receive a copy of the PHP license and are unable to   |
   12:   | obtain it through the world-wide-web, please send a note to          |
   13:   | license@php.net so we can mail you a copy immediately.               |
   14:   +----------------------------------------------------------------------+
   15:   | Author: Sascha Schumann <sascha@schumann.cx>                         |
   16:   +----------------------------------------------------------------------+
   17: */
   18: 
   19: /* $Id: url_scanner_ex.re,v 1.1.1.3 2013/07/22 01:32:05 misho Exp $ */
   20: 
   21: #include "php.h"
   22: 
   23: #ifdef HAVE_UNISTD_H
   24: #include <unistd.h>
   25: #endif
   26: #ifdef HAVE_LIMITS_H
   27: #include <limits.h>
   28: #endif
   29: 
   30: #include <stdio.h>
   31: #include <stdlib.h>
   32: #include <string.h>
   33: 
   34: #include "php_ini.h"
   35: #include "php_globals.h"
   36: #define STATE_TAG SOME_OTHER_STATE_TAG
   37: #include "basic_functions.h"
   38: #include "url.h"
   39: #undef STATE_TAG
   40: 
   41: #define url_scanner url_scanner_ex
   42: 
   43: #include "php_smart_str.h"
   44: 
   45: static PHP_INI_MH(OnUpdateTags)
   46: {
       	/*
       	 * INI modification handler for "url_rewriter.tags".
       	 *
       	 * The new setting (new_value / new_value_length) is a comma-separated
       	 * list of "tag=attribute" pairs.  The handler rebuilds ctx->tags so
       	 * that each lower-cased tag name maps to its attribute string.
       	 * Entries that contain no '=' are skipped.
       	 *
       	 * Returns SUCCESS, or FAILURE when the hash table cannot be allocated.
       	 */
   47: 	url_adapt_state_ex_t *ctx;
   48: 	char *key;
   49: 	char *lasts;
   50: 	char *tmp;
   51: 	
   52: 	ctx = &BG(url_adapt_state_ex);
   53: 	
   54: 	/* Prepare the table first: an existing one is destroyed and re-initialised, otherwise one is allocated. */
   55: 	
   56: 	if (ctx->tags)
   57: 		zend_hash_destroy(ctx->tags);
   58: 	else {
   59: 		ctx->tags = malloc(sizeof(HashTable));
   60: 		if (!ctx->tags) {
   61: 			return FAILURE;
   62: 		}
   63: 	}
   64: 
   65: 	zend_hash_init(ctx->tags, 0, NULL, NULL, 1);
       	/* Copy the value only now, so the early FAILURE return above cannot leak the copy. */
   66: 	tmp = estrndup(new_value, new_value_length);
       	/* The copy is tokenised in place; each token is split at its first '='. */
   67: 	for (key = php_strtok_r(tmp, ",", &lasts);
   68: 			key;
   69: 			key = php_strtok_r(NULL, ",", &lasts)) {
   70: 		char *val;
   71: 
   72: 		val = strchr(key, '=');
   73: 		if (val) {
   74: 			char *q;
   75: 			int keylen;
   76: 			
   77: 			*val++ = '\0';
       			/* Lower-case the tag name; cast as in handle_tag() because tolower() needs an unsigned char value. */
   78: 			for (q = key; *q; q++)
   79: 				*q = tolower((int)(unsigned char)*q);
   80: 			keylen = q - key;
   81: 			/* key is stored withOUT NUL
   82: 			   val is stored WITH    NUL */
   83: 			zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL);
   84: 		}
   85: 	}
   86: 
   87: 	efree(tmp);
   88: 
   89: 	return SUCCESS;
   90: }
   91: 
   92: PHP_INI_BEGIN()
       	/* url_rewriter.tags: comma-separated "tag=attribute" pairs; updates are handled by OnUpdateTags. */
   93: 	STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
   94: PHP_INI_END()
   95: 
   96: /*!re2c
   97: any = [\000-\377];
   98: N = (any\[<]);
   99: alpha = [a-zA-Z];
  100: alphanamespace = [a-zA-Z:];
  101: alphadash = ([a-zA-Z] | "-");
  102: */
  103: 
       /*
        * The re2c block above declares the named character classes (any, N,
        * alpha, alphanamespace, alphadash) used by the scanner rules in this file.
        *
        * The macros below bind the re2c interface for append_modified_url():
        * the cursor is its local `p`, the input limit is `q`, and a request
        * for more input (YYFILL) jumps to the `done` label.
        */
  104: #define YYFILL(n) goto done
  105: #define YYCTYPE unsigned char
  106: #define YYCURSOR p
  107: #define YYLIMIT q
  108: #define YYMARKER r
  109: 	
  110: static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
  111: {
       	/*
       	 * Append `url` to `dest`, inserting `url_app` as an extra query
       	 * parameter in front of any "#fragment".
       	 *
       	 * url        URL text to scan (url->c / url->len)
       	 * dest       output buffer that receives the (possibly modified) URL
       	 * url_app    text to add, e.g. "name=value"
       	 * separator  string placed before url_app when the URL already has a "?"
       	 *
       	 * A URL containing ':' is copied unchanged, and so is a URL that
       	 * starts with '#'.
       	 */
  112: 	register const char *p, *q;
  113: 	const char *bash = NULL;	/* position of the first '#', if any */
  114: 	const char *sep = "?";		/* used unless a '?' is seen while scanning */
  115: 	
  116: 	q = (p = url->c) + url->len;
  117: 
  118: scan:
  119: /*!re2c
  120:   ":"		{ smart_str_append(dest, url); return; }
  121:   "?"		{ sep = separator; goto scan; }
  122:   "#"		{ bash = p - 1; goto done; }
  123:   (any\[:?#])+		{ goto scan; }
  124: */
  125: done:
  126: 	
  127: 	/* Don't modify URLs of the format "#mark" */
  128: 	if (bash && bash - url->c == 0) {
  129: 		smart_str_append(dest, url);
  130: 		return;
  131: 	}
  132: 
       	/* Copy everything up to the fragment (or the whole URL when there is none). */
  133: 	if (bash)
  134: 		smart_str_appendl(dest, url->c, bash - url->c);
  135: 	else
  136: 		smart_str_append(dest, url);
  137: 
       	/* Add "?" or the separator, followed by the extra parameter text. */
  138: 	smart_str_appends(dest, sep);
  139: 	smart_str_append(dest, url_app);
  140: 
       	/* Re-attach the fragment, starting at the '#', after the added parameter. */
  141: 	if (bash)
  142: 		smart_str_appendl(dest, bash, q - bash);
  143: }
  144: 
  145: 
  146: #undef YYFILL	/* drop the scanner bindings used by append_modified_url(); they are defined again further down for xx_mainloop() */
  147: #undef YYCTYPE
  148: #undef YYCURSOR
  149: #undef YYLIMIT
  150: #undef YYMARKER
  151: 
  152: static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
  153: {
       	/*
       	 * Write the attribute value held in ctx->val to ctx->result.
       	 *
       	 * When the current attribute name (ctx->arg) equals the attribute
       	 * configured for the current tag (ctx->lookup_data, compared
       	 * case-insensitively), the value is passed through
       	 * append_modified_url() together with ctx->url_app; otherwise it is
       	 * copied unchanged.
       	 *
       	 * quotes  non-zero when the value was quoted in the input
       	 * type    quote character written before and after the value when
       	 *         `quotes` is set
       	 */
  154: 	char f = 0;
  155: 
       	/*
       	 * ctx->arg is not NUL-terminated, so the comparison is bounded by its
       	 * length.  The second test requires the configured name to end at the
       	 * same length; without it a mere prefix such as "h" would match "href".
       	 */
  156: 	if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0 && ctx->lookup_data[ctx->arg.len] == '\0')
  157: 		f = 1;
  158: 
  159: 	if (quotes)
  160: 		smart_str_appendc(&ctx->result, type);
  161: 	if (f) {
  162: 		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
  163: 	} else {
  164: 		smart_str_append(&ctx->result, &ctx->val);
  165: 	}
  166: 	if (quotes)
  167: 		smart_str_appendc(&ctx->result, type);
  168: }
  169: 
  170: enum {
       	/* Scanner states kept in ctx->state (see the STATE macro); xx_mainloop() resumes from the saved state. */
  171: 	STATE_PLAIN = 0,	/* copying text until a "<" is found */
  172: 	STATE_TAG,		/* directly after "<": reading the tag name */
  173: 	STATE_NEXT_ARG,		/* inside a configured tag: looking for the next attribute or ">" */
  174: 	STATE_ARG,		/* reading an attribute name */
  175: 	STATE_BEFORE_VAL,	/* after an attribute name: expecting "=" */
  176: 	STATE_VAL		/* reading an attribute value */
  177: };
  178: 
  179: #define YYFILL(n) goto stop
       /*
        * re2c interface bindings for xx_mainloop(): the cursor is `xp`, the
        * input limit is `end`, the marker is `q`, and a request for more input
        * (YYFILL, defined on the line above) jumps to the `stop` label.
        * STATE is shorthand for ctx->state.
        */
  180: #define YYCTYPE unsigned char
  181: #define YYCURSOR xp
  182: #define YYLIMIT end
  183: #define YYMARKER q
  184: #define STATE ctx->state
  185: 
       /* Common parameter list (STD_PARA) and matching argument list (STD_ARGS) of the scanner helper functions. */
  186: #define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
  187: #define STD_ARGS ctx, start, xp TSRMLS_CC
  188: 
       /* scdebug((fmt, ...)) prints via printf when SCANNER_DEBUG is non-zero and expands to nothing otherwise. */
  189: #if SCANNER_DEBUG
  190: #define scdebug(x) printf x
  191: #else
  192: #define scdebug(x)
  193: #endif
  194: 
  195: static inline void passthru(STD_PARA) 
  196: {
       	/* Copy the input between `start` and the current cursor unchanged to ctx->result. */
  197: 	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
  198: 	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
  199: }
  200: 
  201: /*
  202:  * This function appends a hidden input field after a <form> or
  203:  * <fieldset>.  The latter is important for XHTML.
        *
        * The field text is ctx->form_app; nothing is appended while it is
        * empty.  The tag is identified from ctx->tag (length and
        * case-insensitive name).
  204:  */
  205: 
  206: static void handle_form(STD_PARA) 
  207: {
  208: 	int doit = 0;	/* set when the hidden field should be appended */
  209: 
  210: 	if (ctx->form_app.len > 0) {
  211: 		switch (ctx->tag.len) {
  212: 			case sizeof("form") - 1:
  213: 				if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) {
  214: 					doit = 1;		
  215: 				}
       				/*
       				 * When a non-empty value is configured for this tag
       				 * (ctx->lookup_data) and the last attribute value seen
       				 * (ctx->val) contains "://", part of that value is
       				 * compared with lookup_data; a mismatch suppresses the
       				 * hidden field.
       				 * NOTE(review): assuming zend_memnstr() returns the start
       				 * of the match, p still points at the ':' of "://", so
       				 * memchr() finds the '/' right after it and only one
       				 * character is compared -- confirm the intended host check.
       				 */
  216: 				if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) {
  217: 					char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len);
  218: 					if (p) {
  219: 						e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p);
  220: 						if (!e) {
  221: 							e = ctx->val.c + ctx->val.len;
  222: 						}
  223: 						if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
  224: 							doit = 0;
  225: 						}
  226: 					}
  227: 				}
  228: 				break;
  229: 
  230: 			case sizeof("fieldset") - 1:
  231: 				if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) {
  232: 					doit = 1;		
  233: 				}
  234: 				break;
  235: 		}
  236: 
  237: 		if (doit)
  238: 			smart_str_append(&ctx->result, &ctx->form_app);
  239: 	}
  240: }
  241: 
  242: /*
  243:  *  HANDLE_TAG copies the HTML Tag and checks whether we 
  244:  *  have that tag in our table. If we might modify it,
  245:  *  we continue to scan the tag, otherwise we simply copy the complete
  246:  *  HTML stuff to the result buffer.
        *
        *  The tag name is stored lower-cased in ctx->tag.  On a table hit,
        *  ctx->lookup_data receives the stored entry and the state becomes
        *  STATE_NEXT_ARG; otherwise the state becomes STATE_PLAIN.
  247:  */
  248: 
  249: static inline void handle_tag(STD_PARA) 
  250: {
  251: 	int ok = 0;
  252: 	unsigned int i;
  253: 
       	/* Reset ctx->tag and fill it with the scanned tag name. */
  254: 	ctx->tag.len = 0;
  255: 	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
       	/* The table keys are lower-cased by OnUpdateTags, so lower-case the name before the lookup. */
  256: 	for (i = 0; i < ctx->tag.len; i++)
  257: 		ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]);
  258: 	if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS)
  259: 		ok = 1;
  260: 	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
  261: }
  262: 
  263: static inline void handle_arg(STD_PARA) 
  264: {
       	/* Replace the contents of ctx->arg with the attribute name just scanned (start .. cursor). */
  265: 	ctx->arg.len = 0;
  266: 	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
  267: }
  268: 
  269: static inline void handle_val(STD_PARA, char quotes, char type) 
  270: {
       	/*
       	 * Set ctx->val to the attribute value just scanned and emit it via
       	 * tag_arg().  `quotes` is 1 for a quoted value and 0 otherwise: the
       	 * value starts `quotes` bytes after `start` and is shortened by
       	 * 2 * quotes, which leaves out the surrounding quote characters.
       	 * `type` is the quote character handed on to tag_arg().
       	 */
  271: 	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
  272: 	tag_arg(ctx, quotes, type TSRMLS_CC);
  273: }
  274: 
  275: static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
  276: {
       	/*
       	 * Incremental HTML scanner.
       	 *
       	 * Appends `newdata` (newlen bytes) to ctx->buf, resumes scanning in
       	 * the state saved in ctx->state and writes the copied or rewritten
       	 * output to ctx->result.  When the scanner asks for more input
       	 * (YYFILL is defined as `goto stop`), the bytes from the start of the
       	 * unfinished token onwards are moved to the front of ctx->buf and
       	 * kept for the next call.
       	 */
  277: 	char *end, *q;
  278: 	char *xp;
  279: 	char *start;
  280: 	int rest;
  281: 
  282: 	smart_str_appendl(&ctx->buf, newdata, newlen);
  283: 	
       	/* Scan the whole buffer: leftovers of the previous call plus the new data. */
  284: 	YYCURSOR = ctx->buf.c;
  285: 	YYLIMIT = ctx->buf.c + ctx->buf.len;
  286: 
       	/* Continue in the state in which the previous call stopped. */
  287: 	switch (STATE) {
  288: 		case STATE_PLAIN: goto state_plain;
  289: 		case STATE_TAG: goto state_tag;
  290: 		case STATE_NEXT_ARG: goto state_next_arg;
  291: 		case STATE_ARG: goto state_arg;
  292: 		case STATE_BEFORE_VAL: goto state_before_val;
  293: 		case STATE_VAL: goto state_val;
  294: 	}
  295: 	
  296: 
  297: state_plain_begin:
  298: 	STATE = STATE_PLAIN;
  299: 	
       /* Plain text: copy through; a "<" switches to tag-name scanning. */
  300: state_plain:
  301: 	start = YYCURSOR;
  302: /*!re2c
  303:   "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
  304:   N+ 				{ passthru(STD_ARGS); goto state_plain; }
  305: */
  306: 
       /* Tag name: handle_tag() decides whether the attributes of this tag are scanned. */
  307: state_tag:	
  308: 	start = YYCURSOR;
  309: /*!re2c
  310:   alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
  311:   any		{ passthru(STD_ARGS); goto state_plain_begin; }
  312: */
  313: 
  314: state_next_arg_begin:
  315: 	STATE = STATE_NEXT_ARG;
  316: 	
       /* Between attributes: ">" ends the tag (handle_form() may add the hidden field), whitespace is copied, a letter starts an attribute name. */
  317: state_next_arg:
  318: 	start = YYCURSOR;
  319: /*!re2c
  320:   ">"		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
  321:   [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
  322:   alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
  323:   any		{ passthru(STD_ARGS); goto state_plain_begin; }
  324: */
  325: 
       /* Attribute name: copied through and remembered in ctx->arg. */
  326: state_arg:
  327: 	start = YYCURSOR;
  328: /*!re2c
  329:   alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
  330:   any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
  331: */
  332: 
       /* After the name: "=" (optionally surrounded by spaces) leads to the value; anything else is re-scanned as the next attribute. */
  333: state_before_val:
  334: 	start = YYCURSOR;
  335: /*!re2c
  336:   [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
  337:   any				{ --YYCURSOR; goto state_next_arg_begin; }
  338: */
  339: 
  340: 
       /* Attribute value: double-quoted, single-quoted or unquoted; handle_val() emits it. */
  341: state_val:
  342: 	start = YYCURSOR;
  343: /*!re2c
  344:   ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
  345:   ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
  346:   (any\[ \r\t\n>])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
  347:   any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
  348: */
  349: 
  350: stop:
       	/* `rest` counts the bytes from the start of the unfinished token to the end of the buffer. */
  351: 	rest = YYLIMIT - start;
  352: 	scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
  353: 	/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */	
  354: 	if (rest < 0) rest = 0;
  355: 	
       	/* Keep the unconsumed tail at the front of ctx->buf for the next call. */
  356: 	if (rest) memmove(ctx->buf.c, start, rest);
  357: 	ctx->buf.len = rest;
  358: }
  359: 
  360: char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
  361: {
       	/*
       	 * Rewrite a single URL so that it carries "name=value".
       	 *
       	 * url, urllen  the URL and its length
       	 * name, value  NUL-terminated parameter name and value
       	 * newlen       if non-NULL, receives the length of the result
       	 *
       	 * Returns the NUL-terminated result buffer built by
       	 * append_modified_url().  It is not freed here -- presumably the
       	 * caller owns it; confirm against callers.
       	 */
  362: 	smart_str surl = {0};
  363: 	smart_str buf = {0};
  364: 	smart_str url_app = {0};
  365: 
  366: 	smart_str_setl(&surl, url, urllen);
  367: 
       	/* Build the "name=value" text to be added. */
  368: 	smart_str_appends(&url_app, name);
  369: 	smart_str_appendc(&url_app, '=');
  370: 	smart_str_appends(&url_app, value);
  371: 
  372: 	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
  373: 
  374: 	smart_str_0(&buf);
  375: 	if (newlen) *newlen = buf.len;
  376: 
  377: 	smart_str_free(&url_app);
  378: 
  379: 	return buf.c;
  380: }
  381: 
  382: 
  383: static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
  384: {
       	/*
       	 * Feed `src` (srclen bytes) to the scanner and hand out the output
       	 * accumulated in ctx->result.
       	 *
       	 * newlen    receives the length of the returned data
       	 * do_flush  when set, the not yet scanned remainder held in ctx->buf
       	 *           is appended unmodified and ctx->buf is released
       	 *
       	 * Returns the result buffer; ctx->result is reset to empty afterwards,
       	 * so the buffer is no longer referenced by the context.
       	 */
  385: 	url_adapt_state_ex_t *ctx;
  386: 	char *retval;
  387: 
  388: 	ctx = &BG(url_adapt_state_ex);
  389: 
  390: 	xx_mainloop(ctx, src, srclen TSRMLS_CC);
  391: 
  392: 	*newlen = ctx->result.len;
       	/* Make sure a buffer exists even when the scanner produced no output. */
  393: 	if (!ctx->result.c) {
  394: 		smart_str_appendl(&ctx->result, "", 0);
  395: 	}
  396: 	if (do_flush) {
  397: 		smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
  398: 		*newlen += ctx->buf.len;
  399: 		smart_str_free(&ctx->buf);
  400: 	}
       	/* Terminate only after the flush append, so the returned buffer is NUL-terminated in both cases. */
  401: 	smart_str_0(&ctx->result);
  402: 	retval = ctx->result.c;
  403: 	ctx->result.c = NULL;
  404: 	ctx->result.len = 0;
  405: 	return retval;
  406: }
  407: 
  408: static int php_url_scanner_ex_activate(TSRMLS_D)
  409: {
       	/* Reset the scanner context for use; always returns SUCCESS. */
  410: 	url_adapt_state_ex_t *ctx;
  411: 	
  412: 	ctx = &BG(url_adapt_state_ex);
  413: 
       	/* The length is the byte offset of the `tags` member: everything before it is zeroed, `tags` and anything after it is left untouched. */
  414: 	memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
  415: 
  416: 	return SUCCESS;
  417: }
  418: 
  419: static int php_url_scanner_ex_deactivate(TSRMLS_D)
  420: {
       	/* Release the scanner's working buffers (result, buf, tag, arg); always returns SUCCESS. */
  421: 	url_adapt_state_ex_t *ctx;
  422: 	
  423: 	ctx = &BG(url_adapt_state_ex);
  424: 
  425: 	smart_str_free(&ctx->result);
  426: 	smart_str_free(&ctx->buf);
  427: 	smart_str_free(&ctx->tag);
  428: 	smart_str_free(&ctx->arg);
  429: 
  430: 	return SUCCESS;
  431: }
  432: 
  433: static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC)
  434: {
       	/*
       	 * Output handler registered by php_url_scanner_add_var().
       	 *
       	 * output, output_len   chunk of output to process
       	 * handled_output       receives the processed buffer
       	 * handled_output_len   receives its length
       	 * mode                 output handler flags
       	 */
  435: 	size_t len;
  436: 
       	/* Variables are registered: rewrite the chunk.  The flush argument is 1 when any of the listed mode flags is set. */
  437: 	if (BG(url_adapt_state_ex).url_app.len != 0) {
  438: 		*handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
       		/* Clamp the length when size_t is wider than uint. */
  439: 		if (sizeof(uint) < sizeof(size_t)) {
  440: 			if (len > UINT_MAX)
  441: 				len = UINT_MAX;
  442: 		}
  443: 		*handled_output_len = len;
       	/* Nothing to add: pass the data through, preceded by any input still buffered from earlier calls. */
  444: 	} else if (BG(url_adapt_state_ex).url_app.len == 0) {
  445: 		url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
  446: 		if (ctx->buf.len) {
  447: 			smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
  448: 			smart_str_appendl(&ctx->result, output, output_len);
  449: 
  450: 			*handled_output = ctx->result.c;
  451: 			*handled_output_len = ctx->buf.len + output_len;
  452: 
  453: 			ctx->result.c = NULL;
  454: 			ctx->result.len = 0;
  455: 			smart_str_free(&ctx->buf);
  456: 		} else {
  457: 			*handled_output = estrndup(output, *handled_output_len = output_len);
  458: 		}
       	/* NOTE(review): unreachable -- the two conditions above are exact complements. */
  459: 	} else {
  460: 		*handled_output = NULL;
  461: 	}
  462: }
  463: 
  464: PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC)
  465: {
       	/*
       	 * Register a "name=value" pair to be added to URLs and forms.
       	 *
       	 * name, name_len    parameter name
       	 * value, value_len  parameter value
       	 * urlencode         when non-zero, the value is passed through
       	 *                   php_url_encode() first
       	 *
       	 * The pair is appended to url_app and a matching hidden <input>
       	 * element is appended to form_app.  Always returns SUCCESS.
       	 * NOTE(review): name and value are placed inside the HTML attributes
       	 * without any HTML escaping visible here -- confirm callers pass safe
       	 * data.
       	 */
  466: 	char *encoded;
  467: 	int encoded_len;
  468: 	smart_str val;
  469: 	
       	/* First use: reset the context and start the "URL-Rewriter" output handler. */
  470: 	if (! BG(url_adapt_state_ex).active) {
  471: 		php_url_scanner_ex_activate(TSRMLS_C);
  472: 		php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
  473: 		BG(url_adapt_state_ex).active = 1;
  474: 	}
  475: 
  476: 
       	/* Pairs after the first are joined with the configured output separator. */
  477: 	if (BG(url_adapt_state_ex).url_app.len != 0) {
  478: 		smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
  479: 	}
  480: 
       	/* `val` refers either to the encoded copy or to the caller's value. */
  481: 	if (urlencode) {
  482: 		encoded = php_url_encode(value, value_len, &encoded_len);
  483: 		smart_str_setl(&val, encoded, encoded_len);
  484: 	} else {
  485: 		smart_str_setl(&val, value, value_len);
  486: 	}
  487: 	
  488: 	smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
  489: 	smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
  490: 	smart_str_append(&BG(url_adapt_state_ex).url_app, &val);
  491: 
  492: 	smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\""); 
  493: 	smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
  494: 	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
  495: 	smart_str_append(&BG(url_adapt_state_ex).form_app, &val);
  496: 	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
  497: 
       	/* `encoded` is only assigned, and therefore only freed, on the urlencode path. */
  498: 	if (urlencode)
  499: 		efree(encoded);
  500: 
  501: 	return SUCCESS;
  502: }
  503: 
  504: PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
  505: {
       	/* Forget all registered variables by setting both lengths to zero; the buffers themselves are not freed here.  Always returns SUCCESS. */
  506: 	BG(url_adapt_state_ex).form_app.len = 0;
  507: 	BG(url_adapt_state_ex).url_app.len = 0;
  508: 
  509: 	return SUCCESS;
  510: }
  511: 
  512: PHP_MINIT_FUNCTION(url_scanner)
  513: {
       	/* Module startup: clear the tag table pointer and the form_app/url_app buffers, then register the INI entries.  Returns SUCCESS. */
  514: 	BG(url_adapt_state_ex).tags = NULL;
  515: 
  516: 	BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0;
  517: 	BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0;
  518: 
  519: 	REGISTER_INI_ENTRIES();
  520: 	return SUCCESS;
  521: }
  522: 
  523: PHP_MSHUTDOWN_FUNCTION(url_scanner)
  524: {
       	/* Module shutdown: unregister the INI entries.  The tags table is not released here -- presumably that happens elsewhere; verify.  Returns SUCCESS. */
  525: 	UNREGISTER_INI_ENTRIES();
  526: 
  527: 	return SUCCESS;
  528: }
  529: 
  530: PHP_RINIT_FUNCTION(url_scanner)
  531: {
       	/* Request startup: mark the rewriter inactive; php_url_scanner_add_var() activates it on first use.  Returns SUCCESS. */
  532: 	BG(url_adapt_state_ex).active = 0;
  533: 	
  534: 	return SUCCESS;
  535: }
  536: 
  537: PHP_RSHUTDOWN_FUNCTION(url_scanner)
  538: {
       	/* Request shutdown: release the scanner buffers if the rewriter was active, then free form_app and url_app in any case.  Returns SUCCESS. */
  539: 	if (BG(url_adapt_state_ex).active) {
  540: 		php_url_scanner_ex_deactivate(TSRMLS_C);
  541: 		BG(url_adapt_state_ex).active = 0;
  542: 	}
  543: 
  544: 	smart_str_free(&BG(url_adapt_state_ex).form_app);
  545: 	smart_str_free(&BG(url_adapt_state_ex).url_app);
  546: 
  547: 	return SUCCESS;
  548: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>