Return to url_scanner_ex.re CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard |
1.1 ! misho 1: /* ! 2: +----------------------------------------------------------------------+ ! 3: | PHP Version 5 | ! 4: +----------------------------------------------------------------------+ ! 5: | Copyright (c) 1997-2006 The PHP Group | ! 6: +----------------------------------------------------------------------+ ! 7: | This source file is subject to version 3.01 of the PHP license, | ! 8: | that is bundled with this package in the file LICENSE, and is | ! 9: | available through the world-wide-web at the following url: | ! 10: | http://www.php.net/license/3_01.txt | ! 11: | If you did not receive a copy of the PHP license and are unable to | ! 12: | obtain it through the world-wide-web, please send a note to | ! 13: | license@php.net so we can mail you a copy immediately. | ! 14: +----------------------------------------------------------------------+ ! 15: | Author: Sascha Schumann <sascha@schumann.cx> | ! 16: +----------------------------------------------------------------------+ ! 17: */ ! 18: ! 19: /* $Id: url_scanner_ex.re 313832 2011-07-28 10:52:45Z pajoye $ */ ! 20: ! 21: #include "php.h" ! 22: ! 23: #ifdef HAVE_UNISTD_H ! 24: #include <unistd.h> ! 25: #endif ! 26: #ifdef HAVE_LIMITS_H ! 27: #include <limits.h> ! 28: #endif ! 29: ! 30: #include <stdio.h> ! 31: #include <stdlib.h> ! 32: #include <string.h> ! 33: ! 34: #include "php_ini.h" ! 35: #include "php_globals.h" ! 36: #define STATE_TAG SOME_OTHER_STATE_TAG ! 37: #include "basic_functions.h" ! 38: #include "url.h" ! 39: #undef STATE_TAG ! 40: ! 41: #define url_scanner url_scanner_ex ! 42: ! 43: #include "php_smart_str.h" ! 44: ! 45: static PHP_INI_MH(OnUpdateTags) ! 46: { ! 47: url_adapt_state_ex_t *ctx; ! 48: char *key; ! 49: char *lasts; ! 50: char *tmp; ! 51: ! 52: ctx = &BG(url_adapt_state_ex); ! 53: ! 54: tmp = estrndup(new_value, new_value_length); ! 55: ! 56: if (ctx->tags) ! 57: zend_hash_destroy(ctx->tags); ! 58: else { ! 59: ctx->tags = malloc(sizeof(HashTable)); ! 60: if (!ctx->tags) { ! 61: return FAILURE; ! 62: } ! 63: } ! 64: ! 65: zend_hash_init(ctx->tags, 0, NULL, NULL, 1); ! 66: ! 67: for (key = php_strtok_r(tmp, ",", &lasts); ! 68: key; ! 69: key = php_strtok_r(NULL, ",", &lasts)) { ! 70: char *val; ! 71: ! 72: val = strchr(key, '='); ! 73: if (val) { ! 74: char *q; ! 75: int keylen; ! 76: ! 77: *val++ = '\0'; ! 78: for (q = key; *q; q++) ! 79: *q = tolower(*q); ! 80: keylen = q - key; ! 81: /* key is stored withOUT NUL ! 82: val is stored WITH NUL */ ! 83: zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL); ! 84: } ! 85: } ! 86: ! 87: efree(tmp); ! 88: ! 89: return SUCCESS; ! 90: } ! 91: ! 92: PHP_INI_BEGIN() ! 93: STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals) ! 94: PHP_INI_END() ! 95: ! 96: /*!re2c ! 97: any = [\000-\377]; ! 98: N = (any\[<]); ! 99: alpha = [a-zA-Z]; ! 100: alphanamespace = [a-zA-Z:]; ! 101: alphadash = ([a-zA-Z] | "-"); ! 102: */ ! 103: ! 104: #define YYFILL(n) goto done ! 105: #define YYCTYPE unsigned char ! 106: #define YYCURSOR p ! 107: #define YYLIMIT q ! 108: #define YYMARKER r ! 109: ! 110: static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator) ! 111: { ! 112: register const char *p, *q; ! 113: const char *bash = NULL; ! 114: const char *sep = "?"; ! 115: ! 116: q = (p = url->c) + url->len; ! 117: ! 118: scan: ! 119: /*!re2c ! 120: ":" { smart_str_append(dest, url); return; } ! 121: "?" { sep = separator; goto scan; } ! 122: "#" { bash = p - 1; goto done; } ! 123: (any\[:?#])+ { goto scan; } ! 124: */ ! 125: done: ! 126: ! 127: /* Don't modify URLs of the format "#mark" */ ! 128: if (bash && bash - url->c == 0) { ! 129: smart_str_append(dest, url); ! 130: return; ! 131: } ! 132: ! 133: if (bash) ! 134: smart_str_appendl(dest, url->c, bash - url->c); ! 135: else ! 136: smart_str_append(dest, url); ! 137: ! 138: smart_str_appends(dest, sep); ! 139: smart_str_append(dest, url_app); ! 140: ! 141: if (bash) ! 142: smart_str_appendl(dest, bash, q - bash); ! 143: } ! 144: ! 145: ! 146: #undef YYFILL ! 147: #undef YYCTYPE ! 148: #undef YYCURSOR ! 149: #undef YYLIMIT ! 150: #undef YYMARKER ! 151: ! 152: static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC) ! 153: { ! 154: char f = 0; ! 155: ! 156: if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0) ! 157: f = 1; ! 158: ! 159: if (quotes) ! 160: smart_str_appendc(&ctx->result, type); ! 161: if (f) { ! 162: append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output); ! 163: } else { ! 164: smart_str_append(&ctx->result, &ctx->val); ! 165: } ! 166: if (quotes) ! 167: smart_str_appendc(&ctx->result, type); ! 168: } ! 169: ! 170: enum { ! 171: STATE_PLAIN = 0, ! 172: STATE_TAG, ! 173: STATE_NEXT_ARG, ! 174: STATE_ARG, ! 175: STATE_BEFORE_VAL, ! 176: STATE_VAL ! 177: }; ! 178: ! 179: #define YYFILL(n) goto stop ! 180: #define YYCTYPE unsigned char ! 181: #define YYCURSOR xp ! 182: #define YYLIMIT end ! 183: #define YYMARKER q ! 184: #define STATE ctx->state ! 185: ! 186: #define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC ! 187: #define STD_ARGS ctx, start, xp TSRMLS_CC ! 188: ! 189: #if SCANNER_DEBUG ! 190: #define scdebug(x) printf x ! 191: #else ! 192: #define scdebug(x) ! 193: #endif ! 194: ! 195: static inline void passthru(STD_PARA) ! 196: { ! 197: scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start)); ! 198: smart_str_appendl(&ctx->result, start, YYCURSOR - start); ! 199: } ! 200: ! 201: /* ! 202: * This function appends a hidden input field after a <form> or ! 203: * <fieldset>. The latter is important for XHTML. ! 204: */ ! 205: ! 206: static void handle_form(STD_PARA) ! 207: { ! 208: int doit = 0; ! 209: ! 210: if (ctx->form_app.len > 0) { ! 211: switch (ctx->tag.len) { ! 212: case sizeof("form") - 1: ! 213: if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) { ! 214: doit = 1; ! 215: } ! 216: if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) { ! 217: char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len); ! 218: if (p) { ! 219: e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p); ! 220: if (!e) { ! 221: e = ctx->val.c + ctx->val.len; ! 222: } ! 223: if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) { ! 224: doit = 0; ! 225: } ! 226: } ! 227: } ! 228: break; ! 229: ! 230: case sizeof("fieldset") - 1: ! 231: if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) { ! 232: doit = 1; ! 233: } ! 234: break; ! 235: } ! 236: ! 237: if (doit) ! 238: smart_str_append(&ctx->result, &ctx->form_app); ! 239: } ! 240: } ! 241: ! 242: /* ! 243: * HANDLE_TAG copies the HTML Tag and checks whether we ! 244: * have that tag in our table. If we might modify it, ! 245: * we continue to scan the tag, otherwise we simply copy the complete ! 246: * HTML stuff to the result buffer. ! 247: */ ! 248: ! 249: static inline void handle_tag(STD_PARA) ! 250: { ! 251: int ok = 0; ! 252: int i; ! 253: ! 254: ctx->tag.len = 0; ! 255: smart_str_appendl(&ctx->tag, start, YYCURSOR - start); ! 256: for (i = 0; i < ctx->tag.len; i++) ! 257: ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]); ! 258: if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS) ! 259: ok = 1; ! 260: STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN; ! 261: } ! 262: ! 263: static inline void handle_arg(STD_PARA) ! 264: { ! 265: ctx->arg.len = 0; ! 266: smart_str_appendl(&ctx->arg, start, YYCURSOR - start); ! 267: } ! 268: ! 269: static inline void handle_val(STD_PARA, char quotes, char type) ! 270: { ! 271: smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2); ! 272: tag_arg(ctx, quotes, type TSRMLS_CC); ! 273: } ! 274: ! 275: static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC) ! 276: { ! 277: char *end, *q; ! 278: char *xp; ! 279: char *start; ! 280: int rest; ! 281: ! 282: smart_str_appendl(&ctx->buf, newdata, newlen); ! 283: ! 284: YYCURSOR = ctx->buf.c; ! 285: YYLIMIT = ctx->buf.c + ctx->buf.len; ! 286: ! 287: switch (STATE) { ! 288: case STATE_PLAIN: goto state_plain; ! 289: case STATE_TAG: goto state_tag; ! 290: case STATE_NEXT_ARG: goto state_next_arg; ! 291: case STATE_ARG: goto state_arg; ! 292: case STATE_BEFORE_VAL: goto state_before_val; ! 293: case STATE_VAL: goto state_val; ! 294: } ! 295: ! 296: ! 297: state_plain_begin: ! 298: STATE = STATE_PLAIN; ! 299: ! 300: state_plain: ! 301: start = YYCURSOR; ! 302: /*!re2c ! 303: "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; } ! 304: N+ { passthru(STD_ARGS); goto state_plain; } ! 305: */ ! 306: ! 307: state_tag: ! 308: start = YYCURSOR; ! 309: /*!re2c ! 310: alphanamespace+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; } ! 311: any { passthru(STD_ARGS); goto state_plain_begin; } ! 312: */ ! 313: ! 314: state_next_arg_begin: ! 315: STATE = STATE_NEXT_ARG; ! 316: ! 317: state_next_arg: ! 318: start = YYCURSOR; ! 319: /*!re2c ! 320: ">" { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; } ! 321: [ \v\r\t\n]+ { passthru(STD_ARGS); goto state_next_arg; } ! 322: alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; } ! 323: any { passthru(STD_ARGS); goto state_plain_begin; } ! 324: */ ! 325: ! 326: state_arg: ! 327: start = YYCURSOR; ! 328: /*!re2c ! 329: alpha alphadash* { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; } ! 330: any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; } ! 331: */ ! 332: ! 333: state_before_val: ! 334: start = YYCURSOR; ! 335: /*!re2c ! 336: [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; } ! 337: any { --YYCURSOR; goto state_next_arg_begin; } ! 338: */ ! 339: ! 340: ! 341: state_val: ! 342: start = YYCURSOR; ! 343: /*!re2c ! 344: ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; } ! 345: ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; } ! 346: (any\[ \r\t\n>])+ { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; } ! 347: any { passthru(STD_ARGS); goto state_next_arg_begin; } ! 348: */ ! 349: ! 350: stop: ! 351: rest = YYLIMIT - start; ! 352: scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest)); ! 353: /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */ ! 354: if (rest < 0) rest = 0; ! 355: ! 356: if (rest) memmove(ctx->buf.c, start, rest); ! 357: ctx->buf.len = rest; ! 358: } ! 359: ! 360: char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC) ! 361: { ! 362: smart_str surl = {0}; ! 363: smart_str buf = {0}; ! 364: smart_str url_app = {0}; ! 365: ! 366: smart_str_setl(&surl, url, urllen); ! 367: ! 368: smart_str_appends(&url_app, name); ! 369: smart_str_appendc(&url_app, '='); ! 370: smart_str_appends(&url_app, value); ! 371: ! 372: append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output); ! 373: ! 374: smart_str_0(&buf); ! 375: if (newlen) *newlen = buf.len; ! 376: ! 377: smart_str_free(&url_app); ! 378: ! 379: return buf.c; ! 380: } ! 381: ! 382: ! 383: static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC) ! 384: { ! 385: url_adapt_state_ex_t *ctx; ! 386: char *retval; ! 387: ! 388: ctx = &BG(url_adapt_state_ex); ! 389: ! 390: xx_mainloop(ctx, src, srclen TSRMLS_CC); ! 391: ! 392: *newlen = ctx->result.len; ! 393: if (!ctx->result.c) { ! 394: smart_str_appendl(&ctx->result, "", 0); ! 395: } ! 396: smart_str_0(&ctx->result); ! 397: if (do_flush) { ! 398: smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len); ! 399: *newlen += ctx->buf.len; ! 400: smart_str_free(&ctx->buf); ! 401: } ! 402: retval = ctx->result.c; ! 403: ctx->result.c = NULL; ! 404: ctx->result.len = 0; ! 405: return retval; ! 406: } ! 407: ! 408: static int php_url_scanner_ex_activate(TSRMLS_D) ! 409: { ! 410: url_adapt_state_ex_t *ctx; ! 411: ! 412: ctx = &BG(url_adapt_state_ex); ! 413: ! 414: memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags)); ! 415: ! 416: return SUCCESS; ! 417: } ! 418: ! 419: static int php_url_scanner_ex_deactivate(TSRMLS_D) ! 420: { ! 421: url_adapt_state_ex_t *ctx; ! 422: ! 423: ctx = &BG(url_adapt_state_ex); ! 424: ! 425: smart_str_free(&ctx->result); ! 426: smart_str_free(&ctx->buf); ! 427: smart_str_free(&ctx->tag); ! 428: smart_str_free(&ctx->arg); ! 429: ! 430: return SUCCESS; ! 431: } ! 432: ! 433: static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC) ! 434: { ! 435: size_t len; ! 436: ! 437: if (BG(url_adapt_state_ex).url_app.len != 0) { ! 438: *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT) ? 1 : 0) TSRMLS_CC); ! 439: if (sizeof(uint) < sizeof(size_t)) { ! 440: if (len > UINT_MAX) ! 441: len = UINT_MAX; ! 442: } ! 443: *handled_output_len = len; ! 444: } else if (BG(url_adapt_state_ex).url_app.len == 0) { ! 445: url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex); ! 446: if (ctx->buf.len) { ! 447: smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len); ! 448: smart_str_appendl(&ctx->result, output, output_len); ! 449: ! 450: *handled_output = ctx->result.c; ! 451: *handled_output_len = ctx->buf.len + output_len; ! 452: ! 453: ctx->result.c = NULL; ! 454: ctx->result.len = 0; ! 455: smart_str_free(&ctx->buf); ! 456: } else { ! 457: *handled_output = NULL; ! 458: } ! 459: } else { ! 460: *handled_output = NULL; ! 461: } ! 462: } ! 463: ! 464: PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC) ! 465: { ! 466: char *encoded; ! 467: int encoded_len; ! 468: smart_str val; ! 469: ! 470: if (! BG(url_adapt_state_ex).active) { ! 471: php_url_scanner_ex_activate(TSRMLS_C); ! 472: php_ob_set_internal_handler(php_url_scanner_output_handler, 0, "URL-Rewriter", 1 TSRMLS_CC); ! 473: BG(url_adapt_state_ex).active = 1; ! 474: } ! 475: ! 476: ! 477: if (BG(url_adapt_state_ex).url_app.len != 0) { ! 478: smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output); ! 479: } ! 480: ! 481: if (urlencode) { ! 482: encoded = php_url_encode(value, value_len, &encoded_len); ! 483: smart_str_setl(&val, encoded, encoded_len); ! 484: } else { ! 485: smart_str_setl(&val, value, value_len); ! 486: } ! 487: ! 488: smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len); ! 489: smart_str_appendc(&BG(url_adapt_state_ex).url_app, '='); ! 490: smart_str_append(&BG(url_adapt_state_ex).url_app, &val); ! 491: ! 492: smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\""); ! 493: smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len); ! 494: smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\""); ! 495: smart_str_append(&BG(url_adapt_state_ex).form_app, &val); ! 496: smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />"); ! 497: ! 498: if (urlencode) ! 499: efree(encoded); ! 500: ! 501: return SUCCESS; ! 502: } ! 503: ! 504: PHPAPI int php_url_scanner_reset_vars(TSRMLS_D) ! 505: { ! 506: BG(url_adapt_state_ex).form_app.len = 0; ! 507: BG(url_adapt_state_ex).url_app.len = 0; ! 508: ! 509: return SUCCESS; ! 510: } ! 511: ! 512: PHP_MINIT_FUNCTION(url_scanner) ! 513: { ! 514: BG(url_adapt_state_ex).tags = NULL; ! 515: ! 516: BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0; ! 517: BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0; ! 518: ! 519: REGISTER_INI_ENTRIES(); ! 520: return SUCCESS; ! 521: } ! 522: ! 523: PHP_MSHUTDOWN_FUNCTION(url_scanner) ! 524: { ! 525: UNREGISTER_INI_ENTRIES(); ! 526: ! 527: return SUCCESS; ! 528: } ! 529: ! 530: PHP_RINIT_FUNCTION(url_scanner) ! 531: { ! 532: BG(url_adapt_state_ex).active = 0; ! 533: ! 534: return SUCCESS; ! 535: } ! 536: ! 537: PHP_RSHUTDOWN_FUNCTION(url_scanner) ! 538: { ! 539: if (BG(url_adapt_state_ex).active) { ! 540: php_url_scanner_ex_deactivate(TSRMLS_C); ! 541: BG(url_adapt_state_ex).active = 0; ! 542: } ! 543: ! 544: smart_str_free(&BG(url_adapt_state_ex).form_app); ! 545: smart_str_free(&BG(url_adapt_state_ex).url_app); ! 546: ! 547: return SUCCESS; ! 548: }