Annotation of embedaddon/php/ext/standard/url_scanner_ex.re, revision 1.1.1.4
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
1.1.1.3 misho 3: | PHP Version 5 |
1.1 misho 4: +----------------------------------------------------------------------+
1.1.1.3 misho 5: | Copyright (c) 1997-2013 The PHP Group |
1.1 misho 6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Sascha Schumann <sascha@schumann.cx> |
16: +----------------------------------------------------------------------+
17: */
18:
1.1.1.2 misho 19: /* $Id$ */
1.1 misho 20:
21: #include "php.h"
22:
23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
26: #ifdef HAVE_LIMITS_H
27: #include <limits.h>
28: #endif
29:
30: #include <stdio.h>
31: #include <stdlib.h>
32: #include <string.h>
33:
34: #include "php_ini.h"
35: #include "php_globals.h"
36: #define STATE_TAG SOME_OTHER_STATE_TAG
37: #include "basic_functions.h"
38: #include "url.h"
39: #undef STATE_TAG
40:
41: #define url_scanner url_scanner_ex
42:
43: #include "php_smart_str.h"
44:
45: static PHP_INI_MH(OnUpdateTags)
46: {
47: url_adapt_state_ex_t *ctx;
48: char *key;
49: char *lasts;
50: char *tmp;
51:
52: ctx = &BG(url_adapt_state_ex);
53:
54: tmp = estrndup(new_value, new_value_length);
55:
56: if (ctx->tags)
57: zend_hash_destroy(ctx->tags);
58: else {
59: ctx->tags = malloc(sizeof(HashTable));
60: if (!ctx->tags) {
61: return FAILURE;
62: }
63: }
64:
65: zend_hash_init(ctx->tags, 0, NULL, NULL, 1);
66:
67: for (key = php_strtok_r(tmp, ",", &lasts);
68: key;
69: key = php_strtok_r(NULL, ",", &lasts)) {
70: char *val;
71:
72: val = strchr(key, '=');
73: if (val) {
74: char *q;
75: int keylen;
76:
77: *val++ = '\0';
78: for (q = key; *q; q++)
79: *q = tolower(*q);
80: keylen = q - key;
81: /* key is stored withOUT NUL
82: val is stored WITH NUL */
83: zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL);
84: }
85: }
86:
87: efree(tmp);
88:
89: return SUCCESS;
90: }
91:
/* INI entry table for this file: declares "url_rewriter.tags" with its
   default tag=attribute list, changeable at PHP_INI_ALL scope, and names
   OnUpdateTags as the handler that runs when the value is updated. */
92: PHP_INI_BEGIN()
93: STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
94: PHP_INI_END()
95:
/* Named character classes used by the re2c scanner rules in this file:
     any            - any byte, \000 through \377
     N              - any byte except '<'
     alpha          - an ASCII letter
     alphanamespace - an ASCII letter or ':'
     alphadash      - an ASCII letter or '-' */
96: /*!re2c
97: any = [\000-\377];
98: N = (any\[<]);
99: alpha = [a-zA-Z];
100: alphanamespace = [a-zA-Z:];
101: alphadash = ([a-zA-Z] | "-");
102: */
103:
/* re2c interface macros for the scanner in append_modified_url(): input
   bytes are unsigned char, the cursor is the local p, the limit is the
   local q, and a refill request (YYFILL) jumps to that function's done
   label instead of reading more input.
   NOTE(review): YYMARKER maps to r, which append_modified_url() does not
   declare; presumably the generated code never uses a marker for its
   rules. Confirm against the re2c output. */
103: #define YYFILL(n) goto done
104: #define YYCTYPE unsigned char
105: #define YYCURSOR p
106: #define YYLIMIT q
107: #define YYMARKER r
109:
110: static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
111: {
112: register const char *p, *q;
113: const char *bash = NULL;
114: const char *sep = "?";
115:
116: q = (p = url->c) + url->len;
117:
118: scan:
119: /*!re2c
120: ":" { smart_str_append(dest, url); return; }
121: "?" { sep = separator; goto scan; }
122: "#" { bash = p - 1; goto done; }
123: (any\[:?#])+ { goto scan; }
124: */
125: done:
126:
127: /* Don't modify URLs of the format "#mark" */
128: if (bash && bash - url->c == 0) {
129: smart_str_append(dest, url);
130: return;
131: }
132:
133: if (bash)
134: smart_str_appendl(dest, url->c, bash - url->c);
135: else
136: smart_str_append(dest, url);
137:
138: smart_str_appends(dest, sep);
139: smart_str_append(dest, url_app);
140:
141: if (bash)
142: smart_str_appendl(dest, bash, q - bash);
143: }
144:
145:
/* Drop the scanner macros used by append_modified_url(); they are defined
   again with different targets further down in this file. */
146: #undef YYFILL
147: #undef YYCTYPE
148: #undef YYCURSOR
149: #undef YYLIMIT
150: #undef YYMARKER
151:
152: static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
153: {
154: char f = 0;
155:
156: if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0)
157: f = 1;
158:
159: if (quotes)
160: smart_str_appendc(&ctx->result, type);
161: if (f) {
162: append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
163: } else {
164: smart_str_append(&ctx->result, &ctx->val);
165: }
166: if (quotes)
167: smart_str_appendc(&ctx->result, type);
168: }
169:
/* Scanner states kept in ctx->state; xx_mainloop() resumes at the label
   that corresponds to the stored value. */
enum {
	STATE_PLAIN      = 0,	/* outside of any tag */
	STATE_TAG        = 1,	/* just after '<' */
	STATE_NEXT_ARG   = 2,	/* inside a tag, before an attribute */
	STATE_ARG        = 3,	/* at an attribute name */
	STATE_BEFORE_VAL = 4,	/* after an attribute name */
	STATE_VAL        = 5	/* at an attribute value */
};
178:
/* re2c interface macros for the HTML scanner in xx_mainloop(): the cursor
   is xp, the limit is end, the backtrack marker is q, and a refill request
   jumps to the stop label. STATE is shorthand for ctx->state.

   STD_PARA / STD_ARGS are the common parameter list and the matching
   argument list of the scanner helper functions (context, token start,
   current cursor, plus the TSRM parameter).

   scdebug(x) forwards its parenthesized argument list to printf when
   SCANNER_DEBUG is non-zero and expands to nothing otherwise. */
179: #define YYFILL(n) goto stop
180: #define YYCTYPE unsigned char
181: #define YYCURSOR xp
182: #define YYLIMIT end
183: #define YYMARKER q
184: #define STATE ctx->state
185:
186: #define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
187: #define STD_ARGS ctx, start, xp TSRMLS_CC
188:
189: #if SCANNER_DEBUG
190: #define scdebug(x) printf x
191: #else
192: #define scdebug(x)
193: #endif
194:
195: static inline void passthru(STD_PARA)
196: {
197: scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
198: smart_str_appendl(&ctx->result, start, YYCURSOR - start);
199: }
200:
201: /*
202: * This function appends a hidden input field after a <form> or
203: * <fieldset>. The latter is important for XHTML.
204: */
205:
206: static void handle_form(STD_PARA)
207: {
208: int doit = 0;
209:
210: if (ctx->form_app.len > 0) {
211: switch (ctx->tag.len) {
212: case sizeof("form") - 1:
213: if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) {
214: doit = 1;
215: }
216: if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) {
217: char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len);
218: if (p) {
219: e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p);
220: if (!e) {
221: e = ctx->val.c + ctx->val.len;
222: }
223: if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
224: doit = 0;
225: }
226: }
227: }
228: break;
229:
230: case sizeof("fieldset") - 1:
231: if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) {
232: doit = 1;
233: }
234: break;
235: }
236:
237: if (doit)
238: smart_str_append(&ctx->result, &ctx->form_app);
239: }
240: }
241:
242: /*
243: * HANDLE_TAG copies the HTML Tag and checks whether we
244: * have that tag in our table. If we might modify it,
245: * we continue to scan the tag, otherwise we simply copy the complete
246: * HTML stuff to the result buffer.
247: */
248:
249: static inline void handle_tag(STD_PARA)
250: {
251: int ok = 0;
1.1.1.2 misho 252: unsigned int i;
1.1 misho 253:
254: ctx->tag.len = 0;
255: smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
256: for (i = 0; i < ctx->tag.len; i++)
257: ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]);
258: if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS)
259: ok = 1;
260: STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
261: }
262:
263: static inline void handle_arg(STD_PARA)
264: {
265: ctx->arg.len = 0;
266: smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
267: }
268:
269: static inline void handle_val(STD_PARA, char quotes, char type)
270: {
271: smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
272: tag_arg(ctx, quotes, type TSRMLS_CC);
273: }
274:
275: static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
276: {
277: char *end, *q;
278: char *xp;
279: char *start;
280: int rest;
281:
282: smart_str_appendl(&ctx->buf, newdata, newlen);
283:
284: YYCURSOR = ctx->buf.c;
285: YYLIMIT = ctx->buf.c + ctx->buf.len;
286:
287: switch (STATE) {
288: case STATE_PLAIN: goto state_plain;
289: case STATE_TAG: goto state_tag;
290: case STATE_NEXT_ARG: goto state_next_arg;
291: case STATE_ARG: goto state_arg;
292: case STATE_BEFORE_VAL: goto state_before_val;
293: case STATE_VAL: goto state_val;
294: }
295:
296:
297: state_plain_begin:
298: STATE = STATE_PLAIN;
299:
300: state_plain:
301: start = YYCURSOR;
302: /*!re2c
303: "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
304: N+ { passthru(STD_ARGS); goto state_plain; }
305: */
306:
307: state_tag:
308: start = YYCURSOR;
309: /*!re2c
310: alphanamespace+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
311: any { passthru(STD_ARGS); goto state_plain_begin; }
312: */
313:
314: state_next_arg_begin:
315: STATE = STATE_NEXT_ARG;
316:
317: state_next_arg:
318: start = YYCURSOR;
319: /*!re2c
1.1.1.4 ! misho 320: [/]? [>] { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
1.1 misho 321: [ \v\r\t\n]+ { passthru(STD_ARGS); goto state_next_arg; }
322: alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
323: any { passthru(STD_ARGS); goto state_plain_begin; }
324: */
325:
326: state_arg:
327: start = YYCURSOR;
328: /*!re2c
329: alpha alphadash* { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
330: any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
331: */
332:
333: state_before_val:
334: start = YYCURSOR;
335: /*!re2c
336: [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
337: any { --YYCURSOR; goto state_next_arg_begin; }
338: */
339:
340:
341: state_val:
342: start = YYCURSOR;
343: /*!re2c
344: ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
345: ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
1.1.1.4 ! misho 346: (any\[ \r\t\n>'"])+ { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
1.1 misho 347: any { passthru(STD_ARGS); goto state_next_arg_begin; }
348: */
349:
350: stop:
351: rest = YYLIMIT - start;
352: scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
353: /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
354: if (rest < 0) rest = 0;
355:
356: if (rest) memmove(ctx->buf.c, start, rest);
357: ctx->buf.len = rest;
358: }
359:
360: char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
361: {
362: smart_str surl = {0};
363: smart_str buf = {0};
364: smart_str url_app = {0};
365:
366: smart_str_setl(&surl, url, urllen);
367:
368: smart_str_appends(&url_app, name);
369: smart_str_appendc(&url_app, '=');
370: smart_str_appends(&url_app, value);
371:
372: append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
373:
374: smart_str_0(&buf);
375: if (newlen) *newlen = buf.len;
376:
377: smart_str_free(&url_app);
378:
379: return buf.c;
380: }
381:
382:
383: static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
384: {
385: url_adapt_state_ex_t *ctx;
386: char *retval;
387:
388: ctx = &BG(url_adapt_state_ex);
389:
390: xx_mainloop(ctx, src, srclen TSRMLS_CC);
391:
392: *newlen = ctx->result.len;
393: if (!ctx->result.c) {
394: smart_str_appendl(&ctx->result, "", 0);
395: }
396: smart_str_0(&ctx->result);
397: if (do_flush) {
398: smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
399: *newlen += ctx->buf.len;
400: smart_str_free(&ctx->buf);
401: }
402: retval = ctx->result.c;
403: ctx->result.c = NULL;
404: ctx->result.len = 0;
405: return retval;
406: }
407:
408: static int php_url_scanner_ex_activate(TSRMLS_D)
409: {
410: url_adapt_state_ex_t *ctx;
411:
412: ctx = &BG(url_adapt_state_ex);
413:
414: memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
415:
416: return SUCCESS;
417: }
418:
419: static int php_url_scanner_ex_deactivate(TSRMLS_D)
420: {
421: url_adapt_state_ex_t *ctx;
422:
423: ctx = &BG(url_adapt_state_ex);
424:
425: smart_str_free(&ctx->result);
426: smart_str_free(&ctx->buf);
427: smart_str_free(&ctx->tag);
428: smart_str_free(&ctx->arg);
429:
430: return SUCCESS;
431: }
432:
433: static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC)
434: {
435: size_t len;
436:
437: if (BG(url_adapt_state_ex).url_app.len != 0) {
1.1.1.2 misho 438: *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
1.1 misho 439: if (sizeof(uint) < sizeof(size_t)) {
440: if (len > UINT_MAX)
441: len = UINT_MAX;
442: }
443: *handled_output_len = len;
444: } else if (BG(url_adapt_state_ex).url_app.len == 0) {
445: url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
446: if (ctx->buf.len) {
447: smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
448: smart_str_appendl(&ctx->result, output, output_len);
449:
450: *handled_output = ctx->result.c;
451: *handled_output_len = ctx->buf.len + output_len;
452:
453: ctx->result.c = NULL;
454: ctx->result.len = 0;
455: smart_str_free(&ctx->buf);
456: } else {
1.1.1.2 misho 457: *handled_output = estrndup(output, *handled_output_len = output_len);
1.1 misho 458: }
459: } else {
460: *handled_output = NULL;
461: }
462: }
463:
464: PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC)
465: {
1.1.1.4 ! misho 466: char *encoded = NULL;
1.1 misho 467: int encoded_len;
468: smart_str val;
469:
470: if (! BG(url_adapt_state_ex).active) {
471: php_url_scanner_ex_activate(TSRMLS_C);
1.1.1.2 misho 472: php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
1.1 misho 473: BG(url_adapt_state_ex).active = 1;
474: }
475:
476:
477: if (BG(url_adapt_state_ex).url_app.len != 0) {
478: smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
479: }
480:
481: if (urlencode) {
482: encoded = php_url_encode(value, value_len, &encoded_len);
483: smart_str_setl(&val, encoded, encoded_len);
484: } else {
485: smart_str_setl(&val, value, value_len);
486: }
487:
488: smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
489: smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
490: smart_str_append(&BG(url_adapt_state_ex).url_app, &val);
491:
492: smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
493: smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
494: smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
495: smart_str_append(&BG(url_adapt_state_ex).form_app, &val);
496: smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
497:
498: if (urlencode)
499: efree(encoded);
500:
501: return SUCCESS;
502: }
503:
504: PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
505: {
506: BG(url_adapt_state_ex).form_app.len = 0;
507: BG(url_adapt_state_ex).url_app.len = 0;
508:
509: return SUCCESS;
510: }
511:
512: PHP_MINIT_FUNCTION(url_scanner)
513: {
514: BG(url_adapt_state_ex).tags = NULL;
515:
516: BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0;
517: BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0;
518:
519: REGISTER_INI_ENTRIES();
520: return SUCCESS;
521: }
522:
523: PHP_MSHUTDOWN_FUNCTION(url_scanner)
524: {
525: UNREGISTER_INI_ENTRIES();
526:
527: return SUCCESS;
528: }
529:
530: PHP_RINIT_FUNCTION(url_scanner)
531: {
532: BG(url_adapt_state_ex).active = 0;
533:
534: return SUCCESS;
535: }
536:
537: PHP_RSHUTDOWN_FUNCTION(url_scanner)
538: {
539: if (BG(url_adapt_state_ex).active) {
540: php_url_scanner_ex_deactivate(TSRMLS_C);
541: BG(url_adapt_state_ex).active = 0;
542: }
543:
544: smart_str_free(&BG(url_adapt_state_ex).form_app);
545: smart_str_free(&BG(url_adapt_state_ex).url_app);
546:
547: return SUCCESS;
548: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>