Annotation of embedaddon/php/ext/filter/sanitizing_filters.c, revision 1.1.1.4
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
1.1.1.4 ! misho 5: | Copyright (c) 1997-2014 The PHP Group |
1.1 misho 6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Authors: Derick Rethans <derick@php.net> |
16: +----------------------------------------------------------------------+
17: */
18:
1.1.1.2 misho 19: /* $Id$ */
1.1 misho 20:
21: #include "php_filter.h"
22: #include "filter_private.h"
23: #include "ext/standard/php_smart_str.h"
24:
25: /* {{{ STRUCTS */
/* Lookup table with one entry per possible byte value; filter_map_apply()
 * keeps a byte only when its entry is non-zero. */
26: typedef unsigned long filter_map[256];
27: /* }}} */
28:
29: /* {{{ HELPER FUNCTIONS */
30: static void php_filter_encode_html(zval *value, const unsigned char *chars)
31: {
32: smart_str str = {0};
33: int len = Z_STRLEN_P(value);
34: unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
35: unsigned char *e = s + len;
36:
37: if (Z_STRLEN_P(value) == 0) {
38: return;
39: }
40:
41: while (s < e) {
42: if (chars[*s]) {
43: smart_str_appendl(&str, "&#", 2);
44: smart_str_append_unsigned(&str, (unsigned long)*s);
45: smart_str_appendc(&str, ';');
46: } else {
47: /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
48: smart_str_appendc(&str, *s);
49: }
50: s++;
51: }
52:
53: smart_str_0(&str);
1.1.1.2 misho 54: str_efree(Z_STRVAL_P(value));
1.1 misho 55: Z_STRVAL_P(value) = str.c;
56: Z_STRLEN_P(value) = str.len;
57: }
58:
/* Upper-case hex digits, indexed by nibble value; used by
 * php_filter_encode_url() to emit the two digits after '%'. */
59: static const unsigned char hexchars[] = "0123456789ABCDEF";
60:
/* Character classes shared by the allow-lists of the filters in this file. */
61: #define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
62: #define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
63: #define DIGIT "0123456789"
64:
/* Bytes that php_filter_encoded() passes through without percent-encoding. */
65: #define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"
66:
67: static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
68: {
69: unsigned char *str, *p;
70: unsigned char tmp[256];
71: unsigned char *s = (unsigned char *)chars;
72: unsigned char *e = s + char_len;
73:
74: memset(tmp, 1, sizeof(tmp)-1);
75:
76: while (s < e) {
77: tmp[*s++] = 0;
78: }
79: /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
80: if (encode_nul) {
81: tmp[0] = 1;
82: }
83: if (high) {
84: memset(tmp + 127, 1, sizeof(tmp) - 127);
85: }
86: if (low) {
87: memset(tmp, 1, 32);
88: }
89: */
90: p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
91: s = (unsigned char *)Z_STRVAL_P(value);
92: e = s + Z_STRLEN_P(value);
93:
94: while (s < e) {
95: if (tmp[*s]) {
96: *p++ = '%';
97: *p++ = hexchars[(unsigned char) *s >> 4];
98: *p++ = hexchars[(unsigned char) *s & 15];
99: } else {
100: *p++ = *s;
101: }
102: s++;
103: }
104: *p = '\0';
1.1.1.2 misho 105: str_efree(Z_STRVAL_P(value));
1.1 misho 106: Z_STRVAL_P(value) = (char *)str;
107: Z_STRLEN_P(value) = p - str;
108: }
109:
110: static void php_filter_strip(zval *value, long flags)
111: {
112: unsigned char *buf, *str;
113: int i, c;
114:
115: /* Optimization for if no strip flags are set */
116: if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
117: return;
118: }
119:
120: str = (unsigned char *)Z_STRVAL_P(value);
121: buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
122: c = 0;
123: for (i = 0; i < Z_STRLEN_P(value); i++) {
124: if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
125: } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
126: } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
127: } else {
128: buf[c] = str[i];
129: ++c;
130: }
131: }
132: /* update zval string data */
133: buf[c] = '\0';
1.1.1.2 misho 134: str_efree(Z_STRVAL_P(value));
1.1 misho 135: Z_STRVAL_P(value) = (char *)buf;
136: Z_STRLEN_P(value) = c;
137: }
138: /* }}} */
139:
140: /* {{{ FILTER MAP HELPERS */
141: static void filter_map_init(filter_map *map)
142: {
143: memset(map, 0, sizeof(filter_map));
144: }
145:
146: static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
147: {
148: int l, i;
149:
150: l = strlen((const char*)allowed_list);
151: for (i = 0; i < l; ++i) {
152: (*map)[allowed_list[i]] = flag;
153: }
154: }
155:
156: static void filter_map_apply(zval *value, filter_map *map)
157: {
158: unsigned char *buf, *str;
159: int i, c;
160:
161: str = (unsigned char *)Z_STRVAL_P(value);
162: buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
163: c = 0;
164: for (i = 0; i < Z_STRLEN_P(value); i++) {
165: if ((*map)[str[i]]) {
166: buf[c] = str[i];
167: ++c;
168: }
169: }
170: /* update zval string data */
171: buf[c] = '\0';
1.1.1.2 misho 172: str_efree(Z_STRVAL_P(value));
1.1 misho 173: Z_STRVAL_P(value) = (char *)buf;
174: Z_STRLEN_P(value) = c;
175: }
176: /* }}} */
177:
178: /* {{{ php_filter_string */
179: void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
180: {
181: size_t new_len;
182: unsigned char enc[256] = {0};
183:
184: /* strip high/strip low ( see flags )*/
185: php_filter_strip(value, flags);
186:
187: if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
188: enc['\''] = enc['"'] = 1;
189: }
190: if (flags & FILTER_FLAG_ENCODE_AMP) {
191: enc['&'] = 1;
192: }
193: if (flags & FILTER_FLAG_ENCODE_LOW) {
194: memset(enc, 1, 32);
195: }
196: if (flags & FILTER_FLAG_ENCODE_HIGH) {
197: memset(enc + 127, 1, sizeof(enc) - 127);
198: }
199:
200: php_filter_encode_html(value, enc);
201:
202: /* strip tags, implicitly also removes \0 chars */
203: new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
204: Z_STRLEN_P(value) = new_len;
205:
206: if (new_len == 0) {
207: zval_dtor(value);
208: if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
209: ZVAL_NULL(value);
210: } else {
211: ZVAL_EMPTY_STRING(value);
212: }
213: return;
214: }
215: }
216: /* }}} */
217:
218: /* {{{ php_filter_encoded */
219: void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
220: {
221: /* apply strip_high and strip_low filters */
222: php_filter_strip(value, flags);
223: /* urlencode */
224: php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
225: }
226: /* }}} */
227:
228: /* {{{ php_filter_special_chars */
229: void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
230: {
231: unsigned char enc[256] = {0};
232:
233: php_filter_strip(value, flags);
234:
235: /* encodes ' " < > & \0 to numerical entities */
236: enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
237:
238: /* if strip low is not set, then we encode them as &#xx; */
239: memset(enc, 1, 32);
240:
241: if (flags & FILTER_FLAG_ENCODE_HIGH) {
242: memset(enc + 127, 1, sizeof(enc) - 127);
243: }
244:
245: php_filter_encode_html(value, enc);
246: }
247: /* }}} */
248:
249: /* {{{ php_filter_full_special_chars */
250: void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
251: {
252: char *buf;
1.1.1.2 misho 253: size_t len;
254: int quotes;
1.1 misho 255:
256: if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
257: quotes = ENT_QUOTES;
258: } else {
259: quotes = ENT_NOQUOTES;
260: }
261: buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
1.1.1.2 misho 262: str_efree(Z_STRVAL_P(value));
1.1 misho 263: Z_STRVAL_P(value) = buf;
264: Z_STRLEN_P(value) = len;
265: }
266: /* }}} */
267:
268: /* {{{ php_filter_unsafe_raw */
269: void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
270: {
271: /* Only if no flags are set (optimization) */
272: if (flags != 0 && Z_STRLEN_P(value) > 0) {
273: unsigned char enc[256] = {0};
274:
275: php_filter_strip(value, flags);
276:
277: if (flags & FILTER_FLAG_ENCODE_AMP) {
278: enc['&'] = 1;
279: }
280: if (flags & FILTER_FLAG_ENCODE_LOW) {
281: memset(enc, 1, 32);
282: }
283: if (flags & FILTER_FLAG_ENCODE_HIGH) {
284: memset(enc + 127, 1, sizeof(enc) - 127);
285: }
286:
287: php_filter_encode_html(value, enc);
288: } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
289: zval_dtor(value);
290: ZVAL_NULL(value);
291: }
292: }
293: /* }}} */
294:
295:
296:
297: /* {{{ php_filter_email */
/* Character classes that php_filter_url() adds to its allow-list on top of
 * LOWALPHA, HIALPHA and DIGIT. */
298: #define SAFE "$-_.+"
299: #define EXTRA "!*'(),"
300: #define NATIONAL "{}|\\^~[]`"
301: #define PUNCTUATION "<>#%\""
302: #define RESERVED ";/?:@&="
303:
304: void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
305: {
306: /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
307: const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
308: filter_map map;
309:
310: filter_map_init(&map);
311: filter_map_update(&map, 1, allowed_list);
312: filter_map_apply(value, &map);
313: }
314: /* }}} */
315:
316: /* {{{ php_filter_url */
317: void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
318: {
319: /* Strip all chars not part of section 5 of
320: * http://www.faqs.org/rfcs/rfc1738.html */
321: const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
322: filter_map map;
323:
324: filter_map_init(&map);
325: filter_map_update(&map, 1, allowed_list);
326: filter_map_apply(value, &map);
327: }
328: /* }}} */
329:
330: /* {{{ php_filter_number_int */
331: void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
332: {
333: /* strip everything [^0-9+-] */
334: const unsigned char allowed_list[] = "+-" DIGIT;
335: filter_map map;
336:
337: filter_map_init(&map);
338: filter_map_update(&map, 1, allowed_list);
339: filter_map_apply(value, &map);
340: }
341: /* }}} */
342:
343: /* {{{ php_filter_number_float */
344: void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
345: {
346: /* strip everything [^0-9+-] */
347: const unsigned char allowed_list[] = "+-" DIGIT;
348: filter_map map;
349:
350: filter_map_init(&map);
351: filter_map_update(&map, 1, allowed_list);
352:
353: /* depending on flags, strip '.', 'e', ",", "'" */
354: if (flags & FILTER_FLAG_ALLOW_FRACTION) {
355: filter_map_update(&map, 2, (const unsigned char *) ".");
356: }
357: if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
358: filter_map_update(&map, 3, (const unsigned char *) ",");
359: }
360: if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
361: filter_map_update(&map, 4, (const unsigned char *) "eE");
362: }
363: filter_map_apply(value, &map);
364: }
365: /* }}} */
366:
367: /* {{{ php_filter_magic_quotes */
368: void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
369: {
370: char *buf;
371: int len;
372:
373: /* just call php_addslashes quotes */
374: buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
375:
1.1.1.2 misho 376: str_efree(Z_STRVAL_P(value));
1.1 misho 377: Z_STRVAL_P(value) = buf;
378: Z_STRLEN_P(value) = len;
379: }
380: /* }}} */
381:
382: /*
383: * Local variables:
384: * tab-width: 4
385: * c-basic-offset: 4
386: * End:
387: * vim600: noet sw=4 ts=4 fdm=marker
388: * vim<600: noet sw=4 ts=4
389: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>