Annotation of embedaddon/php/ext/filter/sanitizing_filters.c, revision 1.1.1.1
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 1997-2012 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Authors: Derick Rethans <derick@php.net> |
16: +----------------------------------------------------------------------+
17: */
18:
19: /* $Id: sanitizing_filters.c 321634 2012-01-01 13:15:04Z felipe $ */
20:
21: #include "php_filter.h"
22: #include "filter_private.h"
23: #include "ext/standard/php_smart_str.h"
24:
/* {{{ STRUCTS */
/*
 * Lookup table with one slot per possible byte value (0..255).
 * The sanitizers in this file store a non-zero value in the slot of every
 * byte that is allowed to stay in the output.
 */
typedef unsigned long filter_map[256];
/* }}} */
28:
29: /* {{{ HELPER FUNCTIONS */
30: static void php_filter_encode_html(zval *value, const unsigned char *chars)
31: {
32: smart_str str = {0};
33: int len = Z_STRLEN_P(value);
34: unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
35: unsigned char *e = s + len;
36:
37: if (Z_STRLEN_P(value) == 0) {
38: return;
39: }
40:
41: while (s < e) {
42: if (chars[*s]) {
43: smart_str_appendl(&str, "&#", 2);
44: smart_str_append_unsigned(&str, (unsigned long)*s);
45: smart_str_appendc(&str, ';');
46: } else {
47: /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
48: smart_str_appendc(&str, *s);
49: }
50: s++;
51: }
52:
53: smart_str_0(&str);
54: efree(Z_STRVAL_P(value));
55: Z_STRVAL_P(value) = str.c;
56: Z_STRLEN_P(value) = str.len;
57: }
58:
/* Character classes used to build the allow-lists of the sanitizers below */
#define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT "0123456789"

/* Bytes that php_filter_encoded() leaves unescaped */
#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"

/* Upper-case hexadecimal digits, indexed by nibble value */
static const unsigned char hexchars[] = DIGIT "ABCDEF";
66:
67: static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
68: {
69: unsigned char *str, *p;
70: unsigned char tmp[256];
71: unsigned char *s = (unsigned char *)chars;
72: unsigned char *e = s + char_len;
73:
74: memset(tmp, 1, sizeof(tmp)-1);
75:
76: while (s < e) {
77: tmp[*s++] = 0;
78: }
79: /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
80: if (encode_nul) {
81: tmp[0] = 1;
82: }
83: if (high) {
84: memset(tmp + 127, 1, sizeof(tmp) - 127);
85: }
86: if (low) {
87: memset(tmp, 1, 32);
88: }
89: */
90: p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
91: s = (unsigned char *)Z_STRVAL_P(value);
92: e = s + Z_STRLEN_P(value);
93:
94: while (s < e) {
95: if (tmp[*s]) {
96: *p++ = '%';
97: *p++ = hexchars[(unsigned char) *s >> 4];
98: *p++ = hexchars[(unsigned char) *s & 15];
99: } else {
100: *p++ = *s;
101: }
102: s++;
103: }
104: *p = '\0';
105: efree(Z_STRVAL_P(value));
106: Z_STRVAL_P(value) = (char *)str;
107: Z_STRLEN_P(value) = p - str;
108: }
109:
110: static void php_filter_strip(zval *value, long flags)
111: {
112: unsigned char *buf, *str;
113: int i, c;
114:
115: /* Optimization for if no strip flags are set */
116: if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
117: return;
118: }
119:
120: str = (unsigned char *)Z_STRVAL_P(value);
121: buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
122: c = 0;
123: for (i = 0; i < Z_STRLEN_P(value); i++) {
124: if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
125: } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
126: } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
127: } else {
128: buf[c] = str[i];
129: ++c;
130: }
131: }
132: /* update zval string data */
133: buf[c] = '\0';
134: efree(Z_STRVAL_P(value));
135: Z_STRVAL_P(value) = (char *)buf;
136: Z_STRLEN_P(value) = c;
137: }
138: /* }}} */
139:
140: /* {{{ FILTER MAP HELPERS */
141: static void filter_map_init(filter_map *map)
142: {
143: memset(map, 0, sizeof(filter_map));
144: }
145:
146: static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
147: {
148: int l, i;
149:
150: l = strlen((const char*)allowed_list);
151: for (i = 0; i < l; ++i) {
152: (*map)[allowed_list[i]] = flag;
153: }
154: }
155:
156: static void filter_map_apply(zval *value, filter_map *map)
157: {
158: unsigned char *buf, *str;
159: int i, c;
160:
161: str = (unsigned char *)Z_STRVAL_P(value);
162: buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
163: c = 0;
164: for (i = 0; i < Z_STRLEN_P(value); i++) {
165: if ((*map)[str[i]]) {
166: buf[c] = str[i];
167: ++c;
168: }
169: }
170: /* update zval string data */
171: buf[c] = '\0';
172: efree(Z_STRVAL_P(value));
173: Z_STRVAL_P(value) = (char *)buf;
174: Z_STRLEN_P(value) = c;
175: }
176: /* }}} */
177:
178: /* {{{ php_filter_string */
179: void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
180: {
181: size_t new_len;
182: unsigned char enc[256] = {0};
183:
184: /* strip high/strip low ( see flags )*/
185: php_filter_strip(value, flags);
186:
187: if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
188: enc['\''] = enc['"'] = 1;
189: }
190: if (flags & FILTER_FLAG_ENCODE_AMP) {
191: enc['&'] = 1;
192: }
193: if (flags & FILTER_FLAG_ENCODE_LOW) {
194: memset(enc, 1, 32);
195: }
196: if (flags & FILTER_FLAG_ENCODE_HIGH) {
197: memset(enc + 127, 1, sizeof(enc) - 127);
198: }
199:
200: php_filter_encode_html(value, enc);
201:
202: /* strip tags, implicitly also removes \0 chars */
203: new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
204: Z_STRLEN_P(value) = new_len;
205:
206: if (new_len == 0) {
207: zval_dtor(value);
208: if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
209: ZVAL_NULL(value);
210: } else {
211: ZVAL_EMPTY_STRING(value);
212: }
213: return;
214: }
215: }
216: /* }}} */
217:
218: /* {{{ php_filter_encoded */
219: void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
220: {
221: /* apply strip_high and strip_low filters */
222: php_filter_strip(value, flags);
223: /* urlencode */
224: php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
225: }
226: /* }}} */
227:
228: /* {{{ php_filter_special_chars */
229: void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
230: {
231: unsigned char enc[256] = {0};
232:
233: php_filter_strip(value, flags);
234:
235: /* encodes ' " < > & \0 to numerical entities */
236: enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
237:
238: /* if strip low is not set, then we encode them as &#xx; */
239: memset(enc, 1, 32);
240:
241: if (flags & FILTER_FLAG_ENCODE_HIGH) {
242: memset(enc + 127, 1, sizeof(enc) - 127);
243: }
244:
245: php_filter_encode_html(value, enc);
246: }
247: /* }}} */
248:
249: /* {{{ php_filter_full_special_chars */
250: void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
251: {
252: char *buf;
253: int len, quotes;
254:
255: if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
256: quotes = ENT_QUOTES;
257: } else {
258: quotes = ENT_NOQUOTES;
259: }
260: buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
261: efree(Z_STRVAL_P(value));
262: Z_STRVAL_P(value) = buf;
263: Z_STRLEN_P(value) = len;
264: }
265: /* }}} */
266:
267: /* {{{ php_filter_unsafe_raw */
268: void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
269: {
270: /* Only if no flags are set (optimization) */
271: if (flags != 0 && Z_STRLEN_P(value) > 0) {
272: unsigned char enc[256] = {0};
273:
274: php_filter_strip(value, flags);
275:
276: if (flags & FILTER_FLAG_ENCODE_AMP) {
277: enc['&'] = 1;
278: }
279: if (flags & FILTER_FLAG_ENCODE_LOW) {
280: memset(enc, 1, 32);
281: }
282: if (flags & FILTER_FLAG_ENCODE_HIGH) {
283: memset(enc + 127, 1, sizeof(enc) - 127);
284: }
285:
286: php_filter_encode_html(value, enc);
287: } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
288: zval_dtor(value);
289: ZVAL_NULL(value);
290: }
291: }
292: /* }}} */
293:
294:
295:
296: /* {{{ php_filter_email */
297: #define SAFE "$-_.+"
298: #define EXTRA "!*'(),"
299: #define NATIONAL "{}|\\^~[]`"
300: #define PUNCTUATION "<>#%\""
301: #define RESERVED ";/?:@&="
302:
303: void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
304: {
305: /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
306: const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
307: filter_map map;
308:
309: filter_map_init(&map);
310: filter_map_update(&map, 1, allowed_list);
311: filter_map_apply(value, &map);
312: }
313: /* }}} */
314:
315: /* {{{ php_filter_url */
316: void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
317: {
318: /* Strip all chars not part of section 5 of
319: * http://www.faqs.org/rfcs/rfc1738.html */
320: const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
321: filter_map map;
322:
323: filter_map_init(&map);
324: filter_map_update(&map, 1, allowed_list);
325: filter_map_apply(value, &map);
326: }
327: /* }}} */
328:
329: /* {{{ php_filter_number_int */
330: void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
331: {
332: /* strip everything [^0-9+-] */
333: const unsigned char allowed_list[] = "+-" DIGIT;
334: filter_map map;
335:
336: filter_map_init(&map);
337: filter_map_update(&map, 1, allowed_list);
338: filter_map_apply(value, &map);
339: }
340: /* }}} */
341:
342: /* {{{ php_filter_number_float */
343: void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
344: {
345: /* strip everything [^0-9+-] */
346: const unsigned char allowed_list[] = "+-" DIGIT;
347: filter_map map;
348:
349: filter_map_init(&map);
350: filter_map_update(&map, 1, allowed_list);
351:
352: /* depending on flags, strip '.', 'e', ",", "'" */
353: if (flags & FILTER_FLAG_ALLOW_FRACTION) {
354: filter_map_update(&map, 2, (const unsigned char *) ".");
355: }
356: if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
357: filter_map_update(&map, 3, (const unsigned char *) ",");
358: }
359: if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
360: filter_map_update(&map, 4, (const unsigned char *) "eE");
361: }
362: filter_map_apply(value, &map);
363: }
364: /* }}} */
365:
366: /* {{{ php_filter_magic_quotes */
367: void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
368: {
369: char *buf;
370: int len;
371:
372: /* just call php_addslashes quotes */
373: buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
374:
375: efree(Z_STRVAL_P(value));
376: Z_STRVAL_P(value) = buf;
377: Z_STRLEN_P(value) = len;
378: }
379: /* }}} */
380:
381: /*
382: * Local variables:
383: * tab-width: 4
384: * c-basic-offset: 4
385: * End:
386: * vim600: noet sw=4 ts=4 fdm=marker
387: * vim<600: noet sw=4 ts=4
388: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>