File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / filter / sanitizing_filters.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:47:56 2012 UTC (12 years, 5 months ago) by misho
Branches: php, MAIN
CVS tags: v5_3_10, HEAD
php

    1: /*
    2:   +----------------------------------------------------------------------+
    3:   | PHP Version 5                                                        |
    4:   +----------------------------------------------------------------------+
    5:   | Copyright (c) 1997-2012 The PHP Group                                |
    6:   +----------------------------------------------------------------------+
    7:   | This source file is subject to version 3.01 of the PHP license,      |
    8:   | that is bundled with this package in the file LICENSE, and is        |
    9:   | available through the world-wide-web at the following url:           |
   10:   | http://www.php.net/license/3_01.txt                                  |
   11:   | If you did not receive a copy of the PHP license and are unable to   |
   12:   | obtain it through the world-wide-web, please send a note to          |
   13:   | license@php.net so we can mail you a copy immediately.               |
   14:   +----------------------------------------------------------------------+
   15:   | Authors: Derick Rethans <derick@php.net>                             |
   16:   +----------------------------------------------------------------------+
   17: */
   18: 
   19: /* $Id: sanitizing_filters.c,v 1.1.1.1 2012/02/21 23:47:56 misho Exp $ */
   20: 
   21: #include "php_filter.h"
   22: #include "filter_private.h"
   23: #include "ext/standard/php_smart_str.h"
   24: 
   25: /* {{{ STRUCTS */
   26: typedef unsigned long filter_map[256];
   27: /* }}} */
   28: 
   29: /* {{{ HELPER FUNCTIONS */
   30: static void php_filter_encode_html(zval *value, const unsigned char *chars)
   31: {
   32: 	smart_str str = {0};
   33: 	int len = Z_STRLEN_P(value);
   34: 	unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
   35: 	unsigned char *e = s + len;
   36: 
   37: 	if (Z_STRLEN_P(value) == 0) {
   38: 		return;
   39: 	}
   40: 
   41: 	while (s < e) {
   42: 		if (chars[*s]) {
   43: 			smart_str_appendl(&str, "&#", 2);
   44: 			smart_str_append_unsigned(&str, (unsigned long)*s);
   45: 			smart_str_appendc(&str, ';');
   46: 		} else {
   47: 			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
   48: 			smart_str_appendc(&str, *s);
   49: 		}
   50: 		s++;
   51: 	}
   52: 
   53: 	smart_str_0(&str);
   54: 	efree(Z_STRVAL_P(value));
   55: 	Z_STRVAL_P(value) = str.c;
   56: 	Z_STRLEN_P(value) = str.len;
   57: }
   58: 
   59: static const unsigned char hexchars[] = "0123456789ABCDEF";
   60: 
   61: #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
   62: #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   63: #define DIGIT       "0123456789"
   64: 
   65: #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
   66: 
   67: static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
   68: {
   69: 	unsigned char *str, *p;
   70: 	unsigned char tmp[256];
   71: 	unsigned char *s = (unsigned char *)chars;
   72: 	unsigned char *e = s + char_len;
   73: 
   74: 	memset(tmp, 1, sizeof(tmp)-1);
   75: 
   76: 	while (s < e) {
   77: 		tmp[*s++] = 0;
   78: 	}
   79: /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
   80: 	if (encode_nul) {
   81: 		tmp[0] = 1;
   82: 	}
   83: 	if (high) {
   84: 		memset(tmp + 127, 1, sizeof(tmp) - 127);
   85: 	}
   86: 	if (low) {
   87: 		memset(tmp, 1, 32);
   88: 	}
   89: */
   90: 	p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
   91: 	s = (unsigned char *)Z_STRVAL_P(value);
   92: 	e = s + Z_STRLEN_P(value);
   93: 
   94: 	while (s < e) {
   95: 		if (tmp[*s]) {
   96: 			*p++ = '%';
   97: 			*p++ = hexchars[(unsigned char) *s >> 4];
   98: 			*p++ = hexchars[(unsigned char) *s & 15];
   99: 		} else {
  100: 			*p++ = *s;	
  101: 		}
  102: 		s++;	
  103: 	}
  104: 	*p = '\0';
  105: 	efree(Z_STRVAL_P(value));
  106: 	Z_STRVAL_P(value) = (char *)str;
  107: 	Z_STRLEN_P(value) = p - str;
  108: }
  109: 
  110: static void php_filter_strip(zval *value, long flags)
  111: {
  112: 	unsigned char *buf, *str;
  113: 	int   i, c;
  114: 	
  115: 	/* Optimization for if no strip flags are set */
  116: 	if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
  117: 		return;
  118: 	}
  119: 
  120: 	str = (unsigned char *)Z_STRVAL_P(value);
  121: 	buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
  122: 	c = 0;
  123: 	for (i = 0; i < Z_STRLEN_P(value); i++) {
  124: 		if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
  125: 		} else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
  126: 		} else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
  127: 		} else {
  128: 			buf[c] = str[i];
  129: 			++c;
  130: 		}
  131: 	}
  132: 	/* update zval string data */
  133: 	buf[c] = '\0';
  134: 	efree(Z_STRVAL_P(value));
  135: 	Z_STRVAL_P(value) = (char *)buf;
  136: 	Z_STRLEN_P(value) = c;
  137: }
  138: /* }}} */
  139: 
  140: /* {{{ FILTER MAP HELPERS */
  141: static void filter_map_init(filter_map *map)
  142: {
  143: 	memset(map, 0, sizeof(filter_map));
  144: }
  145: 
  146: static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
  147: {
  148: 	int l, i;
  149: 
  150: 	l = strlen((const char*)allowed_list);
  151: 	for (i = 0; i < l; ++i) {
  152: 		(*map)[allowed_list[i]] = flag;
  153: 	}
  154: }
  155: 
  156: static void filter_map_apply(zval *value, filter_map *map)
  157: {
  158: 	unsigned char *buf, *str;
  159: 	int   i, c;
  160: 	
  161: 	str = (unsigned char *)Z_STRVAL_P(value);
  162: 	buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
  163: 	c = 0;
  164: 	for (i = 0; i < Z_STRLEN_P(value); i++) {
  165: 		if ((*map)[str[i]]) {
  166: 			buf[c] = str[i];
  167: 			++c;
  168: 		}
  169: 	}
  170: 	/* update zval string data */
  171: 	buf[c] = '\0';
  172: 	efree(Z_STRVAL_P(value));
  173: 	Z_STRVAL_P(value) = (char *)buf;
  174: 	Z_STRLEN_P(value) = c;
  175: }
  176: /* }}} */
  177: 
  178: /* {{{ php_filter_string */
  179: void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
  180: {
  181: 	size_t new_len;
  182: 	unsigned char enc[256] = {0};
  183: 
  184: 	/* strip high/strip low ( see flags )*/
  185: 	php_filter_strip(value, flags);
  186: 
  187: 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
  188: 		enc['\''] = enc['"'] = 1;
  189: 	}
  190: 	if (flags & FILTER_FLAG_ENCODE_AMP) {
  191: 		enc['&'] = 1;
  192: 	}
  193: 	if (flags & FILTER_FLAG_ENCODE_LOW) {
  194: 		memset(enc, 1, 32);
  195: 	}
  196: 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
  197: 		memset(enc + 127, 1, sizeof(enc) - 127);
  198: 	}
  199: 
  200: 	php_filter_encode_html(value, enc);
  201: 
  202: 	/* strip tags, implicitly also removes \0 chars */
  203: 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
  204: 	Z_STRLEN_P(value) = new_len;
  205: 
  206: 	if (new_len == 0) {
  207: 		zval_dtor(value);
  208: 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
  209: 			ZVAL_NULL(value);
  210: 		} else {
  211: 			ZVAL_EMPTY_STRING(value);			
  212: 		}
  213: 		return;
  214: 	}
  215: }
  216: /* }}} */
  217: 
  218: /* {{{ php_filter_encoded */
  219: void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
  220: {
  221: 	/* apply strip_high and strip_low filters */
  222: 	php_filter_strip(value, flags);
  223: 	/* urlencode */
  224: 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
  225: }
  226: /* }}} */
  227: 
  228: /* {{{ php_filter_special_chars */
  229: void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
  230: {
  231: 	unsigned char enc[256] = {0};
  232: 
  233: 	php_filter_strip(value, flags);
  234: 
  235: 	/* encodes ' " < > & \0 to numerical entities */
  236: 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
  237: 
  238: 	/* if strip low is not set, then we encode them as &#xx; */
  239: 	memset(enc, 1, 32);
  240: 
  241: 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
  242: 		memset(enc + 127, 1, sizeof(enc) - 127);
  243: 	}
  244: 	
  245: 	php_filter_encode_html(value, enc);	
  246: }
  247: /* }}} */
  248: 
  249: /* {{{ php_filter_full_special_chars */
  250: void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
  251: {
  252: 	char *buf;
  253: 	int   len, quotes;
  254: 	
  255: 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
  256: 		quotes = ENT_QUOTES;
  257: 	} else {
  258: 		quotes = ENT_NOQUOTES;
  259: 	}
  260: 	buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
  261: 	efree(Z_STRVAL_P(value));
  262: 	Z_STRVAL_P(value) = buf;
  263: 	Z_STRLEN_P(value) = len;
  264: }
  265: /* }}} */
  266: 
  267: /* {{{ php_filter_unsafe_raw */
  268: void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
  269: {
  270: 	/* Only if no flags are set (optimization) */
  271: 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
  272: 		unsigned char enc[256] = {0};
  273: 
  274: 		php_filter_strip(value, flags);
  275: 
  276: 		if (flags & FILTER_FLAG_ENCODE_AMP) {
  277: 			enc['&'] = 1;
  278: 		}
  279: 		if (flags & FILTER_FLAG_ENCODE_LOW) {
  280: 			memset(enc, 1, 32);
  281: 		}
  282: 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
  283: 			memset(enc + 127, 1, sizeof(enc) - 127);
  284: 		}
  285: 
  286: 		php_filter_encode_html(value, enc);	
  287: 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
  288: 		zval_dtor(value);
  289: 		ZVAL_NULL(value);
  290: 	}
  291: }
  292: /* }}} */
  293: 
  294: 
  295: 
  296: /* {{{ php_filter_email */
  297: #define SAFE        "$-_.+"
  298: #define EXTRA       "!*'(),"
  299: #define NATIONAL    "{}|\\^~[]`"
  300: #define PUNCTUATION "<>#%\""
  301: #define RESERVED    ";/?:@&="
  302: 
  303: void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
  304: {
  305: 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
  306: 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
  307: 	filter_map     map;
  308: 
  309: 	filter_map_init(&map);
  310: 	filter_map_update(&map, 1, allowed_list);
  311: 	filter_map_apply(value, &map);
  312: }
  313: /* }}} */
  314: 
  315: /* {{{ php_filter_url */
  316: void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
  317: {
  318: 	/* Strip all chars not part of section 5 of
  319: 	 * http://www.faqs.org/rfcs/rfc1738.html */
  320: 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
  321: 	filter_map     map;
  322: 
  323: 	filter_map_init(&map);
  324: 	filter_map_update(&map, 1, allowed_list);
  325: 	filter_map_apply(value, &map);
  326: }
  327: /* }}} */
  328: 
  329: /* {{{ php_filter_number_int */
  330: void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
  331: {
  332: 	/* strip everything [^0-9+-] */
  333: 	const unsigned char allowed_list[] = "+-" DIGIT;
  334: 	filter_map     map;
  335: 
  336: 	filter_map_init(&map);
  337: 	filter_map_update(&map, 1, allowed_list);
  338: 	filter_map_apply(value, &map);
  339: }
  340: /* }}} */
  341: 
  342: /* {{{ php_filter_number_float */
  343: void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
  344: {
  345: 	/* strip everything [^0-9+-] */
  346: 	const unsigned char allowed_list[] = "+-" DIGIT;
  347: 	filter_map     map;
  348: 
  349: 	filter_map_init(&map);
  350: 	filter_map_update(&map, 1, allowed_list);
  351: 
  352: 	/* depending on flags, strip '.', 'e', ",", "'" */
  353: 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
  354: 		filter_map_update(&map, 2, (const unsigned char *) ".");
  355: 	}
  356: 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
  357: 		filter_map_update(&map, 3,  (const unsigned char *) ",");
  358: 	}
  359: 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
  360: 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
  361: 	}
  362: 	filter_map_apply(value, &map);
  363: }
  364: /* }}} */
  365: 
  366: /* {{{ php_filter_magic_quotes */
  367: void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
  368: {
  369: 	char *buf;
  370: 	int   len;
  371: 	
  372: 	/* just call php_addslashes quotes */
  373: 	buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
  374: 
  375: 	efree(Z_STRVAL_P(value));
  376: 	Z_STRVAL_P(value) = buf;
  377: 	Z_STRLEN_P(value) = len;
  378: }
  379: /* }}} */
  380: 
  381: /*
  382:  * Local variables:
  383:  * tab-width: 4
  384:  * c-basic-offset: 4
  385:  * End:
  386:  * vim600: noet sw=4 ts=4 fdm=marker
  387:  * vim<600: noet sw=4 ts=4
  388:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>