File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / filter / sanitizing_filters.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 20:03:48 2014 UTC (10 years, 1 month ago) by misho
Branches: php, MAIN
CVS tags: v5_4_29, HEAD
php 5.4.29

    1: /*
    2:   +----------------------------------------------------------------------+
    3:   | PHP Version 5                                                        |
    4:   +----------------------------------------------------------------------+
    5:   | Copyright (c) 1997-2014 The PHP Group                                |
    6:   +----------------------------------------------------------------------+
    7:   | This source file is subject to version 3.01 of the PHP license,      |
    8:   | that is bundled with this package in the file LICENSE, and is        |
    9:   | available through the world-wide-web at the following url:           |
   10:   | http://www.php.net/license/3_01.txt                                  |
   11:   | If you did not receive a copy of the PHP license and are unable to   |
   12:   | obtain it through the world-wide-web, please send a note to          |
   13:   | license@php.net so we can mail you a copy immediately.               |
   14:   +----------------------------------------------------------------------+
   15:   | Authors: Derick Rethans <derick@php.net>                             |
   16:   +----------------------------------------------------------------------+
   17: */
   18: 
   19: /* $Id: sanitizing_filters.c,v 1.1.1.4 2014/06/15 20:03:48 misho Exp $ */
   20: 
   21: #include "php_filter.h"
   22: #include "filter_private.h"
   23: #include "ext/standard/php_smart_str.h"
   24: 
   25: /* {{{ STRUCTS */
   26: typedef unsigned long filter_map[256]; /* one slot per byte value; filter_map_apply() keeps a byte only when its slot is non-zero */
   27: /* }}} */
   28: 
   29: /* {{{ HELPER FUNCTIONS */
   30: static void php_filter_encode_html(zval *value, const unsigned char *chars)
   31: {
   32: 	smart_str str = {0};
   33: 	int len = Z_STRLEN_P(value);
   34: 	unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
   35: 	unsigned char *e = s + len;
   36: 
   37: 	if (Z_STRLEN_P(value) == 0) {
   38: 		return;
   39: 	}
   40: 
   41: 	while (s < e) {
   42: 		if (chars[*s]) {
   43: 			smart_str_appendl(&str, "&#", 2);
   44: 			smart_str_append_unsigned(&str, (unsigned long)*s);
   45: 			smart_str_appendc(&str, ';');
   46: 		} else {
   47: 			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
   48: 			smart_str_appendc(&str, *s);
   49: 		}
   50: 		s++;
   51: 	}
   52: 
   53: 	smart_str_0(&str);
   54: 	str_efree(Z_STRVAL_P(value));
   55: 	Z_STRVAL_P(value) = str.c;
   56: 	Z_STRLEN_P(value) = str.len;
   57: }
   58: 
   59: static const unsigned char hexchars[] = "0123456789ABCDEF"; /* upper-case hex digits, indexed by nibble value when writing %XX escapes */
   60: 
   61: #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz" /* LOWALPHA, HIALPHA, DIGIT: string pieces concatenated into the allowed-character lists in this file */
   62: #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   63: #define DIGIT       "0123456789"
   64: 
   65: #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._" /* bytes that php_filter_encoded() passes to php_filter_encode_url() as the set left unescaped */
   66: 
   67: static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
   68: {
   69: 	unsigned char *str, *p;
   70: 	unsigned char tmp[256];
   71: 	unsigned char *s = (unsigned char *)chars;
   72: 	unsigned char *e = s + char_len;
   73: 
   74: 	memset(tmp, 1, sizeof(tmp)-1);
   75: 
   76: 	while (s < e) {
   77: 		tmp[*s++] = 0;
   78: 	}
   79: /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
   80: 	if (encode_nul) {
   81: 		tmp[0] = 1;
   82: 	}
   83: 	if (high) {
   84: 		memset(tmp + 127, 1, sizeof(tmp) - 127);
   85: 	}
   86: 	if (low) {
   87: 		memset(tmp, 1, 32);
   88: 	}
   89: */
   90: 	p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
   91: 	s = (unsigned char *)Z_STRVAL_P(value);
   92: 	e = s + Z_STRLEN_P(value);
   93: 
   94: 	while (s < e) {
   95: 		if (tmp[*s]) {
   96: 			*p++ = '%';
   97: 			*p++ = hexchars[(unsigned char) *s >> 4];
   98: 			*p++ = hexchars[(unsigned char) *s & 15];
   99: 		} else {
  100: 			*p++ = *s;	
  101: 		}
  102: 		s++;	
  103: 	}
  104: 	*p = '\0';
  105: 	str_efree(Z_STRVAL_P(value));
  106: 	Z_STRVAL_P(value) = (char *)str;
  107: 	Z_STRLEN_P(value) = p - str;
  108: }
  109: 
  110: static void php_filter_strip(zval *value, long flags)
  111: {
  112: 	unsigned char *buf, *str;
  113: 	int   i, c;
  114: 	
  115: 	/* Optimization for if no strip flags are set */
  116: 	if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
  117: 		return;
  118: 	}
  119: 
  120: 	str = (unsigned char *)Z_STRVAL_P(value);
  121: 	buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
  122: 	c = 0;
  123: 	for (i = 0; i < Z_STRLEN_P(value); i++) {
  124: 		if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
  125: 		} else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
  126: 		} else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
  127: 		} else {
  128: 			buf[c] = str[i];
  129: 			++c;
  130: 		}
  131: 	}
  132: 	/* update zval string data */
  133: 	buf[c] = '\0';
  134: 	str_efree(Z_STRVAL_P(value));
  135: 	Z_STRVAL_P(value) = (char *)buf;
  136: 	Z_STRLEN_P(value) = c;
  137: }
  138: /* }}} */
  139: 
  140: /* {{{ FILTER MAP HELPERS */
  141: static void filter_map_init(filter_map *map)
  142: {
  143: 	memset(map, 0, sizeof(filter_map));
  144: }
  145: 
  146: static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
  147: {
  148: 	int l, i;
  149: 
  150: 	l = strlen((const char*)allowed_list);
  151: 	for (i = 0; i < l; ++i) {
  152: 		(*map)[allowed_list[i]] = flag;
  153: 	}
  154: }
  155: 
  156: static void filter_map_apply(zval *value, filter_map *map)
  157: {
  158: 	unsigned char *buf, *str;
  159: 	int   i, c;
  160: 	
  161: 	str = (unsigned char *)Z_STRVAL_P(value);
  162: 	buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
  163: 	c = 0;
  164: 	for (i = 0; i < Z_STRLEN_P(value); i++) {
  165: 		if ((*map)[str[i]]) {
  166: 			buf[c] = str[i];
  167: 			++c;
  168: 		}
  169: 	}
  170: 	/* update zval string data */
  171: 	buf[c] = '\0';
  172: 	str_efree(Z_STRVAL_P(value));
  173: 	Z_STRVAL_P(value) = (char *)buf;
  174: 	Z_STRLEN_P(value) = c;
  175: }
  176: /* }}} */
  177: 
  178: /* {{{ php_filter_string */
  179: void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
  180: {
  181: 	size_t new_len;
  182: 	unsigned char enc[256] = {0};
  183: 
  184: 	/* strip high/strip low ( see flags )*/
  185: 	php_filter_strip(value, flags);
  186: 
  187: 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
  188: 		enc['\''] = enc['"'] = 1;
  189: 	}
  190: 	if (flags & FILTER_FLAG_ENCODE_AMP) {
  191: 		enc['&'] = 1;
  192: 	}
  193: 	if (flags & FILTER_FLAG_ENCODE_LOW) {
  194: 		memset(enc, 1, 32);
  195: 	}
  196: 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
  197: 		memset(enc + 127, 1, sizeof(enc) - 127);
  198: 	}
  199: 
  200: 	php_filter_encode_html(value, enc);
  201: 
  202: 	/* strip tags, implicitly also removes \0 chars */
  203: 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
  204: 	Z_STRLEN_P(value) = new_len;
  205: 
  206: 	if (new_len == 0) {
  207: 		zval_dtor(value);
  208: 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
  209: 			ZVAL_NULL(value);
  210: 		} else {
  211: 			ZVAL_EMPTY_STRING(value);			
  212: 		}
  213: 		return;
  214: 	}
  215: }
  216: /* }}} */
  217: 
  218: /* {{{ php_filter_encoded */
  219: void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
  220: {
  221: 	/* apply strip_high and strip_low filters */
  222: 	php_filter_strip(value, flags);
  223: 	/* urlencode */
  224: 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
  225: }
  226: /* }}} */
  227: 
  228: /* {{{ php_filter_special_chars */
  229: void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
  230: {
  231: 	unsigned char enc[256] = {0};
  232: 
  233: 	php_filter_strip(value, flags);
  234: 
  235: 	/* encodes ' " < > & \0 to numerical entities */
  236: 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
  237: 
  238: 	/* if strip low is not set, then we encode them as &#xx; */
  239: 	memset(enc, 1, 32);
  240: 
  241: 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
  242: 		memset(enc + 127, 1, sizeof(enc) - 127);
  243: 	}
  244: 	
  245: 	php_filter_encode_html(value, enc);	
  246: }
  247: /* }}} */
  248: 
  249: /* {{{ php_filter_full_special_chars */
  250: void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
  251: {
  252: 	char *buf;
  253: 	size_t len;
  254: 	int quotes;
  255: 	
  256: 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
  257: 		quotes = ENT_QUOTES;
  258: 	} else {
  259: 		quotes = ENT_NOQUOTES;
  260: 	}
  261: 	buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
  262: 	str_efree(Z_STRVAL_P(value));
  263: 	Z_STRVAL_P(value) = buf;
  264: 	Z_STRLEN_P(value) = len;
  265: }
  266: /* }}} */
  267: 
  268: /* {{{ php_filter_unsafe_raw */
  269: void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
  270: {
  271: 	/* Only if no flags are set (optimization) */
  272: 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
  273: 		unsigned char enc[256] = {0};
  274: 
  275: 		php_filter_strip(value, flags);
  276: 
  277: 		if (flags & FILTER_FLAG_ENCODE_AMP) {
  278: 			enc['&'] = 1;
  279: 		}
  280: 		if (flags & FILTER_FLAG_ENCODE_LOW) {
  281: 			memset(enc, 1, 32);
  282: 		}
  283: 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
  284: 			memset(enc + 127, 1, sizeof(enc) - 127);
  285: 		}
  286: 
  287: 		php_filter_encode_html(value, enc);	
  288: 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
  289: 		zval_dtor(value);
  290: 		ZVAL_NULL(value);
  291: 	}
  292: }
  293: /* }}} */
  294: 
  295: 
  296: 
  297: /* {{{ php_filter_email */
  298: #define SAFE        "$-_.+" /* SAFE, EXTRA, NATIONAL, PUNCTUATION, RESERVED: character groups concatenated into the allowed list of php_filter_url() */
  299: #define EXTRA       "!*'(),"
  300: #define NATIONAL    "{}|\\^~[]`"
  301: #define PUNCTUATION "<>#%\""
  302: #define RESERVED    ";/?:@&="
  303: 
  304: void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
  305: {
  306: 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
  307: 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
  308: 	filter_map     map;
  309: 
  310: 	filter_map_init(&map);
  311: 	filter_map_update(&map, 1, allowed_list);
  312: 	filter_map_apply(value, &map);
  313: }
  314: /* }}} */
  315: 
  316: /* {{{ php_filter_url */
  317: void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
  318: {
  319: 	/* Strip all chars not part of section 5 of
  320: 	 * http://www.faqs.org/rfcs/rfc1738.html */
  321: 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
  322: 	filter_map     map;
  323: 
  324: 	filter_map_init(&map);
  325: 	filter_map_update(&map, 1, allowed_list);
  326: 	filter_map_apply(value, &map);
  327: }
  328: /* }}} */
  329: 
  330: /* {{{ php_filter_number_int */
  331: void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
  332: {
  333: 	/* strip everything [^0-9+-] */
  334: 	const unsigned char allowed_list[] = "+-" DIGIT;
  335: 	filter_map     map;
  336: 
  337: 	filter_map_init(&map);
  338: 	filter_map_update(&map, 1, allowed_list);
  339: 	filter_map_apply(value, &map);
  340: }
  341: /* }}} */
  342: 
  343: /* {{{ php_filter_number_float */
  344: void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
  345: {
  346: 	/* strip everything [^0-9+-] */
  347: 	const unsigned char allowed_list[] = "+-" DIGIT;
  348: 	filter_map     map;
  349: 
  350: 	filter_map_init(&map);
  351: 	filter_map_update(&map, 1, allowed_list);
  352: 
  353: 	/* depending on flags, strip '.', 'e', ",", "'" */
  354: 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
  355: 		filter_map_update(&map, 2, (const unsigned char *) ".");
  356: 	}
  357: 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
  358: 		filter_map_update(&map, 3,  (const unsigned char *) ",");
  359: 	}
  360: 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
  361: 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
  362: 	}
  363: 	filter_map_apply(value, &map);
  364: }
  365: /* }}} */
  366: 
  367: /* {{{ php_filter_magic_quotes */
  368: void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
  369: {
  370: 	char *buf;
  371: 	int   len;
  372: 	
  373: 	/* just call php_addslashes quotes */
  374: 	buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
  375: 
  376: 	str_efree(Z_STRVAL_P(value));
  377: 	Z_STRVAL_P(value) = buf;
  378: 	Z_STRLEN_P(value) = len;
  379: }
  380: /* }}} */
  381: 
  382: /*
  383:  * Local variables:
  384:  * tab-width: 4
  385:  * c-basic-offset: 4
  386:  * End:
  387:  * vim600: noet sw=4 ts=4 fdm=marker
  388:  * vim<600: noet sw=4 ts=4
  389:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>