Annotation of embedaddon/php/ext/filter/sanitizing_filters.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:   +----------------------------------------------------------------------+
                      3:   | PHP Version 5                                                        |
                      4:   +----------------------------------------------------------------------+
                      5:   | Copyright (c) 1997-2012 The PHP Group                                |
                      6:   +----------------------------------------------------------------------+
                      7:   | This source file is subject to version 3.01 of the PHP license,      |
                      8:   | that is bundled with this package in the file LICENSE, and is        |
                      9:   | available through the world-wide-web at the following url:           |
                     10:   | http://www.php.net/license/3_01.txt                                  |
                     11:   | If you did not receive a copy of the PHP license and are unable to   |
                     12:   | obtain it through the world-wide-web, please send a note to          |
                     13:   | license@php.net so we can mail you a copy immediately.               |
                     14:   +----------------------------------------------------------------------+
                     15:   | Authors: Derick Rethans <derick@php.net>                             |
                     16:   +----------------------------------------------------------------------+
                     17: */
                     18: 
                     19: /* $Id: sanitizing_filters.c 321634 2012-01-01 13:15:04Z felipe $ */
                     20: 
                     21: #include "php_filter.h"
                     22: #include "filter_private.h"
                     23: #include "ext/standard/php_smart_str.h"
                     24: 
                     25: /* {{{ STRUCTS */
                     /* Per-byte lookup table: one unsigned long slot for each of the 256 possible byte values. */
                     26: typedef unsigned long filter_map[256];
                     27: /* }}} */
                     28: 
                     29: /* {{{ HELPER FUNCTIONS */
                     30: static void php_filter_encode_html(zval *value, const unsigned char *chars)
                     31: {
                     32:        smart_str str = {0};
                     33:        int len = Z_STRLEN_P(value);
                     34:        unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
                     35:        unsigned char *e = s + len;
                     36: 
                     37:        if (Z_STRLEN_P(value) == 0) {
                     38:                return;
                     39:        }
                     40: 
                     41:        while (s < e) {
                     42:                if (chars[*s]) {
                     43:                        smart_str_appendl(&str, "&#", 2);
                     44:                        smart_str_append_unsigned(&str, (unsigned long)*s);
                     45:                        smart_str_appendc(&str, ';');
                     46:                } else {
                     47:                        /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
                     48:                        smart_str_appendc(&str, *s);
                     49:                }
                     50:                s++;
                     51:        }
                     52: 
                     53:        smart_str_0(&str);
                     54:        efree(Z_STRVAL_P(value));
                     55:        Z_STRVAL_P(value) = str.c;
                     56:        Z_STRLEN_P(value) = str.len;
                     57: }
                     58: 
                     /* Upper-case hex digits, indexed by nibble value; used by php_filter_encode_url for "%XX" escapes. */
                     59: static const unsigned char hexchars[] = "0123456789ABCDEF";
                     60: 
                     /* Character classes that are concatenated (as string literals) into the allow-lists below. */
                     61: #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
                     62: #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                     63: #define DIGIT       "0123456789"
                     64: 
                     /* Bytes that php_filter_encoded passes to php_filter_encode_url as "do not escape". */
                     65: #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
                     66: 
                     67: static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
                     68: {
                     69:        unsigned char *str, *p;
                     70:        unsigned char tmp[256];
                     71:        unsigned char *s = (unsigned char *)chars;
                     72:        unsigned char *e = s + char_len;
                     73: 
                     74:        memset(tmp, 1, sizeof(tmp)-1);
                     75: 
                     76:        while (s < e) {
                     77:                tmp[*s++] = 0;
                     78:        }
                     79: /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
                     80:        if (encode_nul) {
                     81:                tmp[0] = 1;
                     82:        }
                     83:        if (high) {
                     84:                memset(tmp + 127, 1, sizeof(tmp) - 127);
                     85:        }
                     86:        if (low) {
                     87:                memset(tmp, 1, 32);
                     88:        }
                     89: */
                     90:        p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
                     91:        s = (unsigned char *)Z_STRVAL_P(value);
                     92:        e = s + Z_STRLEN_P(value);
                     93: 
                     94:        while (s < e) {
                     95:                if (tmp[*s]) {
                     96:                        *p++ = '%';
                     97:                        *p++ = hexchars[(unsigned char) *s >> 4];
                     98:                        *p++ = hexchars[(unsigned char) *s & 15];
                     99:                } else {
                    100:                        *p++ = *s;      
                    101:                }
                    102:                s++;    
                    103:        }
                    104:        *p = '\0';
                    105:        efree(Z_STRVAL_P(value));
                    106:        Z_STRVAL_P(value) = (char *)str;
                    107:        Z_STRLEN_P(value) = p - str;
                    108: }
                    109: 
                    110: static void php_filter_strip(zval *value, long flags)
                    111: {
                    112:        unsigned char *buf, *str;
                    113:        int   i, c;
                    114:        
                    115:        /* Optimization for if no strip flags are set */
                    116:        if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
                    117:                return;
                    118:        }
                    119: 
                    120:        str = (unsigned char *)Z_STRVAL_P(value);
                    121:        buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
                    122:        c = 0;
                    123:        for (i = 0; i < Z_STRLEN_P(value); i++) {
                    124:                if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
                    125:                } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
                    126:                } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
                    127:                } else {
                    128:                        buf[c] = str[i];
                    129:                        ++c;
                    130:                }
                    131:        }
                    132:        /* update zval string data */
                    133:        buf[c] = '\0';
                    134:        efree(Z_STRVAL_P(value));
                    135:        Z_STRVAL_P(value) = (char *)buf;
                    136:        Z_STRLEN_P(value) = c;
                    137: }
                    138: /* }}} */
                    139: 
                    140: /* {{{ FILTER MAP HELPERS */
                    141: static void filter_map_init(filter_map *map)
                    142: {
                    143:        memset(map, 0, sizeof(filter_map));
                    144: }
                    145: 
                    146: static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
                    147: {
                    148:        int l, i;
                    149: 
                    150:        l = strlen((const char*)allowed_list);
                    151:        for (i = 0; i < l; ++i) {
                    152:                (*map)[allowed_list[i]] = flag;
                    153:        }
                    154: }
                    155: 
                    156: static void filter_map_apply(zval *value, filter_map *map)
                    157: {
                    158:        unsigned char *buf, *str;
                    159:        int   i, c;
                    160:        
                    161:        str = (unsigned char *)Z_STRVAL_P(value);
                    162:        buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
                    163:        c = 0;
                    164:        for (i = 0; i < Z_STRLEN_P(value); i++) {
                    165:                if ((*map)[str[i]]) {
                    166:                        buf[c] = str[i];
                    167:                        ++c;
                    168:                }
                    169:        }
                    170:        /* update zval string data */
                    171:        buf[c] = '\0';
                    172:        efree(Z_STRVAL_P(value));
                    173:        Z_STRVAL_P(value) = (char *)buf;
                    174:        Z_STRLEN_P(value) = c;
                    175: }
                    176: /* }}} */
                    177: 
                    178: /* {{{ php_filter_string */
                    179: void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
                    180: {
                    181:        size_t new_len;
                    182:        unsigned char enc[256] = {0};
                    183: 
                    184:        /* strip high/strip low ( see flags )*/
                    185:        php_filter_strip(value, flags);
                    186: 
                    187:        if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
                    188:                enc['\''] = enc['"'] = 1;
                    189:        }
                    190:        if (flags & FILTER_FLAG_ENCODE_AMP) {
                    191:                enc['&'] = 1;
                    192:        }
                    193:        if (flags & FILTER_FLAG_ENCODE_LOW) {
                    194:                memset(enc, 1, 32);
                    195:        }
                    196:        if (flags & FILTER_FLAG_ENCODE_HIGH) {
                    197:                memset(enc + 127, 1, sizeof(enc) - 127);
                    198:        }
                    199: 
                    200:        php_filter_encode_html(value, enc);
                    201: 
                    202:        /* strip tags, implicitly also removes \0 chars */
                    203:        new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
                    204:        Z_STRLEN_P(value) = new_len;
                    205: 
                    206:        if (new_len == 0) {
                    207:                zval_dtor(value);
                    208:                if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
                    209:                        ZVAL_NULL(value);
                    210:                } else {
                    211:                        ZVAL_EMPTY_STRING(value);                       
                    212:                }
                    213:                return;
                    214:        }
                    215: }
                    216: /* }}} */
                    217: 
                    218: /* {{{ php_filter_encoded */
                    219: void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
                    220: {
                    221:        /* apply strip_high and strip_low filters */
                    222:        php_filter_strip(value, flags);
                    223:        /* urlencode */
                    224:        php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
                    225: }
                    226: /* }}} */
                    227: 
                    228: /* {{{ php_filter_special_chars */
                    229: void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
                    230: {
                    231:        unsigned char enc[256] = {0};
                    232: 
                    233:        php_filter_strip(value, flags);
                    234: 
                    235:        /* encodes ' " < > & \0 to numerical entities */
                    236:        enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
                    237: 
                    238:        /* if strip low is not set, then we encode them as &#xx; */
                    239:        memset(enc, 1, 32);
                    240: 
                    241:        if (flags & FILTER_FLAG_ENCODE_HIGH) {
                    242:                memset(enc + 127, 1, sizeof(enc) - 127);
                    243:        }
                    244:        
                    245:        php_filter_encode_html(value, enc);     
                    246: }
                    247: /* }}} */
                    248: 
                    249: /* {{{ php_filter_full_special_chars */
                    250: void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
                    251: {
                    252:        char *buf;
                    253:        int   len, quotes;
                    254:        
                    255:        if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
                    256:                quotes = ENT_QUOTES;
                    257:        } else {
                    258:                quotes = ENT_NOQUOTES;
                    259:        }
                    260:        buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
                    261:        efree(Z_STRVAL_P(value));
                    262:        Z_STRVAL_P(value) = buf;
                    263:        Z_STRLEN_P(value) = len;
                    264: }
                    265: /* }}} */
                    266: 
                    267: /* {{{ php_filter_unsafe_raw */
                    268: void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
                    269: {
                    270:        /* Only if no flags are set (optimization) */
                    271:        if (flags != 0 && Z_STRLEN_P(value) > 0) {
                    272:                unsigned char enc[256] = {0};
                    273: 
                    274:                php_filter_strip(value, flags);
                    275: 
                    276:                if (flags & FILTER_FLAG_ENCODE_AMP) {
                    277:                        enc['&'] = 1;
                    278:                }
                    279:                if (flags & FILTER_FLAG_ENCODE_LOW) {
                    280:                        memset(enc, 1, 32);
                    281:                }
                    282:                if (flags & FILTER_FLAG_ENCODE_HIGH) {
                    283:                        memset(enc + 127, 1, sizeof(enc) - 127);
                    284:                }
                    285: 
                    286:                php_filter_encode_html(value, enc);     
                    287:        } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
                    288:                zval_dtor(value);
                    289:                ZVAL_NULL(value);
                    290:        }
                    291: }
                    292: /* }}} */
                    293: 
                    294: 
                    295: 
                    296: /* {{{ php_filter_email */
                    /* Character groups that php_filter_url concatenates, together with
                     * LOWALPHA, HIALPHA and DIGIT, into its list of bytes to keep. */
                    297: #define SAFE        "$-_.+"
                    298: #define EXTRA       "!*'(),"
                    299: #define NATIONAL    "{}|\\^~[]`"
                    300: #define PUNCTUATION "<>#%\""
                    301: #define RESERVED    ";/?:@&="
                    302: 
                    303: void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
                    304: {
                    305:        /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
                    306:        const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
                    307:        filter_map     map;
                    308: 
                    309:        filter_map_init(&map);
                    310:        filter_map_update(&map, 1, allowed_list);
                    311:        filter_map_apply(value, &map);
                    312: }
                    313: /* }}} */
                    314: 
                    315: /* {{{ php_filter_url */
                    316: void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
                    317: {
                    318:        /* Strip all chars not part of section 5 of
                    319:         * http://www.faqs.org/rfcs/rfc1738.html */
                    320:        const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
                    321:        filter_map     map;
                    322: 
                    323:        filter_map_init(&map);
                    324:        filter_map_update(&map, 1, allowed_list);
                    325:        filter_map_apply(value, &map);
                    326: }
                    327: /* }}} */
                    328: 
                    329: /* {{{ php_filter_number_int */
                    330: void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
                    331: {
                    332:        /* strip everything [^0-9+-] */
                    333:        const unsigned char allowed_list[] = "+-" DIGIT;
                    334:        filter_map     map;
                    335: 
                    336:        filter_map_init(&map);
                    337:        filter_map_update(&map, 1, allowed_list);
                    338:        filter_map_apply(value, &map);
                    339: }
                    340: /* }}} */
                    341: 
                    342: /* {{{ php_filter_number_float */
                    343: void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
                    344: {
                    345:        /* strip everything [^0-9+-] */
                    346:        const unsigned char allowed_list[] = "+-" DIGIT;
                    347:        filter_map     map;
                    348: 
                    349:        filter_map_init(&map);
                    350:        filter_map_update(&map, 1, allowed_list);
                    351: 
                    352:        /* depending on flags, strip '.', 'e', ",", "'" */
                    353:        if (flags & FILTER_FLAG_ALLOW_FRACTION) {
                    354:                filter_map_update(&map, 2, (const unsigned char *) ".");
                    355:        }
                    356:        if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
                    357:                filter_map_update(&map, 3,  (const unsigned char *) ",");
                    358:        }
                    359:        if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
                    360:                filter_map_update(&map, 4,  (const unsigned char *) "eE");
                    361:        }
                    362:        filter_map_apply(value, &map);
                    363: }
                    364: /* }}} */
                    365: 
                    366: /* {{{ php_filter_magic_quotes */
                    367: void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
                    368: {
                    369:        char *buf;
                    370:        int   len;
                    371:        
                    372:        /* just call php_addslashes quotes */
                    373:        buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
                    374: 
                    375:        efree(Z_STRVAL_P(value));
                    376:        Z_STRVAL_P(value) = buf;
                    377:        Z_STRLEN_P(value) = len;
                    378: }
                    379: /* }}} */
                    380: 
                    381: /*
                    382:  * Local variables:
                    383:  * tab-width: 4
                    384:  * c-basic-offset: 4
                    385:  * End:
                    386:  * vim600: noet sw=4 ts=4 fdm=marker
                    387:  * vim<600: noet sw=4 ts=4
                    388:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>