Annotation of embedaddon/php/ext/filter/sanitizing_filters.c, revision 1.1.1.2

1.1       misho       1: /*
                      2:   +----------------------------------------------------------------------+
                      3:   | PHP Version 5                                                        |
                      4:   +----------------------------------------------------------------------+
                      5:   | Copyright (c) 1997-2012 The PHP Group                                |
                      6:   +----------------------------------------------------------------------+
                      7:   | This source file is subject to version 3.01 of the PHP license,      |
                      8:   | that is bundled with this package in the file LICENSE, and is        |
                      9:   | available through the world-wide-web at the following url:           |
                     10:   | http://www.php.net/license/3_01.txt                                  |
                     11:   | If you did not receive a copy of the PHP license and are unable to   |
                     12:   | obtain it through the world-wide-web, please send a note to          |
                     13:   | license@php.net so we can mail you a copy immediately.               |
                     14:   +----------------------------------------------------------------------+
                     15:   | Authors: Derick Rethans <derick@php.net>                             |
                     16:   +----------------------------------------------------------------------+
                     17: */
                     18: 
1.1.1.2 ! misho      19: /* $Id$ */
1.1       misho      20: 
                     21: #include "php_filter.h"
                     22: #include "filter_private.h"
                     23: #include "ext/standard/php_smart_str.h"
                     24: 
                     25: /* {{{ STRUCTS */
                     /* Lookup table holding one unsigned long slot for each possible byte value (0-255). */
                     26: typedef unsigned long filter_map[256];
                     27: /* }}} */
                     28: 
                     29: /* {{{ HELPER FUNCTIONS */
                     30: static void php_filter_encode_html(zval *value, const unsigned char *chars)
                     31: {
                     32:        smart_str str = {0};
                     33:        int len = Z_STRLEN_P(value);
                     34:        unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
                     35:        unsigned char *e = s + len;
                     36: 
                     37:        if (Z_STRLEN_P(value) == 0) {
                     38:                return;
                     39:        }
                     40: 
                     41:        while (s < e) {
                     42:                if (chars[*s]) {
                     43:                        smart_str_appendl(&str, "&#", 2);
                     44:                        smart_str_append_unsigned(&str, (unsigned long)*s);
                     45:                        smart_str_appendc(&str, ';');
                     46:                } else {
                     47:                        /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
                     48:                        smart_str_appendc(&str, *s);
                     49:                }
                     50:                s++;
                     51:        }
                     52: 
                     53:        smart_str_0(&str);
1.1.1.2 ! misho      54:        str_efree(Z_STRVAL_P(value));
1.1       misho      55:        Z_STRVAL_P(value) = str.c;
                     56:        Z_STRLEN_P(value) = str.len;
                     57: }
                     58: 
                     59: static const unsigned char hexchars[] = "0123456789ABCDEF";
                     60: 
                     61: #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
                     62: #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                     63: #define DIGIT       "0123456789"
                     64: 
                     65: #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
                     66: 
                     67: static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
                     68: {
                     69:        unsigned char *str, *p;
                     70:        unsigned char tmp[256];
                     71:        unsigned char *s = (unsigned char *)chars;
                     72:        unsigned char *e = s + char_len;
                     73: 
                     74:        memset(tmp, 1, sizeof(tmp)-1);
                     75: 
                     76:        while (s < e) {
                     77:                tmp[*s++] = 0;
                     78:        }
                     79: /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
                     80:        if (encode_nul) {
                     81:                tmp[0] = 1;
                     82:        }
                     83:        if (high) {
                     84:                memset(tmp + 127, 1, sizeof(tmp) - 127);
                     85:        }
                     86:        if (low) {
                     87:                memset(tmp, 1, 32);
                     88:        }
                     89: */
                     90:        p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
                     91:        s = (unsigned char *)Z_STRVAL_P(value);
                     92:        e = s + Z_STRLEN_P(value);
                     93: 
                     94:        while (s < e) {
                     95:                if (tmp[*s]) {
                     96:                        *p++ = '%';
                     97:                        *p++ = hexchars[(unsigned char) *s >> 4];
                     98:                        *p++ = hexchars[(unsigned char) *s & 15];
                     99:                } else {
                    100:                        *p++ = *s;      
                    101:                }
                    102:                s++;    
                    103:        }
                    104:        *p = '\0';
1.1.1.2 ! misho     105:        str_efree(Z_STRVAL_P(value));
1.1       misho     106:        Z_STRVAL_P(value) = (char *)str;
                    107:        Z_STRLEN_P(value) = p - str;
                    108: }
                    109: 
                    110: static void php_filter_strip(zval *value, long flags)
                    111: {
                    112:        unsigned char *buf, *str;
                    113:        int   i, c;
                    114:        
                    115:        /* Optimization for if no strip flags are set */
                    116:        if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
                    117:                return;
                    118:        }
                    119: 
                    120:        str = (unsigned char *)Z_STRVAL_P(value);
                    121:        buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
                    122:        c = 0;
                    123:        for (i = 0; i < Z_STRLEN_P(value); i++) {
                    124:                if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
                    125:                } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
                    126:                } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
                    127:                } else {
                    128:                        buf[c] = str[i];
                    129:                        ++c;
                    130:                }
                    131:        }
                    132:        /* update zval string data */
                    133:        buf[c] = '\0';
1.1.1.2 ! misho     134:        str_efree(Z_STRVAL_P(value));
1.1       misho     135:        Z_STRVAL_P(value) = (char *)buf;
                    136:        Z_STRLEN_P(value) = c;
                    137: }
                    138: /* }}} */
                    139: 
                    140: /* {{{ FILTER MAP HELPERS */
                    141: static void filter_map_init(filter_map *map)
                    142: {
                    143:        memset(map, 0, sizeof(filter_map));
                    144: }
                    145: 
                    146: static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
                    147: {
                    148:        int l, i;
                    149: 
                    150:        l = strlen((const char*)allowed_list);
                    151:        for (i = 0; i < l; ++i) {
                    152:                (*map)[allowed_list[i]] = flag;
                    153:        }
                    154: }
                    155: 
                    156: static void filter_map_apply(zval *value, filter_map *map)
                    157: {
                    158:        unsigned char *buf, *str;
                    159:        int   i, c;
                    160:        
                    161:        str = (unsigned char *)Z_STRVAL_P(value);
                    162:        buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
                    163:        c = 0;
                    164:        for (i = 0; i < Z_STRLEN_P(value); i++) {
                    165:                if ((*map)[str[i]]) {
                    166:                        buf[c] = str[i];
                    167:                        ++c;
                    168:                }
                    169:        }
                    170:        /* update zval string data */
                    171:        buf[c] = '\0';
1.1.1.2 ! misho     172:        str_efree(Z_STRVAL_P(value));
1.1       misho     173:        Z_STRVAL_P(value) = (char *)buf;
                    174:        Z_STRLEN_P(value) = c;
                    175: }
                    176: /* }}} */
                    177: 
                    178: /* {{{ php_filter_string */
                    179: void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
                    180: {
                    181:        size_t new_len;
                    182:        unsigned char enc[256] = {0};
                    183: 
                    184:        /* strip high/strip low ( see flags )*/
                    185:        php_filter_strip(value, flags);
                    186: 
                    187:        if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
                    188:                enc['\''] = enc['"'] = 1;
                    189:        }
                    190:        if (flags & FILTER_FLAG_ENCODE_AMP) {
                    191:                enc['&'] = 1;
                    192:        }
                    193:        if (flags & FILTER_FLAG_ENCODE_LOW) {
                    194:                memset(enc, 1, 32);
                    195:        }
                    196:        if (flags & FILTER_FLAG_ENCODE_HIGH) {
                    197:                memset(enc + 127, 1, sizeof(enc) - 127);
                    198:        }
                    199: 
                    200:        php_filter_encode_html(value, enc);
                    201: 
                    202:        /* strip tags, implicitly also removes \0 chars */
                    203:        new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
                    204:        Z_STRLEN_P(value) = new_len;
                    205: 
                    206:        if (new_len == 0) {
                    207:                zval_dtor(value);
                    208:                if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
                    209:                        ZVAL_NULL(value);
                    210:                } else {
                    211:                        ZVAL_EMPTY_STRING(value);                       
                    212:                }
                    213:                return;
                    214:        }
                    215: }
                    216: /* }}} */
                    217: 
                    218: /* {{{ php_filter_encoded */
                    219: void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
                    220: {
                    221:        /* apply strip_high and strip_low filters */
                    222:        php_filter_strip(value, flags);
                    223:        /* urlencode */
                    224:        php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
                    225: }
                    226: /* }}} */
                    227: 
                    228: /* {{{ php_filter_special_chars */
                    229: void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
                    230: {
                    231:        unsigned char enc[256] = {0};
                    232: 
                    233:        php_filter_strip(value, flags);
                    234: 
                    235:        /* encodes ' " < > & \0 to numerical entities */
                    236:        enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
                    237: 
                    238:        /* if strip low is not set, then we encode them as &#xx; */
                    239:        memset(enc, 1, 32);
                    240: 
                    241:        if (flags & FILTER_FLAG_ENCODE_HIGH) {
                    242:                memset(enc + 127, 1, sizeof(enc) - 127);
                    243:        }
                    244:        
                    245:        php_filter_encode_html(value, enc);     
                    246: }
                    247: /* }}} */
                    248: 
                    249: /* {{{ php_filter_full_special_chars */
                    250: void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
                    251: {
                    252:        char *buf;
1.1.1.2 ! misho     253:        size_t len;
        !           254:        int quotes;
1.1       misho     255:        
                    256:        if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
                    257:                quotes = ENT_QUOTES;
                    258:        } else {
                    259:                quotes = ENT_NOQUOTES;
                    260:        }
                    261:        buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
1.1.1.2 ! misho     262:        str_efree(Z_STRVAL_P(value));
1.1       misho     263:        Z_STRVAL_P(value) = buf;
                    264:        Z_STRLEN_P(value) = len;
                    265: }
                    266: /* }}} */
                    267: 
                    268: /* {{{ php_filter_unsafe_raw */
                    269: void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
                    270: {
                    271:        /* Only if no flags are set (optimization) */
                    272:        if (flags != 0 && Z_STRLEN_P(value) > 0) {
                    273:                unsigned char enc[256] = {0};
                    274: 
                    275:                php_filter_strip(value, flags);
                    276: 
                    277:                if (flags & FILTER_FLAG_ENCODE_AMP) {
                    278:                        enc['&'] = 1;
                    279:                }
                    280:                if (flags & FILTER_FLAG_ENCODE_LOW) {
                    281:                        memset(enc, 1, 32);
                    282:                }
                    283:                if (flags & FILTER_FLAG_ENCODE_HIGH) {
                    284:                        memset(enc + 127, 1, sizeof(enc) - 127);
                    285:                }
                    286: 
                    287:                php_filter_encode_html(value, enc);     
                    288:        } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
                    289:                zval_dtor(value);
                    290:                ZVAL_NULL(value);
                    291:        }
                    292: }
                    293: /* }}} */
                    294: 
                    295: 
                    296: 
                    297: /* {{{ php_filter_email */
                    298: #define SAFE        "$-_.+"
                    299: #define EXTRA       "!*'(),"
                    300: #define NATIONAL    "{}|\\^~[]`"
                    301: #define PUNCTUATION "<>#%\""
                    302: #define RESERVED    ";/?:@&="
                    303: 
                    304: void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
                    305: {
                    306:        /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
                    307:        const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
                    308:        filter_map     map;
                    309: 
                    310:        filter_map_init(&map);
                    311:        filter_map_update(&map, 1, allowed_list);
                    312:        filter_map_apply(value, &map);
                    313: }
                    314: /* }}} */
                    315: 
                    316: /* {{{ php_filter_url */
                    317: void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
                    318: {
                    319:        /* Strip all chars not part of section 5 of
                    320:         * http://www.faqs.org/rfcs/rfc1738.html */
                    321:        const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
                    322:        filter_map     map;
                    323: 
                    324:        filter_map_init(&map);
                    325:        filter_map_update(&map, 1, allowed_list);
                    326:        filter_map_apply(value, &map);
                    327: }
                    328: /* }}} */
                    329: 
                    330: /* {{{ php_filter_number_int */
                    331: void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
                    332: {
                    333:        /* strip everything [^0-9+-] */
                    334:        const unsigned char allowed_list[] = "+-" DIGIT;
                    335:        filter_map     map;
                    336: 
                    337:        filter_map_init(&map);
                    338:        filter_map_update(&map, 1, allowed_list);
                    339:        filter_map_apply(value, &map);
                    340: }
                    341: /* }}} */
                    342: 
                    343: /* {{{ php_filter_number_float */
                    344: void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
                    345: {
                    346:        /* strip everything [^0-9+-] */
                    347:        const unsigned char allowed_list[] = "+-" DIGIT;
                    348:        filter_map     map;
                    349: 
                    350:        filter_map_init(&map);
                    351:        filter_map_update(&map, 1, allowed_list);
                    352: 
                    353:        /* depending on flags, strip '.', 'e', ",", "'" */
                    354:        if (flags & FILTER_FLAG_ALLOW_FRACTION) {
                    355:                filter_map_update(&map, 2, (const unsigned char *) ".");
                    356:        }
                    357:        if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
                    358:                filter_map_update(&map, 3,  (const unsigned char *) ",");
                    359:        }
                    360:        if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
                    361:                filter_map_update(&map, 4,  (const unsigned char *) "eE");
                    362:        }
                    363:        filter_map_apply(value, &map);
                    364: }
                    365: /* }}} */
                    366: 
                    367: /* {{{ php_filter_magic_quotes */
                    368: void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
                    369: {
                    370:        char *buf;
                    371:        int   len;
                    372:        
                    373:        /* just call php_addslashes quotes */
                    374:        buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
                    375: 
1.1.1.2 ! misho     376:        str_efree(Z_STRVAL_P(value));
1.1       misho     377:        Z_STRVAL_P(value) = buf;
                    378:        Z_STRLEN_P(value) = len;
                    379: }
                    380: /* }}} */
                    381: 
                    382: /*
                    383:  * Local variables:
                    384:  * tab-width: 4
                    385:  * c-basic-offset: 4
                    386:  * End:
                    387:  * vim600: noet sw=4 ts=4 fdm=marker
                    388:  * vim<600: noet sw=4 ts=4
                    389:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>