Annotation of embedaddon/php/ext/standard/cyr_convert.c, revision 1.1

1.1     ! misho       1: /*
        !             2:    +----------------------------------------------------------------------+
        !             3:    | PHP Version 5                                                        |
        !             4:    +----------------------------------------------------------------------+
        !             5:    | Copyright (c) 1997-2012 The PHP Group                                |
        !             6:    +----------------------------------------------------------------------+
        !             7:    | This source file is subject to version 3.01 of the PHP license,      |
        !             8:    | that is bundled with this package in the file LICENSE, and is        |
        !             9:    | available through the world-wide-web at the following url:           |
        !            10:    | http://www.php.net/license/3_01.txt                                  |
        !            11:    | If you did not receive a copy of the PHP license and are unable to   |
        !            12:    | obtain it through the world-wide-web, please send a note to          |
        !            13:    | license@php.net so we can mail you a copy immediately.               |
        !            14:    +----------------------------------------------------------------------+
        !            15:    | Author: Kirill Maximov <kir@rus.net>                                 |
        !            16:    +----------------------------------------------------------------------+
        !            17:  */
        !            18: 
        !            19: /* $Id: cyr_convert.c 321634 2012-01-01 13:15:04Z felipe $ */
        !            20: 
        !            21: #include <stdlib.h>
        !            22: 
        !            23: #ifdef HAVE_UNISTD_H
        !            24: #include <unistd.h>
        !            25: #endif
        !            26: #include <string.h>
        !            27: #include <errno.h>
        !            28: 
        !            29: #include "php.h"
        !            30: #include "cyr_convert.h"
        !            31: 
        !            32: #include <stdio.h>
        !            33: 
        !            34: /*****************************************************************************
        !            35: * These are the code tables for the supported Cyrillic charsets (relative to koi8-r).
        !            36: * Each table has 512 entries: two 256-entry maps, each covering every byte value 0-255.
        !            37: * The first 256 entries convert from the corresponding charset to koi8-r,
        !            38: * the second 256 entries are for the reverse conversion, from koi8-r to that charset.
        !            39: *
        !            40: * Here we have the following tables:
        !            41: * _cyr_win1251   - for windows-1251 charset
        !            42: * _cyr_iso88595  - for iso8859-5 charset
        !            43: * _cyr_cp866     - for x-cp866 charset
        !            44: * _cyr_mac       - for x-mac-cyrillic charset
        !            45: *
        !            46: *****************************************************************************/
        !            47: 
        !            48: typedef unsigned char _cyr_charset_table[512];
        !            49: 
        !            50: /* {{{ static const _cyr_charset_table _cyr_win1251
        !            51:  */
        !            52: static const _cyr_charset_table _cyr_win1251 = {
        !            53: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
        !            54: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
        !            55: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
        !            56: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
        !            57: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
        !            58: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
        !            59: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
        !            60: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
        !            61: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
        !            62: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
        !            63: 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
        !            64: 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
        !            65: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
        !            66: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
        !            67: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
        !            68: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
        !            69: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
        !            70: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
        !            71: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
        !            72: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
        !            73: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
        !            74: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
        !            75: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
        !            76: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
        !            77: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !            78: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !            79: 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
        !            80: 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
        !            81: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
        !            82: 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
        !            83: 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
        !            84: 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
        !            85: },
        !            86: _cyr_cp866 = { 
        !            87: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
        !            88: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
        !            89: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
        !            90: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
        !            91: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
        !            92: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
        !            93: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
        !            94: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
        !            95: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
        !            96: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
        !            97: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
        !            98: 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
        !            99: 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
        !           100: 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
        !           101: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
        !           102: 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
        !           103: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
        !           104: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
        !           105: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
        !           106: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
        !           107: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
        !           108: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
        !           109: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
        !           110: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
        !           111: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !           112: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !           113: 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
        !           114: 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
        !           115: 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
        !           116: 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
        !           117: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
        !           118: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
        !           119: },
        !           120: _cyr_iso88595 = {
        !           121: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
        !           122: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
        !           123: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
        !           124: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
        !           125: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
        !           126: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
        !           127: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
        !           128: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
        !           129: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !           130: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !           131: 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !           132: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
        !           133: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
        !           134: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
        !           135: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
        !           136: 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !           137: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
        !           138: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
        !           139: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
        !           140: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
        !           141: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
        !           142: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
        !           143: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
        !           144: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
        !           145: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !           146: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
        !           147: 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
        !           148: 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
        !           149: 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
        !           150: 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
        !           151: 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
        !           152: 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
        !           153: },
        !           154: _cyr_mac = {
        !           155: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
        !           156: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
        !           157: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
        !           158: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
        !           159: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
        !           160: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
        !           161: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
        !           162: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
        !           163: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
        !           164: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
        !           165: 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
        !           166: 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
        !           167: 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
        !           168: 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
        !           169: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
        !           170: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
        !           171: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
        !           172: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
        !           173: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
        !           174: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
        !           175: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
        !           176: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
        !           177: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
        !           178: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
        !           179: 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
        !           180: 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
        !           181: 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
        !           182: 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
        !           183: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
        !           184: 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
        !           185: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
        !           186: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
        !           187: };
        !           188: /* }}} */
        !           189: 
        !           190: /* {{{ static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
        !           191: * This is the function that performs real in-place conversion of the string 
        !           192: * between charsets. 
        !           193: * Parameters:
        !           194: *    str - string to be converted
        !           195: *    from,to - one-symbol label of source and destination charset
        !           196: * The following symbols are used as labels:
        !           197: *    k - koi8-r
        !           198: *    w - windows-1251
        !           199: *    i - iso8859-5
        !           200: *    a - x-cp866
        !           201: *    d - x-cp866
        !           202: *    m - x-mac-cyrillic
        !           203: *****************************************************************************/
        !           204: static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
        !           205: {
        !           206:        const unsigned char *from_table, *to_table;
        !           207:        unsigned char tmp;
        !           208:        int i;
        !           209: 
        !           210:        from_table = NULL;
        !           211:        to_table   = NULL;
        !           212:        
        !           213:        switch (toupper((int)(unsigned char)from))
        !           214:        {
        !           215:                case 'W':
        !           216:                        from_table = _cyr_win1251;
        !           217:                        break;
        !           218:                case 'A':
        !           219:                case 'D':
        !           220:                        from_table = _cyr_cp866;
        !           221:                        break;
        !           222:                case 'I':
        !           223:                        from_table = _cyr_iso88595;
        !           224:                        break;
        !           225:                case 'M':
        !           226:                        from_table = _cyr_mac;
        !           227:                        break;
        !           228:                case 'K':
        !           229:                        break;
        !           230:                default:
        !           231:                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown source charset: %c", from);
        !           232:                        break;
        !           233:        }
        !           234: 
        !           235:        switch (toupper((int)(unsigned char)to))
        !           236:        {
        !           237:                case 'W':
        !           238:                        to_table = _cyr_win1251;
        !           239:                        break;
        !           240:                case 'A':
        !           241:                case 'D':
        !           242:                        to_table = _cyr_cp866;
        !           243:                        break;
        !           244:                case 'I':
        !           245:                        to_table = _cyr_iso88595;
        !           246:                        break;
        !           247:                case 'M':
        !           248:                        to_table = _cyr_mac;
        !           249:                        break;
        !           250:                case 'K':
        !           251:                        break;
        !           252:                default:
        !           253:                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown destination charset: %c", to);
        !           254:                        break;
        !           255:        }
        !           256: 
        !           257: 
        !           258:        if (!str)
        !           259:                return (char *)str;
        !           260:        
        !           261:        for( i = 0; i<length; i++)
        !           262:        {
        !           263:                tmp = (from_table == NULL)? str[i] : from_table[ str[i] ];
        !           264:                str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256];
        !           265:        }
        !           266:        return (char *)str;
        !           267: }
        !           268: /* }}} */
        !           269: 
        !           270: /* {{{ proto string convert_cyr_string(string str, string from, string to)
        !           271:    Convert from one Cyrillic character set to another */
        !           272: PHP_FUNCTION(convert_cyr_string)
        !           273: {
        !           274:        char *input, *fr_cs, *to_cs;
        !           275:        int input_len, fr_cs_len, to_cs_len;
        !           276:        unsigned char *str;
        !           277: 
        !           278:        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss", &input, &input_len, &fr_cs, &fr_cs_len, &to_cs, &to_cs_len) == FAILURE) {
        !           279:                return;
        !           280:        }
        !           281: 
        !           282:        str = (unsigned char*) estrndup(input, input_len);
        !           283: 
        !           284:        php_convert_cyr_string(str, input_len, fr_cs[0], to_cs[0] TSRMLS_CC);
        !           285:        RETVAL_STRING((char *)str, 0)
        !           286: }
        !           287: /* }}} */
        !           288: 
        !           289: /*
        !           290:  * Local variables:
        !           291:  * tab-width: 4
        !           292:  * c-basic-offset: 4
        !           293:  * End:
        !           294:  * vim600: sw=4 ts=4 fdm=marker
        !           295:  * vim<600: sw=4 ts=4
        !           296:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>