Annotation of embedaddon/php/ext/standard/cyr_convert.c, revision 1.1.1.2

1.1       misho       1: /*
                      2:    +----------------------------------------------------------------------+
                      3:    | PHP Version 5                                                        |
                      4:    +----------------------------------------------------------------------+
                      5:    | Copyright (c) 1997-2012 The PHP Group                                |
                      6:    +----------------------------------------------------------------------+
                      7:    | This source file is subject to version 3.01 of the PHP license,      |
                      8:    | that is bundled with this package in the file LICENSE, and is        |
                      9:    | available through the world-wide-web at the following url:           |
                     10:    | http://www.php.net/license/3_01.txt                                  |
                     11:    | If you did not receive a copy of the PHP license and are unable to   |
                     12:    | obtain it through the world-wide-web, please send a note to          |
                     13:    | license@php.net so we can mail you a copy immediately.               |
                     14:    +----------------------------------------------------------------------+
                     15:    | Author: Kirill Maximov <kir@rus.net>                                 |
                     16:    +----------------------------------------------------------------------+
                     17:  */
                     18: 
1.1.1.2 ! misho      19: /* $Id$ */
1.1       misho      20: 
                     21: #include <stdlib.h>
                     22: 
                     23: #ifdef HAVE_UNISTD_H
                     24: #include <unistd.h>
                     25: #endif
                     26: #include <string.h>
                     27: #include <errno.h>
                     28: 
                     29: #include "php.h"
                     30: #include "cyr_convert.h"
                     31: 
                     32: #include <stdio.h>
                     33: 
                     34: /*****************************************************************************
                     35: * These are the code tables for different Cyrillic charsets (relative to koi8-r).
                     36: * Each table covers all 256 byte values: 0-127 map to themselves, 128-255 are translated.
                     37: * The first 256 entries convert from the corresponding charset to koi8-r,
                     38: * the second 256 entries do the reverse conversion, from koi8-r to that charset.
                     39: *
                     40: * Here we have the following tables:
                     41: * _cyr_win1251   - for windows-1251 charset
                     42: * _cyr_iso88595  - for iso8859-5 charset
                     43: * _cyr_cp866     - for x-cp866 charset
                     44: * _cyr_mac       - for x-mac-cyrillic charset
                     45: *
                     46: *****************************************************************************/
                     47: 
                     48: typedef unsigned char _cyr_charset_table[512]; /* two 256-entry halves: [0..255] charset -> koi8-r, [256..511] koi8-r -> charset */
                     49: 
                     50: /* {{{ static const _cyr_charset_table _cyr_win1251, _cyr_cp866, _cyr_iso88595, _cyr_mac
                     51:  */
                     52: static const _cyr_charset_table _cyr_win1251 = { /* windows-1251; first half: windows-1251 -> koi8-r */
                     53: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
                     54: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
                     55: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
                     56: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
                     57: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
                     58: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
                     59: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
                     60: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
                     61: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
                     62: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
                     63: 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
                     64: 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
                     65: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
                     66: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
                     67: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
                     68: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
                     69: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* second half (index 256): koi8-r -> windows-1251 */
                     70: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
                     71: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
                     72: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
                     73: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
                     74: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
                     75: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
                     76: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
                     77: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                     78: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                     79: 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
                     80: 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
                     81: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
                     82: 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
                     83: 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
                     84: 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
                     85: },
                     86: _cyr_cp866 = { /* x-cp866; first half: x-cp866 -> koi8-r */
                     87: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
                     88: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
                     89: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
                     90: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
                     91: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
                     92: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
                     93: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
                     94: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
                     95: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
                     96: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
                     97: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
                     98: 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
                     99: 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
                    100: 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
                    101: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
                    102: 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
                    103: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* second half (index 256): koi8-r -> x-cp866 */
                    104: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
                    105: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
                    106: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
                    107: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
                    108: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
                    109: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
                    110: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
                    111: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                    112: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                    113: 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
                    114: 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
                    115: 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
                    116: 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
                    117: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
                    118: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
                    119: },
                    120: _cyr_iso88595 = { /* iso8859-5; first half: iso8859-5 -> koi8-r */
                    121: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
                    122: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
                    123: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
                    124: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
                    125: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
                    126: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
                    127: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
                    128: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
                    129: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                    130: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                    131: 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                    132: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
                    133: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
                    134: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
                    135: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
                    136: 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                    137: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* second half (index 256): koi8-r -> iso8859-5 */
                    138: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
                    139: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
                    140: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
                    141: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
                    142: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
                    143: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
                    144: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
                    145: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                    146: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
                    147: 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
                    148: 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
                    149: 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
                    150: 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
                    151: 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
                    152: 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
                    153: },
                    154: _cyr_mac = { /* x-mac-cyrillic; first half: x-mac-cyrillic -> koi8-r */
                    155: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
                    156: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
                    157: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
                    158: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
                    159: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
                    160: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
                    161: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
                    162: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
                    163: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
                    164: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
                    165: 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
                    166: 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
                    167: 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
                    168: 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
                    169: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
                    170: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
                    171: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* second half (index 256): koi8-r -> x-mac-cyrillic */
                    172: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
                    173: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
                    174: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
                    175: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
                    176: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
                    177: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
                    178: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
                    179: 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
                    180: 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
                    181: 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
                    182: 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
                    183: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
                    184: 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
                    185: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
                    186: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
                    187: };
                    188: /* }}} */
                    189: 
                    190: /* {{{ static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
                    191: * This is the function that performs real in-place conversion of the string 
                    192: * between charsets. 
                    193: * Parameters:
                    194: *    str - string to be converted
                    195: *    from,to - one-symbol label of source and destination charset
                    196: * The following symbols are used as labels:
                    197: *    k - koi8-r
                    198: *    w - windows-1251
                    199: *    i - iso8859-5
                    200: *    a - x-cp866
                    201: *    d - x-cp866
                    202: *    m - x-mac-cyrillic
                    203: *****************************************************************************/
                    204: static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
                    205: {
                    206:        const unsigned char *from_table, *to_table;
                    207:        unsigned char tmp;
                    208:        int i;
                    209: 
                    210:        from_table = NULL;
                    211:        to_table   = NULL;
                    212:        
                    213:        switch (toupper((int)(unsigned char)from))
                    214:        {
                    215:                case 'W':
                    216:                        from_table = _cyr_win1251;
                    217:                        break;
                    218:                case 'A':
                    219:                case 'D':
                    220:                        from_table = _cyr_cp866;
                    221:                        break;
                    222:                case 'I':
                    223:                        from_table = _cyr_iso88595;
                    224:                        break;
                    225:                case 'M':
                    226:                        from_table = _cyr_mac;
                    227:                        break;
                    228:                case 'K':
                    229:                        break;
                    230:                default:
                    231:                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown source charset: %c", from);
                    232:                        break;
                    233:        }
                    234: 
                    235:        switch (toupper((int)(unsigned char)to))
                    236:        {
                    237:                case 'W':
                    238:                        to_table = _cyr_win1251;
                    239:                        break;
                    240:                case 'A':
                    241:                case 'D':
                    242:                        to_table = _cyr_cp866;
                    243:                        break;
                    244:                case 'I':
                    245:                        to_table = _cyr_iso88595;
                    246:                        break;
                    247:                case 'M':
                    248:                        to_table = _cyr_mac;
                    249:                        break;
                    250:                case 'K':
                    251:                        break;
                    252:                default:
                    253:                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown destination charset: %c", to);
                    254:                        break;
                    255:        }
                    256: 
                    257: 
                    258:        if (!str)
                    259:                return (char *)str;
                    260:        
                    261:        for( i = 0; i<length; i++)
                    262:        {
                    263:                tmp = (from_table == NULL)? str[i] : from_table[ str[i] ];
                    264:                str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256];
                    265:        }
                    266:        return (char *)str;
                    267: }
                    268: /* }}} */
                    269: 
                    270: /* {{{ proto string convert_cyr_string(string str, string from, string to)
                    271:    Convert from one Cyrillic character set to another */
                    272: PHP_FUNCTION(convert_cyr_string)
                    273: {
                    274:        char *input, *fr_cs, *to_cs;
                    275:        int input_len, fr_cs_len, to_cs_len;
                    276:        unsigned char *str;
                    277: 
                    278:        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss", &input, &input_len, &fr_cs, &fr_cs_len, &to_cs, &to_cs_len) == FAILURE) {
                    279:                return;
                    280:        }
                    281: 
                    282:        str = (unsigned char*) estrndup(input, input_len);
                    283: 
                    284:        php_convert_cyr_string(str, input_len, fr_cs[0], to_cs[0] TSRMLS_CC);
1.1.1.2 ! misho     285:        RETVAL_STRING((char *)str, 0);
1.1       misho     286: }
                    287: /* }}} */
                    288: 
                    289: /*
                    290:  * Local variables:
                    291:  * tab-width: 4
                    292:  * c-basic-offset: 4
                    293:  * End:
                    294:  * vim600: sw=4 ts=4 fdm=marker
                    295:  * vim<600: sw=4 ts=4
                    296:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>