/* Return to cyr_convert.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard | */
1.1 ! misho 1: /* ! 2: +----------------------------------------------------------------------+ ! 3: | PHP Version 5 | ! 4: +----------------------------------------------------------------------+ ! 5: | Copyright (c) 1997-2012 The PHP Group | ! 6: +----------------------------------------------------------------------+ ! 7: | This source file is subject to version 3.01 of the PHP license, | ! 8: | that is bundled with this package in the file LICENSE, and is | ! 9: | available through the world-wide-web at the following url: | ! 10: | http://www.php.net/license/3_01.txt | ! 11: | If you did not receive a copy of the PHP license and are unable to | ! 12: | obtain it through the world-wide-web, please send a note to | ! 13: | license@php.net so we can mail you a copy immediately. | ! 14: +----------------------------------------------------------------------+ ! 15: | Author: Kirill Maximov <kir@rus.net> | ! 16: +----------------------------------------------------------------------+ ! 17: */ ! 18: ! 19: /* $Id: cyr_convert.c 321634 2012-01-01 13:15:04Z felipe $ */ ! 20: ! 21: #include <stdlib.h> ! 22: ! 23: #ifdef HAVE_UNISTD_H ! 24: #include <unistd.h> ! 25: #endif ! 26: #include <string.h> ! 27: #include <errno.h> ! 28: ! 29: #include "php.h" ! 30: #include "cyr_convert.h" ! 31: ! 32: #include <stdio.h> ! 33: ! 34: /***************************************************************************** ! 35: * This is codetables for different Cyrillic charsets (relative to koi8-r). ! 36: * Each table contains data for 128-255 symbols from ASCII table. ! 37: * First 256 symbols are for conversion from koi8-r to corresponding charset, ! 38: * second 256 symbols are for reverse conversion, from charset to koi8-r. ! 39: * ! 40: * Here we have the following tables: ! 41: * _cyr_win1251 - for windows-1251 charset ! 42: * _cyr_iso88595 - for iso8859-5 charset ! 43: * _cyr_cp866 - for x-cp866 charset ! 44: * _cyr_mac - for x-mac-cyrillic charset ! 45: * ! 
46: *****************************************************************************/ ! 47: ! 48: typedef unsigned char _cyr_charset_table[512]; ! 49: ! 50: /* {{{ static const _cyr_charset_table _cyr_win1251 ! 51: */ ! 52: static const _cyr_charset_table _cyr_win1251 = { ! 53: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, ! 54: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, ! 55: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, ! 56: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, ! 57: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, ! 58: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, ! 59: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, ! 60: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, ! 61: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46, ! 62: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46, ! 63: 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183, ! 64: 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167, ! 65: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, ! 66: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, ! 67: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, ! 68: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209, ! 69: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, ! 70: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, ! 71: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, ! 72: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, ! 73: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, ! 74: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, ! 75: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, ! 76: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, ! 77: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 78: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 79: 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32, ! 80: 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169, ! 
81: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238, ! 82: 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250, ! 83: 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206, ! 84: 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218, ! 85: }, ! 86: _cyr_cp866 = { ! 87: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, ! 88: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, ! 89: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, ! 90: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, ! 91: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, ! 92: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, ! 93: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, ! 94: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, ! 95: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, ! 96: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, ! 97: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, ! 98: 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43, ! 99: 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45, ! 100: 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35, ! 101: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209, ! 102: 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154, ! 103: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, ! 104: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, ! 105: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, ! 106: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, ! 107: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, ! 108: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, ! 109: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, ! 110: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, ! 111: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 112: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 113: 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198, ! 
114: 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32, ! 115: 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174, ! 116: 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234, ! 117: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142, ! 118: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154, ! 119: }, ! 120: _cyr_iso88595 = { ! 121: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, ! 122: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, ! 123: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, ! 124: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, ! 125: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, ! 126: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, ! 127: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, ! 128: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, ! 129: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 130: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 131: 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 132: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, ! 133: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, ! 134: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, ! 135: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209, ! 136: 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 137: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, ! 138: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, ! 139: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, ! 140: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, ! 141: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, ! 142: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, ! 143: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, ! 144: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, ! 145: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 146: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, ! 
147: 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32, ! 148: 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32, ! 149: 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222, ! 150: 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234, ! 151: 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190, ! 152: 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202, ! 153: }, ! 154: _cyr_mac = { ! 155: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, ! 156: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, ! 157: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, ! 158: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, ! 159: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, ! 160: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, ! 161: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, ! 162: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, ! 163: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, ! 164: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, ! 165: 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, ! 166: 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, ! 167: 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, ! 168: 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209, ! 169: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, ! 170: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255, ! 171: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, ! 172: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, ! 173: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, ! 174: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, ! 175: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, ! 176: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, ! 177: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, ! 178: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, ! 
179: 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, ! 180: 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, ! 181: 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175, ! 182: 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191, ! 183: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238, ! 184: 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250, ! 185: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142, ! 186: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154, ! 187: }; ! 188: /* }}} */ ! 189: ! 190: /* {{{ static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC) ! 191: * This is the function that performs real in-place conversion of the string ! 192: * between charsets. ! 193: * Parameters: ! 194: * str - string to be converted ! 195: * from,to - one-symbol label of source and destination charset ! 196: * The following symbols are used as labels: ! 197: * k - koi8-r ! 198: * w - windows-1251 ! 199: * i - iso8859-5 ! 200: * a - x-cp866 ! 201: * d - x-cp866 ! 202: * m - x-mac-cyrillic ! 203: *****************************************************************************/ ! 204: static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC) ! 205: { ! 206: const unsigned char *from_table, *to_table; ! 207: unsigned char tmp; ! 208: int i; ! 209: ! 210: from_table = NULL; ! 211: to_table = NULL; ! 212: ! 213: switch (toupper((int)(unsigned char)from)) ! 214: { ! 215: case 'W': ! 216: from_table = _cyr_win1251; ! 217: break; ! 218: case 'A': ! 219: case 'D': ! 220: from_table = _cyr_cp866; ! 221: break; ! 222: case 'I': ! 223: from_table = _cyr_iso88595; ! 224: break; ! 225: case 'M': ! 226: from_table = _cyr_mac; ! 227: break; ! 228: case 'K': ! 229: break; ! 230: default: ! 231: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown source charset: %c", from); ! 232: break; ! 233: } ! 
234: ! 235: switch (toupper((int)(unsigned char)to)) ! 236: { ! 237: case 'W': ! 238: to_table = _cyr_win1251; ! 239: break; ! 240: case 'A': ! 241: case 'D': ! 242: to_table = _cyr_cp866; ! 243: break; ! 244: case 'I': ! 245: to_table = _cyr_iso88595; ! 246: break; ! 247: case 'M': ! 248: to_table = _cyr_mac; ! 249: break; ! 250: case 'K': ! 251: break; ! 252: default: ! 253: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown destination charset: %c", to); ! 254: break; ! 255: } ! 256: ! 257: ! 258: if (!str) ! 259: return (char *)str; ! 260: ! 261: for( i = 0; i<length; i++) ! 262: { ! 263: tmp = (from_table == NULL)? str[i] : from_table[ str[i] ]; ! 264: str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256]; ! 265: } ! 266: return (char *)str; ! 267: } ! 268: /* }}} */ ! 269: ! 270: /* {{{ proto string convert_cyr_string(string str, string from, string to) ! 271: Convert from one Cyrillic character set to another */ ! 272: PHP_FUNCTION(convert_cyr_string) ! 273: { ! 274: char *input, *fr_cs, *to_cs; ! 275: int input_len, fr_cs_len, to_cs_len; ! 276: unsigned char *str; ! 277: ! 278: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss", &input, &input_len, &fr_cs, &fr_cs_len, &to_cs, &to_cs_len) == FAILURE) { ! 279: return; ! 280: } ! 281: ! 282: str = (unsigned char*) estrndup(input, input_len); ! 283: ! 284: php_convert_cyr_string(str, input_len, fr_cs[0], to_cs[0] TSRMLS_CC); ! 285: RETVAL_STRING((char *)str, 0) ! 286: } ! 287: /* }}} */ ! 288: ! 289: /* ! 290: * Local variables: ! 291: * tab-width: 4 ! 292: * c-basic-offset: 4 ! 293: * End: ! 294: * vim600: sw=4 ts=4 fdm=marker ! 295: * vim<600: sw=4 ts=4 ! 296: */