File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard / cyr_convert.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 20:03:57 2014 UTC (10 years, 1 month ago) by misho
Branches: php, MAIN
CVS tags: v5_4_29, HEAD
php 5.4.29

    1: /*
    2:    +----------------------------------------------------------------------+
    3:    | PHP Version 5                                                        |
    4:    +----------------------------------------------------------------------+
    5:    | Copyright (c) 1997-2014 The PHP Group                                |
    6:    +----------------------------------------------------------------------+
    7:    | This source file is subject to version 3.01 of the PHP license,      |
    8:    | that is bundled with this package in the file LICENSE, and is        |
    9:    | available through the world-wide-web at the following url:           |
   10:    | http://www.php.net/license/3_01.txt                                  |
   11:    | If you did not receive a copy of the PHP license and are unable to   |
   12:    | obtain it through the world-wide-web, please send a note to          |
   13:    | license@php.net so we can mail you a copy immediately.               |
   14:    +----------------------------------------------------------------------+
   15:    | Author: Kirill Maximov <kir@rus.net>                                 |
   16:    +----------------------------------------------------------------------+
   17:  */
   18: 
   19: /* $Id: cyr_convert.c,v 1.1.1.4 2014/06/15 20:03:57 misho Exp $ */
   20: 
   21: #include <stdlib.h>
   22: 
   23: #ifdef HAVE_UNISTD_H
   24: #include <unistd.h>
   25: #endif
   26: #include <string.h>
   27: #include <errno.h>
   28: 
   29: #include "php.h"
   30: #include "cyr_convert.h"
   31: 
   32: #include <stdio.h>
   33: 
   34: /*****************************************************************************
   35: * This is codetables for different Cyrillic charsets (relative to koi8-r). 
   36: * Each table contains data for 128-255 symbols from ASCII table.
   37: * First 256 symbols are for conversion from koi8-r to corresponding charset,
   38: * second 256 symbols are for reverse conversion, from charset to koi8-r.
   39: *
   40: * Here we have the following tables:
   41: * _cyr_win1251   - for windows-1251 charset
   42: * _cyr_iso88595  - for iso8859-5 charset
   43: * _cyr_cp866     - for x-cp866 charset
   44: * _cyr_mac       - for x-mac-cyrillic charset
   45: *
   46: *****************************************************************************/
   47: 
   48: typedef unsigned char _cyr_charset_table[512];
   49: 
   50: /* {{{ static const _cyr_charset_table _cyr_win1251
   51:  */
   52: static const _cyr_charset_table _cyr_win1251 = {
   53: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
   54: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
   55: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
   56: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
   57: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
   58: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
   59: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
   60: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
   61: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
   62: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
   63: 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
   64: 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
   65: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
   66: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
   67: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
   68: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
   69: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
   70: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
   71: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
   72: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
   73: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
   74: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
   75: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
   76: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
   77: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
   78: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
   79: 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
   80: 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
   81: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
   82: 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
   83: 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
   84: 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
   85: },
   86: _cyr_cp866 = { 
   87: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
   88: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
   89: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
   90: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
   91: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
   92: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
   93: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
   94: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
   95: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
   96: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
   97: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
   98: 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
   99: 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
  100: 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
  101: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
  102: 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
  103: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  104: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  105: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  106: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  107: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  108: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  109: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  110: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  111: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  112: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  113: 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
  114: 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
  115: 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
  116: 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
  117: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
  118: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
  119: },
  120: _cyr_iso88595 = {
  121: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  122: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  123: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  124: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  125: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  126: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  127: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  128: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  129: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  130: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  131: 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  132: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
  133: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
  134: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
  135: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
  136: 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  137: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  138: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  139: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  140: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  141: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  142: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  143: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  144: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  145: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  146: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  147: 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
  148: 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
  149: 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
  150: 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
  151: 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
  152: 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
  153: },
  154: _cyr_mac = {
  155: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  156: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  157: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  158: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  159: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  160: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  161: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  162: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  163: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
  164: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
  165: 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  166: 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  167: 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  168: 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
  169: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
  170: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
  171: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  172: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  173: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  174: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  175: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  176: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  177: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  178: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  179: 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  180: 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  181: 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
  182: 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
  183: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
  184: 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
  185: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
  186: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
  187: };
  188: /* }}} */
  189: 
  190: /* {{{ static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
  191: * This is the function that performs real in-place conversion of the string 
  192: * between charsets. 
  193: * Parameters:
  194: *    str - string to be converted
  195: *    from,to - one-symbol label of source and destination charset
  196: * The following symbols are used as labels:
  197: *    k - koi8-r
  198: *    w - windows-1251
  199: *    i - iso8859-5
  200: *    a - x-cp866
  201: *    d - x-cp866
  202: *    m - x-mac-cyrillic
  203: *****************************************************************************/
  204: static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
  205: {
  206: 	const unsigned char *from_table, *to_table;
  207: 	unsigned char tmp;
  208: 	int i;
  209: 
  210: 	from_table = NULL;
  211: 	to_table   = NULL;
  212: 	
  213: 	switch (toupper((int)(unsigned char)from))
  214: 	{
  215: 		case 'W':
  216: 			from_table = _cyr_win1251;
  217: 			break;
  218: 		case 'A':
  219: 		case 'D':
  220: 			from_table = _cyr_cp866;
  221: 			break;
  222: 		case 'I':
  223: 			from_table = _cyr_iso88595;
  224: 			break;
  225: 		case 'M':
  226: 			from_table = _cyr_mac;
  227: 			break;
  228: 		case 'K':
  229: 			break;
  230: 		default:
  231: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown source charset: %c", from);
  232: 			break;
  233: 	}
  234: 
  235: 	switch (toupper((int)(unsigned char)to))
  236: 	{
  237: 		case 'W':
  238: 			to_table = _cyr_win1251;
  239: 			break;
  240: 		case 'A':
  241: 		case 'D':
  242: 			to_table = _cyr_cp866;
  243: 			break;
  244: 		case 'I':
  245: 			to_table = _cyr_iso88595;
  246: 			break;
  247: 		case 'M':
  248: 			to_table = _cyr_mac;
  249: 			break;
  250: 		case 'K':
  251: 			break;
  252: 		default:
  253: 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown destination charset: %c", to);
  254: 			break;
  255: 	}
  256: 
  257: 
  258: 	if (!str)
  259: 		return (char *)str;
  260: 	
  261: 	for( i = 0; i<length; i++)
  262: 	{
  263: 		tmp = (from_table == NULL)? str[i] : from_table[ str[i] ];
  264: 		str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256];
  265: 	}
  266: 	return (char *)str;
  267: }
  268: /* }}} */
  269: 
  270: /* {{{ proto string convert_cyr_string(string str, string from, string to)
  271:    Convert from one Cyrillic character set to another */
  272: PHP_FUNCTION(convert_cyr_string)
  273: {
  274: 	char *input, *fr_cs, *to_cs;
  275: 	int input_len, fr_cs_len, to_cs_len;
  276: 	unsigned char *str;
  277: 
  278: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss", &input, &input_len, &fr_cs, &fr_cs_len, &to_cs, &to_cs_len) == FAILURE) {
  279: 		return;
  280: 	}
  281: 
  282: 	str = (unsigned char*) estrndup(input, input_len);
  283: 
  284: 	php_convert_cyr_string(str, input_len, fr_cs[0], to_cs[0] TSRMLS_CC);
  285: 	RETVAL_STRING((char *)str, 0);
  286: }
  287: /* }}} */
  288: 
  289: /*
  290:  * Local variables:
  291:  * tab-width: 4
  292:  * c-basic-offset: 4
  293:  * End:
  294:  * vim600: sw=4 ts=4 fdm=marker
  295:  * vim<600: sw=4 ts=4
  296:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>