Annotation of embedaddon/php/ext/standard/cyr_convert.c, revision 1.1.1.2
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 1997-2012 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Kirill Maximov <kir@rus.net> |
16: +----------------------------------------------------------------------+
17: */
18:
1.1.1.2 ! misho 19: /* $Id$ */
1.1 misho 20:
21: #include <stdlib.h>
22:
23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
26: #include <string.h>
27: #include <errno.h>
28:
29: #include "php.h"
30: #include "cyr_convert.h"
31:
32: #include <stdio.h>
33:
34: /*****************************************************************************
35: * These are the code tables for the different Cyrillic charsets (relative to koi8-r).
36: * Each half of a table has an entry for every byte value 0-255; values 0-127 map to themselves, so only 128-255 are really translated.
37: * The first 256 entries convert from the corresponding charset to koi8-r,
38: * the second 256 entries do the reverse conversion, from koi8-r to the charset.
39: *
40: * Here we have the following tables:
41: * _cyr_win1251 - for windows-1251 charset
42: * _cyr_iso88595 - for iso8859-5 charset
43: * _cyr_cp866 - for x-cp866 charset
44: * _cyr_mac - for x-mac-cyrillic charset
45: *
46: *****************************************************************************/
47:
48: typedef unsigned char _cyr_charset_table[512]; /* two 256-entry halves: charset -> koi8-r, then koi8-r -> charset */
49:
50: /* {{{ static const _cyr_charset_table _cyr_win1251, _cyr_cp866, _cyr_iso88595, _cyr_mac
51: * Layout of each table: entries 0-255 are indexed by a byte of the named charset and give the koi8-r byte; entries 256-511 are indexed by (koi8-r byte + 256) and give the byte of the named charset. */
52: static const _cyr_charset_table _cyr_win1251 = {
53: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
54: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
55: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
56: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
57: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
58: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
59: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
60: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
61: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
62: 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
63: 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
64: 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
65: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
66: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
67: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
68: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
69: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
70: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
71: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
72: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
73: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
74: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
75: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
76: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
77: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
78: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
79: 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
80: 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
81: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
82: 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
83: 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
84: 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
85: },
86: _cyr_cp866 = {
87: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
88: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
89: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
90: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
91: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
92: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
93: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
94: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
95: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
96: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
97: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
98: 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
99: 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
100: 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
101: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
102: 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
103: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
104: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
105: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
106: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
107: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
108: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
109: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
110: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
111: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
112: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
113: 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
114: 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
115: 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
116: 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
117: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
118: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
119: },
120: _cyr_iso88595 = {
121: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
122: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
123: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
124: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
125: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
126: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
127: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
128: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
129: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
130: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
131: 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
132: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
133: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
134: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
135: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
136: 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
137: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
138: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
139: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
140: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
141: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
142: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
143: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
144: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
145: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
146: 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
147: 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
148: 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
149: 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
150: 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
151: 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
152: 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
153: },
154: _cyr_mac = {
155: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
156: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
157: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
158: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
159: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
160: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
161: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
162: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
163: 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
164: 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
165: 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
166: 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
167: 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
168: 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
169: 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
170: 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
171: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
172: 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
173: 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
174: 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
175: 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
176: 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
177: 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
178: 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
179: 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
180: 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
181: 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
182: 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
183: 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
184: 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
185: 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
186: 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
187: };
188: /* }}} */
189:
190: /* {{{ static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
191: * This is the function that performs real in-place conversion of the string
192: * between charsets.
193: * Parameters:
194: * str - string to be converted
195: * from,to - one-symbol label of source and destination charset
196: * The following symbols are used as labels:
197: * k - koi8-r
198: * w - windows-1251
199: * i - iso8859-5
200: * a - x-cp866
201: * d - x-cp866
202: * m - x-mac-cyrillic
203: *****************************************************************************/
204: static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
205: {
206: const unsigned char *from_table, *to_table;
207: unsigned char tmp;
208: int i;
209:
210: from_table = NULL;
211: to_table = NULL;
212:
213: switch (toupper((int)(unsigned char)from))
214: {
215: case 'W':
216: from_table = _cyr_win1251;
217: break;
218: case 'A':
219: case 'D':
220: from_table = _cyr_cp866;
221: break;
222: case 'I':
223: from_table = _cyr_iso88595;
224: break;
225: case 'M':
226: from_table = _cyr_mac;
227: break;
228: case 'K':
229: break;
230: default:
231: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown source charset: %c", from);
232: break;
233: }
234:
235: switch (toupper((int)(unsigned char)to))
236: {
237: case 'W':
238: to_table = _cyr_win1251;
239: break;
240: case 'A':
241: case 'D':
242: to_table = _cyr_cp866;
243: break;
244: case 'I':
245: to_table = _cyr_iso88595;
246: break;
247: case 'M':
248: to_table = _cyr_mac;
249: break;
250: case 'K':
251: break;
252: default:
253: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown destination charset: %c", to);
254: break;
255: }
256:
257:
258: if (!str)
259: return (char *)str;
260:
261: for( i = 0; i<length; i++)
262: {
263: tmp = (from_table == NULL)? str[i] : from_table[ str[i] ];
264: str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256];
265: }
266: return (char *)str;
267: }
268: /* }}} */
269:
270: /* {{{ proto string convert_cyr_string(string str, string from, string to)
271: Convert from one Cyrillic character set to another */
272: PHP_FUNCTION(convert_cyr_string)
273: {
274: char *input, *fr_cs, *to_cs;
275: int input_len, fr_cs_len, to_cs_len;
276: unsigned char *str;
277:
278: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss", &input, &input_len, &fr_cs, &fr_cs_len, &to_cs, &to_cs_len) == FAILURE) {
279: return;
280: }
281:
282: str = (unsigned char*) estrndup(input, input_len);
283:
284: php_convert_cyr_string(str, input_len, fr_cs[0], to_cs[0] TSRMLS_CC);
1.1.1.2 ! misho 285: RETVAL_STRING((char *)str, 0);
1.1 misho 286: }
287: /* }}} */
288:
289: /*
290: * Local variables:
291: * tab-width: 4
292: * c-basic-offset: 4
293: * End:
294: * vim600: sw=4 ts=4 fdm=marker
295: * vim<600: sw=4 ts=4
296: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>