Annotation of embedaddon/php/ext/mysqlnd/mysqlnd_charset.c, revision 1.1.1.1
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 2006-2012 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Authors: Georg Richter <georg@mysql.com> |
16: | Andrey Hristov <andrey@mysql.com> |
17: | Ulf Wendel <uwendel@mysql.com> |
18: +----------------------------------------------------------------------+
19: */
20: #include "php.h"
21: #include "php_globals.h"
22: #include "mysqlnd.h"
23: #include "mysqlnd_priv.h"
24: #include "mysqlnd_debug.h"
25:
26: /* {{{ utf8 functions */
/*
 * Validate the UTF-8 sequence of at most three bytes that begins at `start`.
 *
 * start - first byte of the candidate character
 * end   - one past the last readable byte
 *
 * Returns the sequence length (1, 2 or 3) when the bytes in [start, end)
 * begin with a well-formed sequence, 0 otherwise. Four-byte lead bytes
 * (0xF0 and above) are always rejected.
 */
static unsigned int check_mb_utf8mb3_sequence(const char *start, const char *end)
{
	const unsigned char *p = (const unsigned char *) start;
	unsigned char lead;

	if (start >= end) {
		return 0;
	}

	lead = p[0];

	if (lead < 0x80) {
		return 1; /* plain single byte */
	}
	if (lead < 0xC2 || lead >= 0xF0) {
		return 0; /* continuation byte, overlong lead, or beyond three bytes */
	}

	if (lead < 0xE0) {
		/* two-byte form: lead + one continuation byte (0x80..0xBF) */
		if (end - start < 2) {
			return 0; /* too small */
		}
		return ((p[1] ^ 0x80) < 0x40) ? 2 : 0;
	}

	/* three-byte form: lead 0xE0..0xEF + two continuation bytes */
	if (end - start < 3) {
		return 0; /* too small */
	}
	if ((p[1] ^ 0x80) >= 0x40 || (p[2] ^ 0x80) >= 0x40) {
		return 0; /* not continuation bytes */
	}
	if (lead == 0xE0 && p[1] < 0xA0) {
		return 0; /* overlong encoding */
	}
	return 3;
}
64:
65:
/*
 * Validate the UTF-8 sequence of at most four bytes that begins at `start`.
 *
 * start - first byte of the candidate character
 * end   - one past the last readable byte
 *
 * Returns the sequence length (1..4) when the bytes in [start, end) begin
 * with a well-formed sequence, 0 otherwise.
 */
static unsigned int check_mb_utf8_sequence(const char *start, const char *end)
{
	const unsigned char *p = (const unsigned char *) start;
	unsigned char lead;

	if (start >= end) {
		return 0;
	}

	lead = p[0];

	if (lead < 0x80) {
		return 1; /* plain single byte */
	}
	if (lead < 0xC2 || lead >= 0xF5) {
		return 0; /* continuation byte, overlong lead, or lead beyond 0xF4 */
	}

	if (lead < 0xE0) {
		/* two-byte form */
		if (end - start < 2) {
			return 0; /* too small */
		}
		return ((p[1] ^ 0x80) < 0x40) ? 2 : 0;
	}

	if (lead < 0xF0) {
		/* three-byte form */
		if (end - start < 3) {
			return 0; /* too small */
		}
		if ((p[1] ^ 0x80) >= 0x40 || (p[2] ^ 0x80) >= 0x40) {
			return 0; /* not continuation bytes */
		}
		if (lead == 0xE0 && p[1] < 0xA0) {
			return 0; /* overlong encoding */
		}
		return 3;
	}

	/*
	  Four-byte form, lead 0xF0..0xF4:
	    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	  The bit pattern could encode U+00010000..U+001FFFFF, but the highest
	  character defined by Unicode is U+0010FFFF.

	    F0.90.80.80 == U+00010000 (min)
	    F4.8F.BF.BF == U+0010FFFF (max)

	  Accepted byte ranges:
	    [F0][90..BF][80..BF][80..BF]
	    [F1][80..BF][80..BF][80..BF]
	    [F2][80..BF][80..BF][80..BF]
	    [F3][80..BF][80..BF][80..BF]
	    [F4][80..8F][80..BF][80..BF]
	*/
	if (end - start < 4) {
		return 0; /* too small */
	}
	if ((p[1] ^ 0x80) >= 0x40 || (p[2] ^ 0x80) >= 0x40 || (p[3] ^ 0x80) >= 0x40) {
		return 0; /* not continuation bytes */
	}
	if (lead == 0xF0 && p[1] < 0x90) {
		return 0; /* below U+00010000: overlong */
	}
	if (lead == 0xF4 && p[1] > 0x8F) {
		return 0; /* above U+0010FFFF */
	}
	return 4;
}
137:
/*
 * Multi-byte-only wrapper around check_mb_utf8mb3_sequence(): a result of
 * 0 or 1 (invalid or single-byte) is reported as 0, anything longer is
 * passed through unchanged.
 */
static unsigned int check_mb_utf8mb3_valid(const char *start, const char *end)
{
	unsigned int seq_len = check_mb_utf8mb3_sequence(start, end);

	if (seq_len > 1) {
		return seq_len;
	}
	return 0;
}
143:
/*
 * Multi-byte-only wrapper around check_mb_utf8_sequence(): a result of
 * 0 or 1 (invalid or single-byte) is reported as 0, anything longer is
 * passed through unchanged.
 */
static unsigned int check_mb_utf8_valid(const char *start, const char *end)
{
	unsigned int seq_len = check_mb_utf8_sequence(start, end);

	if (seq_len > 1) {
		return seq_len;
	}
	return 0;
}
149:
150:
/*
 * Length implied by a UTF-8 lead byte, limited to three-byte sequences.
 *
 * Returns 1 for values below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF and
 * 0 for everything else (invalid or unsupported lead).
 */
static unsigned int mysqlnd_mbcharlen_utf8mb3(unsigned int utf8)
{
	unsigned int width;

	if (utf8 < 0x80) {
		width = 1; /* single byte character */
	} else if (utf8 < 0xC2) {
		width = 0; /* invalid multibyte header */
	} else if (utf8 < 0xE0) {
		width = 2; /* double byte character */
	} else if (utf8 < 0xF0) {
		width = 3; /* triple byte character */
	} else {
		width = 0; /* four-byte leads are not part of this charset */
	}
	return width;
}
167:
168:
/*
 * Length implied by a UTF-8 lead byte, including four-byte sequences.
 *
 * Returns 1 for values below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF,
 * 4 for 0xF0..0xF7 and 0 for everything else.
 */
static unsigned int mysqlnd_mbcharlen_utf8(unsigned int utf8)
{
	unsigned int width;

	if (utf8 < 0x80) {
		width = 1; /* single byte character */
	} else if (utf8 < 0xC2) {
		width = 0; /* invalid multibyte header */
	} else if (utf8 < 0xE0) {
		width = 2; /* double byte character */
	} else if (utf8 < 0xF0) {
		width = 3; /* triple byte character */
	} else if (utf8 < 0xF8) {
		width = 4; /* four byte character */
	} else {
		width = 0;
	}
	return width;
}
188: /* }}} */
189:
190:
191: /* {{{ big5 functions */
/*
 * Big5 byte classification. The argument is reduced to an unsigned byte
 * first, so a plain (possibly signed) char with the high bit set is
 * classified by its byte value rather than by its sign-extended value.
 */
#define valid_big5head(c)	(0xA1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xF9)
#define valid_big5tail(c)	((0x40 <= (unsigned char)(c) && (unsigned char)(c) <= 0x7E) || \
							(0xA1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xFE))

#define isbig5code(c,d) (valid_big5head(c) && valid_big5tail(d))

/*
 * Returns 2 when [start, end) begins with a valid two-byte big5 character,
 * 0 otherwise. Nothing is read unless two bytes are available.
 */
static unsigned int check_mb_big5(const char *start, const char *end)
{
	if ((end - start) > 1 && valid_big5head(start[0]) && valid_big5tail(start[1])) {
		return 2;
	}
	return 0;
}


/*
 * Returns 2 when the low byte of `big5` is a big5 lead byte, 1 otherwise.
 * Only the low byte is considered, so a sign-extended char is accepted.
 */
static unsigned int mysqlnd_mbcharlen_big5(unsigned int big5)
{
	return (valid_big5head(big5)) ? 2 : 1;
}
208: /* }}} */
209:
210:
211: /* {{{ cp932 functions */
/*
 * cp932 byte classification. Every use of the macro parameter is
 * parenthesized so that expression arguments expand safely. Callers are
 * expected to pass an unsigned byte value.
 */
#define valid_cp932head(c) ((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_cp932tail(c) ((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
 * Returns 2 when [start, end) begins with a valid two-byte cp932 character,
 * 0 otherwise. Nothing is read unless two bytes are available.
 */
static unsigned int check_mb_cp932(const char *start, const char *end)
{
	if ((end - start) > 1 &&
		valid_cp932head((unsigned char)start[0]) &&
		valid_cp932tail((unsigned char)start[1])) {
		return 2;
	}
	return 0;
}


/*
 * Returns 2 when the low byte of `cp932` is a cp932 lead byte, 1 otherwise.
 */
static unsigned int mysqlnd_mbcharlen_cp932(unsigned int cp932)
{
	return (valid_cp932head((unsigned char)cp932)) ? 2 : 1;
}
227: /* }}} */
228:
229:
230: /* {{{ euckr functions */
/* True when the low byte of `c` lies in 0xA1..0xFE. */
#define valid_euckr(c)	(((unsigned char)(c) >= 0xA1) && ((unsigned char)(c) <= 0xFE))

/*
 * Returns 2 when at least two bytes are available, the first byte has the
 * high bit set and the second byte is in 0xA1..0xFE; 0 otherwise.
 *
 * NOTE(review): the lead byte is only tested for >= 0x80 here, while
 * mysqlnd_mbcharlen_euckr() requires 0xA1..0xFE - confirm whether that
 * asymmetry is intended.
 */
static unsigned int check_mb_euckr(const char *start, const char *end)
{
	if (end - start > 1 &&
		(unsigned char)start[0] >= 0x80 &&
		valid_euckr(start[1])) {
		return 2;
	}
	return 0; /* too short, ASCII lead byte, or bad trail byte */
}


/*
 * Returns 2 when the low byte of `kr` is in 0xA1..0xFE, 1 otherwise.
 */
static unsigned int mysqlnd_mbcharlen_euckr(unsigned int kr)
{
	if (valid_euckr(kr)) {
		return 2;
	}
	return 1;
}
252: /* }}} */
253:
254:
255: /* {{{ eucjpms functions */
/* eucjpms byte classification; only the low 8 bits of `c` are examined. */
#define valid_eucjpms(c) 		(((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xFE)
#define valid_eucjpms_kata(c)	(((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xDF)
#define valid_eucjpms_ss2(c)	(((c) & 0xFF) == 0x8E)
#define valid_eucjpms_ss3(c)	(((c) & 0xFF) == 0x8F)

/*
 * Returns the length of the eucjpms multi-byte character at `start`:
 *   2 for a pair of bytes in 0xA1..0xFE,
 *   2 for SS2 (0x8E) followed by a byte in 0xA1..0xDF,
 *   3 for SS3 (0x8F) followed by two bytes in 0xA1..0xFE,
 *   0 when none of these match within [start, end).
 */
static unsigned int check_mb_eucjpms(const char *start, const char *end)
{
	if (*((unsigned char *)start) < 0x80) {
		return 0; /* invalid eucjpms character */
	}
	if (valid_eucjpms(start[0]) && (end - start) > 1 && valid_eucjpms(start[1])) {
		return 2;
	}
	if (valid_eucjpms_ss2(start[0]) && (end - start) > 1 && valid_eucjpms_kata(start[1])) {
		return 2;
	}
	if (valid_eucjpms_ss3(start[0]) && (end - start) > 2 && valid_eucjpms(start[1]) &&
		valid_eucjpms(start[2])) {
		/* three bytes were validated, so three bytes are reported */
		return 3;
	}
	return 0;
}


/*
 * Length implied by an eucjpms lead byte: 2 for 0xA1..0xFE and SS2 (0x8E),
 * 3 for SS3 (0x8F), 1 otherwise.
 */
static unsigned int mysqlnd_mbcharlen_eucjpms(unsigned int jpms)
{
	if (valid_eucjpms(jpms) || valid_eucjpms_ss2(jpms)) {
		return 2;
	}
	if (valid_eucjpms_ss3(jpms)) {
		return 3;
	}
	return 1;
}
290: /* }}} */
291:
292:
293: /* {{{ gb2312 functions */
/* gb2312 byte classification; the argument is reduced to its low byte. */
#define valid_gb2312_head(c)	((unsigned char)(c) >= 0xA1 && (unsigned char)(c) <= 0xF7)
#define valid_gb2312_tail(c)	((unsigned char)(c) >= 0xA1 && (unsigned char)(c) <= 0xFE)


/*
 * Returns 2 when `start` holds a gb2312 lead byte (0xA1..0xF7) followed,
 * within [start, end), by a trail byte (0xA1..0xFE); 0 otherwise.
 */
static unsigned int check_mb_gb2312(const char *start, const char *end)
{
	if (!valid_gb2312_head(start[0])) {
		return 0;
	}
	if (end - start <= 1) {
		return 0; /* no room for the trail byte */
	}
	if (!valid_gb2312_tail(start[1])) {
		return 0;
	}
	return 2;
}


/*
 * Returns 2 when the low byte of `gb` is a gb2312 lead byte, 1 otherwise.
 */
static unsigned int mysqlnd_mbcharlen_gb2312(unsigned int gb)
{
	if (valid_gb2312_head(gb)) {
		return 2;
	}
	return 1;
}
309: /* }}} */
310:
311:
312: /* {{{ gbk functions */
/* gbk byte classification; the argument is reduced to its low byte. */
#define valid_gbk_head(c)	((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0xFE)
#define valid_gbk_tail(c)	(((unsigned char)(c) >= 0x40 && (unsigned char)(c) <= 0x7E) || \
							((unsigned char)(c) >= 0x80 && (unsigned char)(c) <= 0xFE))

/*
 * Returns 2 when `start` holds a gbk lead byte (0x81..0xFE) followed,
 * within [start, end), by a trail byte (0x40..0x7E or 0x80..0xFE);
 * 0 otherwise.
 */
static unsigned int check_mb_gbk(const char *start, const char *end)
{
	if (!valid_gbk_head(start[0])) {
		return 0;
	}
	if (end - start <= 1) {
		return 0; /* no room for the trail byte */
	}
	if (!valid_gbk_tail(start[1])) {
		return 0;
	}
	return 2;
}

/*
 * Returns 2 when the low byte of `gbk` is a gbk lead byte, 1 otherwise.
 */
static unsigned int mysqlnd_mbcharlen_gbk(unsigned int gbk)
{
	if (valid_gbk_head(gbk)) {
		return 2;
	}
	return 1;
}
325: /* }}} */
326:
327:
328: /* {{{ sjis functions */
/*
 * sjis byte classification. Callers are expected to pass an unsigned
 * byte value.
 */
#define valid_sjis_head(c)	(((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xFC))
#define valid_sjis_tail(c)	(((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC))


/*
 * Returns 2 when `start` holds an sjis lead byte (0x81..0x9F or 0xE0..0xFC)
 * followed, within [start, end), by a trail byte (0x40..0x7E or 0x80..0xFC);
 * 0 otherwise.
 */
static unsigned int check_mb_sjis(const char *start, const char *end)
{
	unsigned char lead = (unsigned char)start[0];

	if (!valid_sjis_head(lead)) {
		return 0;
	}
	if ((end - start) <= 1) {
		return 0; /* no room for the trail byte */
	}
	if (!valid_sjis_tail((unsigned char)start[1])) {
		return 0;
	}
	return 2;
}


/*
 * Returns 2 when the low byte of `sjis` is an sjis lead byte, 1 otherwise.
 */
static unsigned int mysqlnd_mbcharlen_sjis(unsigned int sjis)
{
	unsigned char lead = (unsigned char)sjis;

	if (valid_sjis_head(lead)) {
		return 2;
	}
	return 1;
}
343: /* }}} */
344:
345:
346: /* {{{ ucs2 functions */
/*
 * Every ucs2 character occupies exactly two bytes.
 *
 * Returns 2 when at least two bytes remain in [start, end), 0 otherwise,
 * so that a caller copying the reported number of bytes never reads past
 * `end` on odd-length input.
 */
static unsigned int check_mb_ucs2(const char *start, const char *end)
{
	return (end - start) >= 2 ? 2 : 0;
}
351:
/*
 * Character length for ucs2: the argument is ignored and the result is
 * always 2.
 */
static unsigned int mysqlnd_mbcharlen_ucs2(unsigned int ucs2)
{
	(void) ucs2; /* value is irrelevant for a fixed-width encoding */
	return 2;
}
356: /* }}} */
357:
358:
359: /* {{{ ujis functions */
/* ujis byte classification; only the low 8 bits of `c` are examined. */
#define valid_ujis(c)     	((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xFE))
#define valid_ujis_kata(c)  ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xDF))
#define valid_ujis_ss2(c) 	(((c)&0xFF) == 0x8E)
#define valid_ujis_ss3(c) 	(((c)&0xFF) == 0x8F)

/*
 * Returns the length of the ujis multi-byte character at `start`:
 *   2 for a pair of bytes in 0xA1..0xFE,
 *   2 for SS2 (0x8E) followed by a byte in 0xA1..0xDF,
 *   3 for SS3 (0x8F) followed by two bytes in 0xA1..0xFE,
 *   0 when none of these match within [start, end).
 *
 * A trail byte is only inspected after confirming that it lies before
 * `end`, so a lead byte at the very end of the buffer yields 0.
 */
static unsigned int check_mb_ujis(const char *start, const char *end)
{
	if (start >= end || *(unsigned char*)start < 0x80) {
		return 0;	/* empty input or invalid ujis character */
	}
	if ((end - start) > 1) {
		if (valid_ujis(start[0]) && valid_ujis(start[1])) {
			return 2;
		}
		if (valid_ujis_ss2(start[0]) && valid_ujis_kata(start[1])) {
			return 2;
		}
	}
	if (valid_ujis_ss3(start[0]) && (end - start) > 2 && valid_ujis(start[1]) && valid_ujis(start[2])) {
		return 3;
	}
	return 0;
}


/*
 * Length implied by a ujis lead byte: 2 for 0xA1..0xFE and SS2 (0x8E),
 * 3 for SS3 (0x8F), 1 otherwise.
 */
static unsigned int mysqlnd_mbcharlen_ujis(unsigned int ujis)
{
	if (valid_ujis(ujis) || valid_ujis_ss2(ujis)) {
		return 2;
	}
	if (valid_ujis_ss3(ujis)) {
		return 3;
	}
	return 1;
}
387: /* }}} */
388:
389:
390:
391: /* {{{ utf16 functions */
/*
 * Surrogate detection on the first byte of a UTF-16 code unit: the top six
 * bits are 110110 for a high surrogate and 110111 for a low surrogate.
 */
#define UTF16_HIGH_HEAD(x)  ((((unsigned char) (x)) & 0xFC) == 0xD8)
#define UTF16_LOW_HEAD(x)   ((((unsigned char) (x)) & 0xFC) == 0xDC)

/*
 * Returns the length of the utf16 character at `start`:
 *   4 for a high surrogate followed, within [start, end), by a low surrogate,
 *   2 for a non-surrogate code unit,
 *   0 for truncated input, an unpaired high surrogate or a leading low
 *     surrogate.
 */
static unsigned int check_mb_utf16(const char *start, const char *end)
{
	if ((end - start) < 2) {
		return 0;
	}

	if (UTF16_HIGH_HEAD(*start)) {
		return ((end - start) >= 4 && UTF16_LOW_HEAD(start[2])) ? 4 : 0;
	}

	if (UTF16_LOW_HEAD(*start)) {
		return 0;
	}
	return 2;
}


/*
 * Returns 4 when the low byte of `utf16` starts a high surrogate, 2
 * otherwise. The return type is spelled `unsigned int`, like the other
 * length helpers in this file, instead of the non-standard `uint`.
 */
static unsigned int mysqlnd_mbcharlen_utf16(unsigned int utf16)
{
	return UTF16_HIGH_HEAD(utf16) ? 4 : 2;
}
416: /* }}} */
417:
418:
419: /* {{{ utf32 functions */
/*
 * Every utf32 character occupies exactly four bytes.
 *
 * Returns 4 when at least four bytes remain in [start, end), 0 otherwise,
 * so that a caller copying the reported number of bytes never reads past
 * `end` on truncated input.
 */
static unsigned int
check_mb_utf32(const char *start, const char *end)
{
	return (end - start) >= 4 ? 4 : 0;
}
425:
426:
/*
 * Character length for utf32: the argument is ignored and the result is
 * always 4. The return type is spelled `unsigned int`, like the other
 * length helpers in this file, instead of the non-standard `uint`.
 */
static unsigned int
mysqlnd_mbcharlen_utf32(unsigned int utf32)
{
	(void) utf32; /* value is irrelevant for a fixed-width encoding */
	return 4;
}
432: /* }}} */
433:
434: /*
435: The server sometimes compiles the full UTF-8 (the mb4 variant) as utf8mb4 and the old one as utf8,
436: for BC reasons. Sometimes utf8mb4 is just utf8, but the old charsets are utf8mb3.
437: This is easy to change now with a macro, and could be made compilation-dependent.
438: */
439:
440: #define UTF8_MB4 "utf8mb4"
441: #define UTF8_MB3 "utf8"
442:
443: /* {{{ mysqlnd_charsets */
444: const MYSQLND_CHARSET mysqlnd_charsets[] =
445: {
446: { 1, "big5","big5_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_big5, check_mb_big5},
447: { 3, "dec8", "dec8_swedisch_ci", 1, 1, "", NULL, NULL},
448: { 4, "cp850", "cp850_general_ci", 1, 1, "", NULL, NULL},
449: { 6, "hp8", "hp8_english_ci", 1, 1, "", NULL, NULL},
450: { 7, "koi8r", "koi8r_general_ci", 1, 1, "", NULL, NULL},
451: { 8, "latin1", "latin1_swedish_ci", 1, 1, "", NULL, NULL},
452: { 9, "latin2", "latin2_general_ci", 1, 1, "", NULL, NULL},
453: { 10, "swe7", "swe7_swedish_ci", 1, 1, "", NULL, NULL},
454: { 11, "ascii", "ascii_general_ci", 1, 1, "", NULL, NULL},
455: { 12, "ujis", "ujis_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_ujis, check_mb_ujis},
456: { 13, "sjis", "sjis_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_sjis, check_mb_sjis},
457: { 16, "hebrew", "hebrew_general_ci", 1, 1, "", NULL, NULL},
458: { 18, "tis620", "tis620_thai_ci", 1, 1, "", NULL, NULL},
459: { 19, "euckr", "euckr_korean_ci", 1, 2, "", mysqlnd_mbcharlen_euckr, check_mb_euckr},
460: { 22, "koi8u", "koi8u_general_ci", 1, 1, "", NULL, NULL},
461: { 24, "gb2312", "gb2312_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gb2312, check_mb_gb2312},
462: { 25, "greek", "greek_general_ci", 1, 1, "", NULL, NULL},
463: { 26, "cp1250", "cp1250_general_ci", 1, 1, "", NULL, NULL},
464: { 28, "gbk", "gbk_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gbk, check_mb_gbk},
465: { 30, "latin5", "latin5_turkish_ci", 1, 1, "", NULL, NULL},
466: { 32, "armscii8", "armscii8_general_ci", 1, 1, "", NULL, NULL},
467: { 33, UTF8_MB3, UTF8_MB3"_general_ci", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
468: { 35, "ucs2", "ucs2_general_ci", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
469: { 36, "cp866", "cp866_general_ci", 1, 1, "", NULL, NULL},
470: { 37, "keybcs2", "keybcs2_general_ci", 1, 1, "", NULL, NULL},
471: { 38, "macce", "macce_general_ci", 1, 1, "", NULL, NULL},
472: { 39, "macroman", "macroman_general_ci", 1, 1, "", NULL, NULL},
473: { 40, "cp852", "cp852_general_ci", 1, 1, "", NULL, NULL},
474: { 41, "latin7", "latin7_general_ci", 1, 1, "", NULL, NULL},
475: { 51, "cp1251", "cp1251_general_ci", 1, 1, "", NULL, NULL},
476: { 57, "cp1256", "cp1256_general_ci", 1, 1, "", NULL, NULL},
477: { 59, "cp1257", "cp1257_general_ci", 1, 1, "", NULL, NULL},
478: { 63, "binary", "binary", 1, 1, "", NULL, NULL},
479: { 92, "geostd8", "geostd8_general_ci", 1, 1, "", NULL, NULL},
480: { 95, "cp932", "cp932_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_cp932, check_mb_cp932},
481: { 97, "eucjpms", "eucjpms_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
482: { 2, "latin2", "latin2_czech_cs", 1, 1, "", NULL, NULL},
483: { 5, "latin1", "latin1_german_ci", 1, 1, "", NULL, NULL},
484: { 14, "cp1251", "cp1251_bulgarian_ci", 1, 1, "", NULL, NULL},
485: { 15, "latin1", "latin1_danish_ci", 1, 1, "", NULL, NULL},
486: { 17, "filename", "filename", 1, 5, "", NULL, NULL},
487: { 20, "latin7", "latin7_estonian_cs", 1, 1, "", NULL, NULL},
488: { 21, "latin2", "latin2_hungarian_ci", 1, 1, "", NULL, NULL},
489: { 23, "cp1251", "cp1251_ukrainian_ci", 1, 1, "", NULL, NULL},
490: { 27, "latin2", "latin2_croatian_ci", 1, 1, "", NULL, NULL},
491: { 29, "cp1257", "cp1257_lithunian_ci", 1, 1, "", NULL, NULL},
492: { 31, "latin1", "latin1_german2_ci", 1, 1, "", NULL, NULL},
493: { 34, "cp1250", "cp1250_czech_cs", 1, 1, "", NULL, NULL},
494: { 42, "latin7", "latin7_general_cs", 1, 1, "", NULL, NULL},
495: { 43, "macce", "macce_bin", 1, 1, "", NULL, NULL},
496: { 44, "cp1250", "cp1250_croatian_ci", 1, 1, "", NULL, NULL},
497: { 45, UTF8_MB4, UTF8_MB4"_general_ci", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
498: { 46, UTF8_MB4, UTF8_MB4"_bin", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
499: { 47, "latin1", "latin1_bin", 1, 1, "", NULL, NULL},
500: { 48, "latin1", "latin1_general_ci", 1, 1, "", NULL, NULL},
501: { 49, "latin1", "latin1_general_cs", 1, 1, "", NULL, NULL},
502: { 50, "cp1251", "cp1251_bin", 1, 1, "", NULL, NULL},
503: { 52, "cp1251", "cp1251_general_cs", 1, 1, "", NULL, NULL},
504: { 53, "macroman", "macroman_bin", 1, 1, "", NULL, NULL},
505: { 54, "utf16", "utf16_general_ci", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
506: { 55, "utf16", "utf16_bin", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
507: { 58, "cp1257", "cp1257_bin", 1, 1, "", NULL, NULL},
508: #ifdef USED_TO_BE_SO_BEFORE_MYSQL_5_5
509: { 60, "armascii8", "armascii8_bin", 1, 1, "", NULL, NULL},
510: #endif
511: { 60, "utf32", "utf32_general_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
512: { 61, "utf32", "utf32_bin", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
513: { 65, "ascii", "ascii_bin", 1, 1, "", NULL, NULL},
514: { 66, "cp1250", "cp1250_bin", 1, 1, "", NULL, NULL},
515: { 67, "cp1256", "cp1256_bin", 1, 1, "", NULL, NULL},
516: { 68, "cp866", "cp866_bin", 1, 1, "", NULL, NULL},
517: { 69, "dec8", "dec8_bin", 1, 1, "", NULL, NULL},
518: { 70, "greek", "greek_bin", 1, 1, "", NULL, NULL},
519: { 71, "hebew", "hebrew_bin", 1, 1, "", NULL, NULL},
520: { 72, "hp8", "hp8_bin", 1, 1, "", NULL, NULL},
521: { 73, "keybcs2", "keybcs2_bin", 1, 1, "", NULL, NULL},
522: { 74, "koi8r", "koi8r_bin", 1, 1, "", NULL, NULL},
523: { 75, "koi8u", "koi8u_bin", 1, 1, "", NULL, NULL},
524: { 77, "latin2", "latin2_bin", 1, 1, "", NULL, NULL},
525: { 78, "latin5", "latin5_bin", 1, 1, "", NULL, NULL},
526: { 79, "latin7", "latin7_bin", 1, 1, "", NULL, NULL},
527: { 80, "cp850", "cp850_bin", 1, 1, "", NULL, NULL},
528: { 81, "cp852", "cp852_bin", 1, 1, "", NULL, NULL},
529: { 82, "swe7", "swe7_bin", 1, 1, "", NULL, NULL},
530: { 93, "geostd8", "geostd8_bin", 1, 1, "", NULL, NULL},
531: { 83, UTF8_MB3, UTF8_MB3"_bin", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
532: { 84, "big5", "big5_bin", 1, 2, "", mysqlnd_mbcharlen_big5, check_mb_big5},
533: { 85, "euckr", "euckr_bin", 1, 2, "", mysqlnd_mbcharlen_euckr, check_mb_euckr},
534: { 86, "gb2312", "gb2312_bin", 1, 2, "", mysqlnd_mbcharlen_gb2312, check_mb_gb2312},
535: { 87, "gbk", "gbk_bin", 1, 2, "", mysqlnd_mbcharlen_gbk, check_mb_gbk},
536: { 88, "sjis", "sjis_bin", 1, 2, "", mysqlnd_mbcharlen_sjis, check_mb_sjis},
537: { 89, "tis620", "tis620_bin", 1, 1, "", NULL, NULL},
538: { 90, "ucs2", "ucs2_bin", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
539: { 91, "ujis", "ujis_bin", 1, 3, "", mysqlnd_mbcharlen_ujis, check_mb_ujis},
540: { 94, "latin1", "latin1_spanish_ci", 1, 1, "", NULL, NULL},
541: { 96, "cp932", "cp932_bin", 1, 2, "", mysqlnd_mbcharlen_cp932, check_mb_cp932},
542: { 99, "cp1250", "cp1250_polish_ci", 1, 1, "", NULL, NULL},
543: { 98, "eucjpms", "eucjpms_bin", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
544: { 128, "ucs2", "ucs2_unicode_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
545: { 129, "ucs2", "ucs2_icelandic_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
546: { 130, "ucs2", "ucs2_latvian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
547: { 131, "ucs2", "ucs2_romanian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
548: { 132, "ucs2", "ucs2_slovenian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
549: { 133, "ucs2", "ucs2_polish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
550: { 134, "ucs2", "ucs2_estonian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
551: { 135, "ucs2", "ucs2_spanish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
552: { 136, "ucs2", "ucs2_swedish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
553: { 137, "ucs2", "ucs2_turkish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
554: { 138, "ucs2", "ucs2_czech_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
555: { 139, "ucs2", "ucs2_danish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
556: { 140, "ucs2", "ucs2_lithunian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
557: { 141, "ucs2", "ucs2_slovak_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
558: { 142, "ucs2", "ucs2_spanish2_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
559: { 143, "ucs2", "ucs2_roman_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
560: { 144, "ucs2", "ucs2_persian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
561: { 145, "ucs2", "ucs2_esperanto_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
562: { 146, "ucs2", "ucs2_hungarian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
563: { 147, "ucs2", "ucs2_sinhala_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
564: { 149, "ucs2", "ucs2_croatian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
565:
566: { 192, UTF8_MB3, UTF8_MB3"_general_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
567: { 193, UTF8_MB3, UTF8_MB3"_icelandic_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
568: { 194, UTF8_MB3, UTF8_MB3"_latvian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
569: { 195, UTF8_MB3, UTF8_MB3"_romanian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
570: { 196, UTF8_MB3, UTF8_MB3"_slovenian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
571: { 197, UTF8_MB3, UTF8_MB3"_polish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
572: { 198, UTF8_MB3, UTF8_MB3"_estonian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
573: { 119, UTF8_MB3, UTF8_MB3"_spanish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
574: { 200, UTF8_MB3, UTF8_MB3"_swedish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
575: { 201, UTF8_MB3, UTF8_MB3"_turkish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
576: { 202, UTF8_MB3, UTF8_MB3"_czech_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
577: { 203, UTF8_MB3, UTF8_MB3"_danish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
578: { 204, UTF8_MB3, UTF8_MB3"_lithunian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
579: { 205, UTF8_MB3, UTF8_MB3"_slovak_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
580: { 206, UTF8_MB3, UTF8_MB3"_spanish2_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
581: { 207, UTF8_MB3, UTF8_MB3"_roman_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
582: { 208, UTF8_MB3, UTF8_MB3"_persian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
583: { 209, UTF8_MB3, UTF8_MB3"_esperanto_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
584: { 210, UTF8_MB3, UTF8_MB3"_hungarian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
585: { 211, UTF8_MB3, UTF8_MB3"_sinhala_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
586: { 213, UTF8_MB3, UTF8_MB3"_croatian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
587:
588: { 224, UTF8_MB4, UTF8_MB4"_unicode_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
589: { 225, UTF8_MB4, UTF8_MB4"_icelandic_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
590: { 226, UTF8_MB4, UTF8_MB4"_latvian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
591: { 227, UTF8_MB4, UTF8_MB4"_romanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
592: { 228, UTF8_MB4, UTF8_MB4"_slovenian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
593: { 229, UTF8_MB4, UTF8_MB4"_polish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
594: { 230, UTF8_MB4, UTF8_MB4"_estonian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
595: { 231, UTF8_MB4, UTF8_MB4"_spanish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
596: { 232, UTF8_MB4, UTF8_MB4"_swedish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
597: { 233, UTF8_MB4, UTF8_MB4"_turkish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
598: { 234, UTF8_MB4, UTF8_MB4"_czech_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
599: { 235, UTF8_MB4, UTF8_MB4"_danish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
600: { 236, UTF8_MB4, UTF8_MB4"_lithuanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
601: { 237, UTF8_MB4, UTF8_MB4"_slovak_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
602: { 238, UTF8_MB4, UTF8_MB4"_spanish2_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
603: { 239, UTF8_MB4, UTF8_MB4"_roman_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
604: { 240, UTF8_MB4, UTF8_MB4"_persian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
605: { 241, UTF8_MB4, UTF8_MB4"_esperanto_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
606: { 242, UTF8_MB4, UTF8_MB4"_hungarian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
607: { 243, UTF8_MB4, UTF8_MB4"_sinhala_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
608:
609: { 254, UTF8_MB3, UTF8_MB3"_general_cs", 1, 3, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
610: { 0, NULL, NULL, 0, 0, NULL, NULL, NULL}
611: };
612: /* }}} */
613:
614:
615: /* {{{ mysqlnd_find_charset_nr */
616: PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_nr(unsigned int charsetnr)
617: {
618: const MYSQLND_CHARSET * c = mysqlnd_charsets;
619:
620: do {
621: if (c->nr == charsetnr) {
622: return c;
623: }
624: ++c;
625: } while (c[0].nr != 0);
626: return NULL;
627: }
628: /* }}} */
629:
630:
631: /* {{{ mysqlnd_find_charset_name */
632: PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_name(const char * const name)
633: {
634: const MYSQLND_CHARSET *c = mysqlnd_charsets;
635:
636: do {
637: if (!strcasecmp(c->name, name)) {
638: return c;
639: }
640: ++c;
641: } while (c[0].nr != 0);
642: return NULL;
643: }
644: /* }}} */
645:
646:
647: /* {{{ mysqlnd_cset_escape_quotes */
648: PHPAPI ulong mysqlnd_cset_escape_quotes(const MYSQLND_CHARSET * const cset, char *newstr,
649: const char * escapestr, size_t escapestr_len TSRMLS_DC)
650: {
651: const char *newstr_s = newstr;
652: const char *newstr_e = newstr + 2 * escapestr_len;
653: const char *end = escapestr + escapestr_len;
654: zend_bool escape_overflow = FALSE;
655:
656: DBG_ENTER("mysqlnd_cset_escape_quotes");
657:
658: for (;escapestr < end; escapestr++) {
659: unsigned int len = 0;
660: /* check unicode characters */
661:
662: if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
663:
664: /* check possible overflow */
665: if ((newstr + len) > newstr_e) {
666: escape_overflow = TRUE;
667: break;
668: }
669: /* copy mb char without escaping it */
670: while (len--) {
671: *newstr++ = *escapestr++;
672: }
673: escapestr--;
674: continue;
675: }
676: if (*escapestr == '\'') {
677: if (newstr + 2 > newstr_e) {
678: escape_overflow = TRUE;
679: break;
680: }
681: *newstr++ = '\'';
682: *newstr++ = '\'';
683: } else {
684: if (newstr + 1 > newstr_e) {
685: escape_overflow = TRUE;
686: break;
687: }
688: *newstr++ = *escapestr;
689: }
690: }
691: *newstr = '\0';
692:
693: if (escape_overflow) {
694: DBG_RETURN((ulong)~0);
695: }
696: DBG_RETURN((ulong)(newstr - newstr_s));
697: }
698: /* }}} */
699:
700:
701: /* {{{ mysqlnd_cset_escape_slashes */
702: PHPAPI ulong mysqlnd_cset_escape_slashes(const MYSQLND_CHARSET * const cset, char *newstr,
703: const char * escapestr, size_t escapestr_len TSRMLS_DC)
704: {
705: const char *newstr_s = newstr;
706: const char *newstr_e = newstr + 2 * escapestr_len;
707: const char *end = escapestr + escapestr_len;
708: zend_bool escape_overflow = FALSE;
709:
710: DBG_ENTER("mysqlnd_cset_escape_slashes");
711: DBG_INF_FMT("charset=%s", cset->name);
712:
713: for (;escapestr < end; escapestr++) {
714: char esc = '\0';
715: unsigned int len = 0;
716:
717: /* check unicode characters */
718: if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
719: /* check possible overflow */
720: if ((newstr + len) > newstr_e) {
721: escape_overflow = TRUE;
722: break;
723: }
724: /* copy mb char without escaping it */
725: while (len--) {
726: *newstr++ = *escapestr++;
727: }
728: escapestr--;
729: continue;
730: }
731: if (cset->char_maxlen > 1 && cset->mb_charlen(*escapestr) > 1) {
732: esc = *escapestr;
733: } else {
734: switch (*escapestr) {
735: case 0:
736: esc = '0';
737: break;
738: case '\n':
739: esc = 'n';
740: break;
741: case '\r':
742: esc = 'r';
743: break;
744: case '\\':
745: case '\'':
746: case '"':
747: esc = *escapestr;
748: break;
749: case '\032':
750: esc = 'Z';
751: break;
752: }
753: }
754: if (esc) {
755: if (newstr + 2 > newstr_e) {
756: escape_overflow = TRUE;
757: break;
758: }
759: /* copy escaped character */
760: *newstr++ = '\\';
761: *newstr++ = esc;
762: } else {
763: if (newstr + 1 > newstr_e) {
764: escape_overflow = TRUE;
765: break;
766: }
767: /* copy non escaped character */
768: *newstr++ = *escapestr;
769: }
770: }
771: *newstr = '\0';
772:
773: if (escape_overflow) {
774: DBG_RETURN((ulong)~0);
775: }
776: DBG_RETURN((ulong)(newstr - newstr_s));
777: }
778: /* }}} */
779:
780: /*
781: * Local variables:
782: * tab-width: 4
783: * c-basic-offset: 4
784: * End:
785: * vim600: noet sw=4 ts=4 fdm=marker
786: * vim<600: noet sw=4 ts=4
787: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>