Annotation of embedaddon/libiconv/lib/iso2022_cnext.h, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: /*
22: * ISO-2022-CN-EXT
23: */
24:
25: /* Specification: RFC 1922 */
26:
/* Control characters that drive the encoding. */
#define ESC 0x1b
#define SO 0x0e
#define SI 0x0f

/*
 * The conversion state is one integer holding four independent fields,
 * eight bits each.
 *
 * Bits 0..7: the current shift state, toggled by SO and SI.
 */
#define STATE_ASCII 0
#define STATE_TWOBYTE 1
/*
 * Bits 8..15: the two-byte set used while shifted out with SO
 * (designated by ESC $ ) A, ESC $ ) G or ESC $ ) E).
 */
#define STATE2_NONE 0
#define STATE2_DESIGNATED_GB2312 1
#define STATE2_DESIGNATED_CNS11643_1 2
#define STATE2_DESIGNATED_ISO_IR_165 3
/*
 * Bits 16..23: the two-byte set reached through ESC N
 * (designated by ESC $ * H).
 */
#define STATE3_NONE 0
#define STATE3_DESIGNATED_CNS11643_2 1
/*
 * Bits 24..31: the two-byte set reached through ESC O
 * (designated by ESC $ + I .. ESC $ + M).
 */
#define STATE4_NONE 0
#define STATE4_DESIGNATED_CNS11643_3 1
#define STATE4_DESIGNATED_CNS11643_4 2
#define STATE4_DESIGNATED_CNS11643_5 3
#define STATE4_DESIGNATED_CNS11643_6 4
#define STATE4_DESIGNATED_CNS11643_7 5

/*
 * SPLIT_STATE declares state1..state4 and fills them from a variable named
 * 'state' that must be in scope.  It expands to declarations without the
 * final semicolon, so it is written as "SPLIT_STATE;" among the declarations
 * of a block.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff; \
  unsigned int state2 = (state >> 8) & 0xff; \
  unsigned int state3 = (state >> 16) & 0xff; \
  unsigned int state4 = state >> 24
/*
 * COMBINE_STATE packs state1..state4 back into 'state'.
 */
#define COMBINE_STATE \
  state = state1 | (state2 << 8) | (state3 << 16) | (state4 << 24)
62:
63: static int
64: iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
65: {
66: state_t state = conv->istate;
67: SPLIT_STATE;
68: int count = 0;
69: unsigned char c;
70: for (;;) {
71: c = *s;
72: if (c == ESC) {
73: if (n < count+4)
74: goto none;
75: if (s[1] == '$') {
76: if (s[2] == ')') {
77: if (s[3] == 'A') {
78: state2 = STATE2_DESIGNATED_GB2312;
79: s += 4; count += 4;
80: if (n < count+1)
81: goto none;
82: continue;
83: }
84: if (s[3] == 'G') {
85: state2 = STATE2_DESIGNATED_CNS11643_1;
86: s += 4; count += 4;
87: if (n < count+1)
88: goto none;
89: continue;
90: }
91: if (s[3] == 'E') {
92: state2 = STATE2_DESIGNATED_ISO_IR_165;
93: s += 4; count += 4;
94: if (n < count+1)
95: goto none;
96: continue;
97: }
98: }
99: if (s[2] == '*') {
100: if (s[3] == 'H') {
101: state3 = STATE3_DESIGNATED_CNS11643_2;
102: s += 4; count += 4;
103: if (n < count+1)
104: goto none;
105: continue;
106: }
107: }
108: if (s[2] == '+') {
109: if (s[3] == 'I') {
110: state4 = STATE4_DESIGNATED_CNS11643_3;
111: s += 4; count += 4;
112: if (n < count+1)
113: goto none;
114: continue;
115: }
116: if (s[3] == 'J') {
117: state4 = STATE4_DESIGNATED_CNS11643_4;
118: s += 4; count += 4;
119: if (n < count+1)
120: goto none;
121: continue;
122: }
123: if (s[3] == 'K') {
124: state4 = STATE4_DESIGNATED_CNS11643_5;
125: s += 4; count += 4;
126: if (n < count+1)
127: goto none;
128: continue;
129: }
130: if (s[3] == 'L') {
131: state4 = STATE4_DESIGNATED_CNS11643_6;
132: s += 4; count += 4;
133: if (n < count+1)
134: goto none;
135: continue;
136: }
137: if (s[3] == 'M') {
138: state4 = STATE4_DESIGNATED_CNS11643_7;
139: s += 4; count += 4;
140: if (n < count+1)
141: goto none;
142: continue;
143: }
144: }
145: }
146: if (s[1] == 'N') {
147: switch (state3) {
148: case STATE3_NONE:
149: goto ilseq;
150: case STATE3_DESIGNATED_CNS11643_2:
151: if (s[2] < 0x80 && s[3] < 0x80) {
152: int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
153: if (ret == RET_ILSEQ)
154: goto ilseq;
155: if (ret != 2) abort();
156: COMBINE_STATE;
157: conv->istate = state;
158: return count+4;
159: } else
160: goto ilseq;
161: default: abort();
162: }
163: }
164: if (s[1] == 'O') {
165: switch (state4) {
166: case STATE4_NONE:
167: goto ilseq;
168: case STATE4_DESIGNATED_CNS11643_3:
169: if (s[2] < 0x80 && s[3] < 0x80) {
170: int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
171: if (ret == RET_ILSEQ)
172: goto ilseq;
173: if (ret != 2) abort();
174: COMBINE_STATE;
175: conv->istate = state;
176: return count+4;
177: } else
178: goto ilseq;
179: case STATE4_DESIGNATED_CNS11643_4:
180: if (s[2] < 0x80 && s[3] < 0x80) {
181: int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
182: if (ret == RET_ILSEQ)
183: goto ilseq;
184: if (ret != 2) abort();
185: COMBINE_STATE;
186: conv->istate = state;
187: return count+4;
188: } else
189: goto ilseq;
190: case STATE4_DESIGNATED_CNS11643_5:
191: if (s[2] < 0x80 && s[3] < 0x80) {
192: int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
193: if (ret == RET_ILSEQ)
194: goto ilseq;
195: if (ret != 2) abort();
196: COMBINE_STATE;
197: conv->istate = state;
198: return count+4;
199: } else
200: goto ilseq;
201: case STATE4_DESIGNATED_CNS11643_6:
202: if (s[2] < 0x80 && s[3] < 0x80) {
203: int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
204: if (ret == RET_ILSEQ)
205: goto ilseq;
206: if (ret != 2) abort();
207: COMBINE_STATE;
208: conv->istate = state;
209: return count+4;
210: } else
211: goto ilseq;
212: case STATE4_DESIGNATED_CNS11643_7:
213: if (s[2] < 0x80 && s[3] < 0x80) {
214: int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
215: if (ret == RET_ILSEQ)
216: goto ilseq;
217: if (ret != 2) abort();
218: COMBINE_STATE;
219: conv->istate = state;
220: return count+4;
221: } else
222: goto ilseq;
223: default: abort();
224: }
225: }
226: goto ilseq;
227: }
228: if (c == SO) {
229: if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
230: goto ilseq;
231: state1 = STATE_TWOBYTE;
232: s++; count++;
233: if (n < count+1)
234: goto none;
235: continue;
236: }
237: if (c == SI) {
238: state1 = STATE_ASCII;
239: s++; count++;
240: if (n < count+1)
241: goto none;
242: continue;
243: }
244: break;
245: }
246: switch (state1) {
247: case STATE_ASCII:
248: if (c < 0x80) {
249: int ret = ascii_mbtowc(conv,pwc,s,1);
250: if (ret == RET_ILSEQ)
251: goto ilseq;
252: if (ret != 1) abort();
253: if (*pwc == 0x000a || *pwc == 0x000d) {
254: state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
255: }
256: COMBINE_STATE;
257: conv->istate = state;
258: return count+1;
259: } else
260: goto ilseq;
261: case STATE_TWOBYTE:
262: if (n < count+2)
263: goto none;
264: if (s[0] < 0x80 && s[1] < 0x80) {
265: int ret;
266: switch (state2) {
267: case STATE2_NONE:
268: goto ilseq;
269: case STATE2_DESIGNATED_GB2312:
270: ret = gb2312_mbtowc(conv,pwc,s,2); break;
271: case STATE2_DESIGNATED_CNS11643_1:
272: ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
273: case STATE2_DESIGNATED_ISO_IR_165:
274: ret = isoir165_mbtowc(conv,pwc,s,2); break;
275: default: abort();
276: }
277: if (ret == RET_ILSEQ)
278: goto ilseq;
279: if (ret != 2) abort();
280: COMBINE_STATE;
281: conv->istate = state;
282: return count+2;
283: } else
284: goto ilseq;
285: default: abort();
286: }
287:
288: none:
289: COMBINE_STATE;
290: conv->istate = state;
291: return RET_TOOFEW(count);
292:
293: ilseq:
294: COMBINE_STATE;
295: conv->istate = state;
296: return RET_SHIFT_ILSEQ(count);
297: }
298:
299: static int
300: iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
301: {
302: state_t state = conv->ostate;
303: SPLIT_STATE;
304: unsigned char buf[3];
305: int ret;
306:
307: /* There is no need to handle Unicode 3.1 tag characters and to look for
308: "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
309:
310: /* Try ASCII. */
311: ret = ascii_wctomb(conv,buf,wc,1);
312: if (ret != RET_ILUNI) {
313: if (ret != 1) abort();
314: if (buf[0] < 0x80) {
315: int count = (state1 == STATE_ASCII ? 1 : 2);
316: if (n < count)
317: return RET_TOOSMALL;
318: if (state1 != STATE_ASCII) {
319: r[0] = SI;
320: r += 1;
321: state1 = STATE_ASCII;
322: }
323: r[0] = buf[0];
324: if (wc == 0x000a || wc == 0x000d) {
325: state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
326: }
327: COMBINE_STATE;
328: conv->ostate = state;
329: return count;
330: }
331: }
332:
333: /* Try GB 2312-1980. */
334: ret = gb2312_wctomb(conv,buf,wc,2);
335: if (ret != RET_ILUNI) {
336: if (ret != 2) abort();
337: if (buf[0] < 0x80 && buf[1] < 0x80) {
338: int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
339: if (n < count)
340: return RET_TOOSMALL;
341: if (state2 != STATE2_DESIGNATED_GB2312) {
342: r[0] = ESC;
343: r[1] = '$';
344: r[2] = ')';
345: r[3] = 'A';
346: r += 4;
347: state2 = STATE2_DESIGNATED_GB2312;
348: }
349: if (state1 != STATE_TWOBYTE) {
350: r[0] = SO;
351: r += 1;
352: state1 = STATE_TWOBYTE;
353: }
354: r[0] = buf[0];
355: r[1] = buf[1];
356: COMBINE_STATE;
357: conv->ostate = state;
358: return count;
359: }
360: }
361:
362: ret = cns11643_wctomb(conv,buf,wc,3);
363: if (ret != RET_ILUNI) {
364: if (ret != 3) abort();
365:
366: /* Try CNS 11643-1992 Plane 1. */
367: if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
368: int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
369: if (n < count)
370: return RET_TOOSMALL;
371: if (state2 != STATE2_DESIGNATED_CNS11643_1) {
372: r[0] = ESC;
373: r[1] = '$';
374: r[2] = ')';
375: r[3] = 'G';
376: r += 4;
377: state2 = STATE2_DESIGNATED_CNS11643_1;
378: }
379: if (state1 != STATE_TWOBYTE) {
380: r[0] = SO;
381: r += 1;
382: state1 = STATE_TWOBYTE;
383: }
384: r[0] = buf[1];
385: r[1] = buf[2];
386: COMBINE_STATE;
387: conv->ostate = state;
388: return count;
389: }
390:
391: /* Try CNS 11643-1992 Plane 2. */
392: if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
393: int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
394: if (n < count)
395: return RET_TOOSMALL;
396: if (state3 != STATE3_DESIGNATED_CNS11643_2) {
397: r[0] = ESC;
398: r[1] = '$';
399: r[2] = '*';
400: r[3] = 'H';
401: r += 4;
402: state3 = STATE3_DESIGNATED_CNS11643_2;
403: }
404: r[0] = ESC;
405: r[1] = 'N';
406: r[2] = buf[1];
407: r[3] = buf[2];
408: COMBINE_STATE;
409: conv->ostate = state;
410: return count;
411: }
412:
413: /* Try CNS 11643-1992 Plane 3. */
414: if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
415: int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
416: if (n < count)
417: return RET_TOOSMALL;
418: if (state4 != STATE4_DESIGNATED_CNS11643_3) {
419: r[0] = ESC;
420: r[1] = '$';
421: r[2] = '+';
422: r[3] = 'I';
423: r += 4;
424: state4 = STATE4_DESIGNATED_CNS11643_3;
425: }
426: r[0] = ESC;
427: r[1] = 'O';
428: r[2] = buf[1];
429: r[3] = buf[2];
430: COMBINE_STATE;
431: conv->ostate = state;
432: return count;
433: }
434:
435: /* Try CNS 11643-1992 Plane 4. */
436: if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
437: int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
438: if (n < count)
439: return RET_TOOSMALL;
440: if (state4 != STATE4_DESIGNATED_CNS11643_4) {
441: r[0] = ESC;
442: r[1] = '$';
443: r[2] = '+';
444: r[3] = 'J';
445: r += 4;
446: state4 = STATE4_DESIGNATED_CNS11643_4;
447: }
448: r[0] = ESC;
449: r[1] = 'O';
450: r[2] = buf[1];
451: r[3] = buf[2];
452: COMBINE_STATE;
453: conv->ostate = state;
454: return count;
455: }
456:
457: /* Try CNS 11643-1992 Plane 5. */
458: if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
459: int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
460: if (n < count)
461: return RET_TOOSMALL;
462: if (state4 != STATE4_DESIGNATED_CNS11643_5) {
463: r[0] = ESC;
464: r[1] = '$';
465: r[2] = '+';
466: r[3] = 'K';
467: r += 4;
468: state4 = STATE4_DESIGNATED_CNS11643_5;
469: }
470: r[0] = ESC;
471: r[1] = 'O';
472: r[2] = buf[1];
473: r[3] = buf[2];
474: COMBINE_STATE;
475: conv->ostate = state;
476: return count;
477: }
478:
479: /* Try CNS 11643-1992 Plane 6. */
480: if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
481: int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
482: if (n < count)
483: return RET_TOOSMALL;
484: if (state4 != STATE4_DESIGNATED_CNS11643_6) {
485: r[0] = ESC;
486: r[1] = '$';
487: r[2] = '+';
488: r[3] = 'L';
489: r += 4;
490: state4 = STATE4_DESIGNATED_CNS11643_6;
491: }
492: r[0] = ESC;
493: r[1] = 'O';
494: r[2] = buf[1];
495: r[3] = buf[2];
496: COMBINE_STATE;
497: conv->ostate = state;
498: return count;
499: }
500:
501: /* Try CNS 11643-1992 Plane 7. */
502: if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
503: int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
504: if (n < count)
505: return RET_TOOSMALL;
506: if (state4 != STATE4_DESIGNATED_CNS11643_7) {
507: r[0] = ESC;
508: r[1] = '$';
509: r[2] = '+';
510: r[3] = 'M';
511: r += 4;
512: state4 = STATE4_DESIGNATED_CNS11643_7;
513: }
514: r[0] = ESC;
515: r[1] = 'O';
516: r[2] = buf[1];
517: r[3] = buf[2];
518: COMBINE_STATE;
519: conv->ostate = state;
520: return count;
521: }
522:
523: }
524:
525: /* Try ISO-IR-165. */
526: ret = isoir165_wctomb(conv,buf,wc,2);
527: if (ret != RET_ILUNI) {
528: if (ret != 2) abort();
529: if (buf[0] < 0x80 && buf[1] < 0x80) {
530: int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
531: if (n < count)
532: return RET_TOOSMALL;
533: if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
534: r[0] = ESC;
535: r[1] = '$';
536: r[2] = ')';
537: r[3] = 'E';
538: r += 4;
539: state2 = STATE2_DESIGNATED_ISO_IR_165;
540: }
541: if (state1 != STATE_TWOBYTE) {
542: r[0] = SO;
543: r += 1;
544: state1 = STATE_TWOBYTE;
545: }
546: r[0] = buf[0];
547: r[1] = buf[1];
548: COMBINE_STATE;
549: conv->ostate = state;
550: return count;
551: }
552: }
553:
554: return RET_ILUNI;
555: }
556:
557: static int
558: iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n)
559: {
560: state_t state = conv->ostate;
561: SPLIT_STATE;
562: (void)state2;
563: (void)state3;
564: (void)state4;
565: if (state1 != STATE_ASCII) {
566: if (n < 1)
567: return RET_TOOSMALL;
568: r[0] = SI;
569: /* conv->ostate = 0; will be done by the caller */
570: return 1;
571: } else
572: return 0;
573: }
574:
/* Drop the state macros of this converter again, in the order in which
   they were defined. */
#undef STATE_ASCII
#undef STATE_TWOBYTE
#undef STATE2_NONE
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_ISO_IR_165
#undef STATE3_NONE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE4_NONE
#undef STATE4_DESIGNATED_CNS11643_3
#undef STATE4_DESIGNATED_CNS11643_4
#undef STATE4_DESIGNATED_CNS11643_5
#undef STATE4_DESIGNATED_CNS11643_6
#undef STATE4_DESIGNATED_CNS11643_7
#undef SPLIT_STATE
#undef COMBINE_STATE
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>