Annotation of embedaddon/libiconv/lib/iso2022_cnext.h, revision 1.1.1.2
1.1 misho 1: /*
1.1.1.2 ! misho 2: * Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.2 ! misho 17: * If not, see <https://www.gnu.org/licenses/>.
1.1 misho 18: */
19:
20: /*
21: * ISO-2022-CN-EXT
22: */
23:
24: /* Specification: RFC 1922 */
25:
/* Control bytes used by the encoding. */
#define ESC 0x1b
#define SO 0x0e
#define SI 0x0f

/*
 * The conversion state is a single integer made of four 8-bit fields.
 *
 * Bits 0..7 (state1): the current shift state, switched by SO and SI.
 */
#define STATE_ASCII 0
#define STATE_TWOBYTE 1
/*
 * Bits 8..15 (state2): the character set designated by ESC $ ) <final>
 * and used while in STATE_TWOBYTE.
 */
#define STATE2_NONE 0
#define STATE2_DESIGNATED_GB2312 1
#define STATE2_DESIGNATED_CNS11643_1 2
#define STATE2_DESIGNATED_ISO_IR_165 3
/*
 * Bits 16..23 (state3): the character set designated by ESC $ * <final>
 * and used for one character after ESC N.
 */
#define STATE3_NONE 0
#define STATE3_DESIGNATED_CNS11643_2 1
/*
 * Bits 24..31 (state4): the character set designated by ESC $ + <final>
 * and used for one character after ESC O.
 */
#define STATE4_NONE 0
#define STATE4_DESIGNATED_CNS11643_3 1
#define STATE4_DESIGNATED_CNS11643_4 2
#define STATE4_DESIGNATED_CNS11643_5 3
#define STATE4_DESIGNATED_CNS11643_6 4
#define STATE4_DESIGNATED_CNS11643_7 5

/*
 * SPLIT_STATE declares state1..state4 from a variable named 'state' that
 * must be in scope; it expands to a declaration, so use it among the
 * declarations of a block.  Every field is masked to 8 bits.
 * COMBINE_STATE packs state1..state4 back into 'state'.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = (state >> 24) & 0xff
#define COMBINE_STATE \
  state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
61:
62: static int
1.1.1.2 ! misho 63: iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
1.1 misho 64: {
65: state_t state = conv->istate;
66: SPLIT_STATE;
67: int count = 0;
68: unsigned char c;
69: for (;;) {
70: c = *s;
71: if (c == ESC) {
72: if (n < count+4)
73: goto none;
74: if (s[1] == '$') {
75: if (s[2] == ')') {
76: if (s[3] == 'A') {
77: state2 = STATE2_DESIGNATED_GB2312;
78: s += 4; count += 4;
79: if (n < count+1)
80: goto none;
81: continue;
82: }
83: if (s[3] == 'G') {
84: state2 = STATE2_DESIGNATED_CNS11643_1;
85: s += 4; count += 4;
86: if (n < count+1)
87: goto none;
88: continue;
89: }
90: if (s[3] == 'E') {
91: state2 = STATE2_DESIGNATED_ISO_IR_165;
92: s += 4; count += 4;
93: if (n < count+1)
94: goto none;
95: continue;
96: }
97: }
98: if (s[2] == '*') {
99: if (s[3] == 'H') {
100: state3 = STATE3_DESIGNATED_CNS11643_2;
101: s += 4; count += 4;
102: if (n < count+1)
103: goto none;
104: continue;
105: }
106: }
107: if (s[2] == '+') {
108: if (s[3] == 'I') {
109: state4 = STATE4_DESIGNATED_CNS11643_3;
110: s += 4; count += 4;
111: if (n < count+1)
112: goto none;
113: continue;
114: }
115: if (s[3] == 'J') {
116: state4 = STATE4_DESIGNATED_CNS11643_4;
117: s += 4; count += 4;
118: if (n < count+1)
119: goto none;
120: continue;
121: }
122: if (s[3] == 'K') {
123: state4 = STATE4_DESIGNATED_CNS11643_5;
124: s += 4; count += 4;
125: if (n < count+1)
126: goto none;
127: continue;
128: }
129: if (s[3] == 'L') {
130: state4 = STATE4_DESIGNATED_CNS11643_6;
131: s += 4; count += 4;
132: if (n < count+1)
133: goto none;
134: continue;
135: }
136: if (s[3] == 'M') {
137: state4 = STATE4_DESIGNATED_CNS11643_7;
138: s += 4; count += 4;
139: if (n < count+1)
140: goto none;
141: continue;
142: }
143: }
144: }
145: if (s[1] == 'N') {
146: switch (state3) {
147: case STATE3_NONE:
148: goto ilseq;
149: case STATE3_DESIGNATED_CNS11643_2:
150: if (s[2] < 0x80 && s[3] < 0x80) {
151: int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
152: if (ret == RET_ILSEQ)
153: goto ilseq;
154: if (ret != 2) abort();
155: COMBINE_STATE;
156: conv->istate = state;
157: return count+4;
158: } else
159: goto ilseq;
160: default: abort();
161: }
162: }
163: if (s[1] == 'O') {
164: switch (state4) {
165: case STATE4_NONE:
166: goto ilseq;
167: case STATE4_DESIGNATED_CNS11643_3:
168: if (s[2] < 0x80 && s[3] < 0x80) {
169: int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
170: if (ret == RET_ILSEQ)
171: goto ilseq;
172: if (ret != 2) abort();
173: COMBINE_STATE;
174: conv->istate = state;
175: return count+4;
176: } else
177: goto ilseq;
178: case STATE4_DESIGNATED_CNS11643_4:
179: if (s[2] < 0x80 && s[3] < 0x80) {
180: int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
181: if (ret == RET_ILSEQ)
182: goto ilseq;
183: if (ret != 2) abort();
184: COMBINE_STATE;
185: conv->istate = state;
186: return count+4;
187: } else
188: goto ilseq;
189: case STATE4_DESIGNATED_CNS11643_5:
190: if (s[2] < 0x80 && s[3] < 0x80) {
191: int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
192: if (ret == RET_ILSEQ)
193: goto ilseq;
194: if (ret != 2) abort();
195: COMBINE_STATE;
196: conv->istate = state;
197: return count+4;
198: } else
199: goto ilseq;
200: case STATE4_DESIGNATED_CNS11643_6:
201: if (s[2] < 0x80 && s[3] < 0x80) {
202: int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
203: if (ret == RET_ILSEQ)
204: goto ilseq;
205: if (ret != 2) abort();
206: COMBINE_STATE;
207: conv->istate = state;
208: return count+4;
209: } else
210: goto ilseq;
211: case STATE4_DESIGNATED_CNS11643_7:
212: if (s[2] < 0x80 && s[3] < 0x80) {
213: int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
214: if (ret == RET_ILSEQ)
215: goto ilseq;
216: if (ret != 2) abort();
217: COMBINE_STATE;
218: conv->istate = state;
219: return count+4;
220: } else
221: goto ilseq;
222: default: abort();
223: }
224: }
225: goto ilseq;
226: }
227: if (c == SO) {
228: if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
229: goto ilseq;
230: state1 = STATE_TWOBYTE;
231: s++; count++;
232: if (n < count+1)
233: goto none;
234: continue;
235: }
236: if (c == SI) {
237: state1 = STATE_ASCII;
238: s++; count++;
239: if (n < count+1)
240: goto none;
241: continue;
242: }
243: break;
244: }
245: switch (state1) {
246: case STATE_ASCII:
247: if (c < 0x80) {
248: int ret = ascii_mbtowc(conv,pwc,s,1);
249: if (ret == RET_ILSEQ)
250: goto ilseq;
251: if (ret != 1) abort();
252: if (*pwc == 0x000a || *pwc == 0x000d) {
253: state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
254: }
255: COMBINE_STATE;
256: conv->istate = state;
257: return count+1;
258: } else
259: goto ilseq;
260: case STATE_TWOBYTE:
261: if (n < count+2)
262: goto none;
263: if (s[0] < 0x80 && s[1] < 0x80) {
264: int ret;
265: switch (state2) {
266: case STATE2_NONE:
267: goto ilseq;
268: case STATE2_DESIGNATED_GB2312:
269: ret = gb2312_mbtowc(conv,pwc,s,2); break;
270: case STATE2_DESIGNATED_CNS11643_1:
271: ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
272: case STATE2_DESIGNATED_ISO_IR_165:
273: ret = isoir165_mbtowc(conv,pwc,s,2); break;
274: default: abort();
275: }
276: if (ret == RET_ILSEQ)
277: goto ilseq;
278: if (ret != 2) abort();
279: COMBINE_STATE;
280: conv->istate = state;
281: return count+2;
282: } else
283: goto ilseq;
284: default: abort();
285: }
286:
287: none:
288: COMBINE_STATE;
289: conv->istate = state;
290: return RET_TOOFEW(count);
291:
292: ilseq:
293: COMBINE_STATE;
294: conv->istate = state;
295: return RET_SHIFT_ILSEQ(count);
296: }
297:
298: static int
1.1.1.2 ! misho 299: iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
1.1 misho 300: {
301: state_t state = conv->ostate;
302: SPLIT_STATE;
303: unsigned char buf[3];
304: int ret;
305:
306: /* There is no need to handle Unicode 3.1 tag characters and to look for
307: "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
308:
309: /* Try ASCII. */
310: ret = ascii_wctomb(conv,buf,wc,1);
311: if (ret != RET_ILUNI) {
312: if (ret != 1) abort();
313: if (buf[0] < 0x80) {
314: int count = (state1 == STATE_ASCII ? 1 : 2);
315: if (n < count)
316: return RET_TOOSMALL;
317: if (state1 != STATE_ASCII) {
318: r[0] = SI;
319: r += 1;
320: state1 = STATE_ASCII;
321: }
322: r[0] = buf[0];
323: if (wc == 0x000a || wc == 0x000d) {
324: state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
325: }
326: COMBINE_STATE;
327: conv->ostate = state;
328: return count;
329: }
330: }
331:
332: /* Try GB 2312-1980. */
333: ret = gb2312_wctomb(conv,buf,wc,2);
334: if (ret != RET_ILUNI) {
335: if (ret != 2) abort();
336: if (buf[0] < 0x80 && buf[1] < 0x80) {
337: int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
338: if (n < count)
339: return RET_TOOSMALL;
340: if (state2 != STATE2_DESIGNATED_GB2312) {
341: r[0] = ESC;
342: r[1] = '$';
343: r[2] = ')';
344: r[3] = 'A';
345: r += 4;
346: state2 = STATE2_DESIGNATED_GB2312;
347: }
348: if (state1 != STATE_TWOBYTE) {
349: r[0] = SO;
350: r += 1;
351: state1 = STATE_TWOBYTE;
352: }
353: r[0] = buf[0];
354: r[1] = buf[1];
355: COMBINE_STATE;
356: conv->ostate = state;
357: return count;
358: }
359: }
360:
361: ret = cns11643_wctomb(conv,buf,wc,3);
362: if (ret != RET_ILUNI) {
363: if (ret != 3) abort();
364:
365: /* Try CNS 11643-1992 Plane 1. */
366: if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
367: int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
368: if (n < count)
369: return RET_TOOSMALL;
370: if (state2 != STATE2_DESIGNATED_CNS11643_1) {
371: r[0] = ESC;
372: r[1] = '$';
373: r[2] = ')';
374: r[3] = 'G';
375: r += 4;
376: state2 = STATE2_DESIGNATED_CNS11643_1;
377: }
378: if (state1 != STATE_TWOBYTE) {
379: r[0] = SO;
380: r += 1;
381: state1 = STATE_TWOBYTE;
382: }
383: r[0] = buf[1];
384: r[1] = buf[2];
385: COMBINE_STATE;
386: conv->ostate = state;
387: return count;
388: }
389:
390: /* Try CNS 11643-1992 Plane 2. */
391: if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
392: int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
393: if (n < count)
394: return RET_TOOSMALL;
395: if (state3 != STATE3_DESIGNATED_CNS11643_2) {
396: r[0] = ESC;
397: r[1] = '$';
398: r[2] = '*';
399: r[3] = 'H';
400: r += 4;
401: state3 = STATE3_DESIGNATED_CNS11643_2;
402: }
403: r[0] = ESC;
404: r[1] = 'N';
405: r[2] = buf[1];
406: r[3] = buf[2];
407: COMBINE_STATE;
408: conv->ostate = state;
409: return count;
410: }
411:
412: /* Try CNS 11643-1992 Plane 3. */
413: if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
414: int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
415: if (n < count)
416: return RET_TOOSMALL;
417: if (state4 != STATE4_DESIGNATED_CNS11643_3) {
418: r[0] = ESC;
419: r[1] = '$';
420: r[2] = '+';
421: r[3] = 'I';
422: r += 4;
423: state4 = STATE4_DESIGNATED_CNS11643_3;
424: }
425: r[0] = ESC;
426: r[1] = 'O';
427: r[2] = buf[1];
428: r[3] = buf[2];
429: COMBINE_STATE;
430: conv->ostate = state;
431: return count;
432: }
433:
434: /* Try CNS 11643-1992 Plane 4. */
435: if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
436: int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
437: if (n < count)
438: return RET_TOOSMALL;
439: if (state4 != STATE4_DESIGNATED_CNS11643_4) {
440: r[0] = ESC;
441: r[1] = '$';
442: r[2] = '+';
443: r[3] = 'J';
444: r += 4;
445: state4 = STATE4_DESIGNATED_CNS11643_4;
446: }
447: r[0] = ESC;
448: r[1] = 'O';
449: r[2] = buf[1];
450: r[3] = buf[2];
451: COMBINE_STATE;
452: conv->ostate = state;
453: return count;
454: }
455:
456: /* Try CNS 11643-1992 Plane 5. */
457: if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
458: int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
459: if (n < count)
460: return RET_TOOSMALL;
461: if (state4 != STATE4_DESIGNATED_CNS11643_5) {
462: r[0] = ESC;
463: r[1] = '$';
464: r[2] = '+';
465: r[3] = 'K';
466: r += 4;
467: state4 = STATE4_DESIGNATED_CNS11643_5;
468: }
469: r[0] = ESC;
470: r[1] = 'O';
471: r[2] = buf[1];
472: r[3] = buf[2];
473: COMBINE_STATE;
474: conv->ostate = state;
475: return count;
476: }
477:
478: /* Try CNS 11643-1992 Plane 6. */
479: if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
480: int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
481: if (n < count)
482: return RET_TOOSMALL;
483: if (state4 != STATE4_DESIGNATED_CNS11643_6) {
484: r[0] = ESC;
485: r[1] = '$';
486: r[2] = '+';
487: r[3] = 'L';
488: r += 4;
489: state4 = STATE4_DESIGNATED_CNS11643_6;
490: }
491: r[0] = ESC;
492: r[1] = 'O';
493: r[2] = buf[1];
494: r[3] = buf[2];
495: COMBINE_STATE;
496: conv->ostate = state;
497: return count;
498: }
499:
500: /* Try CNS 11643-1992 Plane 7. */
501: if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
502: int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
503: if (n < count)
504: return RET_TOOSMALL;
505: if (state4 != STATE4_DESIGNATED_CNS11643_7) {
506: r[0] = ESC;
507: r[1] = '$';
508: r[2] = '+';
509: r[3] = 'M';
510: r += 4;
511: state4 = STATE4_DESIGNATED_CNS11643_7;
512: }
513: r[0] = ESC;
514: r[1] = 'O';
515: r[2] = buf[1];
516: r[3] = buf[2];
517: COMBINE_STATE;
518: conv->ostate = state;
519: return count;
520: }
521:
522: }
523:
524: /* Try ISO-IR-165. */
525: ret = isoir165_wctomb(conv,buf,wc,2);
526: if (ret != RET_ILUNI) {
527: if (ret != 2) abort();
528: if (buf[0] < 0x80 && buf[1] < 0x80) {
529: int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
530: if (n < count)
531: return RET_TOOSMALL;
532: if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
533: r[0] = ESC;
534: r[1] = '$';
535: r[2] = ')';
536: r[3] = 'E';
537: r += 4;
538: state2 = STATE2_DESIGNATED_ISO_IR_165;
539: }
540: if (state1 != STATE_TWOBYTE) {
541: r[0] = SO;
542: r += 1;
543: state1 = STATE_TWOBYTE;
544: }
545: r[0] = buf[0];
546: r[1] = buf[1];
547: COMBINE_STATE;
548: conv->ostate = state;
549: return count;
550: }
551: }
552:
553: return RET_ILUNI;
554: }
555:
556: static int
1.1.1.2 ! misho 557: iso2022_cn_ext_reset (conv_t conv, unsigned char *r, size_t n)
1.1 misho 558: {
559: state_t state = conv->ostate;
560: SPLIT_STATE;
561: (void)state2;
562: (void)state3;
563: (void)state4;
564: if (state1 != STATE_ASCII) {
565: if (n < 1)
566: return RET_TOOSMALL;
567: r[0] = SI;
568: /* conv->ostate = 0; will be done by the caller */
569: return 1;
570: } else
571: return 0;
572: }
573:
/*
 * Remove the state macros defined by this file, in reverse order of their
 * definition.  ESC, SO and SI are left defined.
 * NOTE(review): presumably this keeps the names from clashing with other
 * converter headers included into the same translation unit -- confirm.
 */
#undef COMBINE_STATE
#undef SPLIT_STATE
#undef STATE4_DESIGNATED_CNS11643_7
#undef STATE4_DESIGNATED_CNS11643_6
#undef STATE4_DESIGNATED_CNS11643_5
#undef STATE4_DESIGNATED_CNS11643_4
#undef STATE4_DESIGNATED_CNS11643_3
#undef STATE4_NONE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE3_NONE
#undef STATE2_DESIGNATED_ISO_IR_165
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_NONE
#undef STATE_TWOBYTE
#undef STATE_ASCII
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>