/* Annotation of embedaddon/libiconv/lib/iso2022_cn.h, revision 1.1.1.2 */
/*
 * Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, see <https://www.gnu.org/licenses/>.
 */

/*
 * ISO-2022-CN
 */

/* Specification: RFC 1922 */

/* Control bytes recognised by this converter: escape, shift-out, shift-in. */
#define ESC 0x1b
#define SO 0x0e
#define SI 0x0f

/*
 * The conversion state is a single integer made of three fields:
 *   bits 0..7   - the shift state (STATE_*),
 *   bits 8..15  - the set designated for use after SO (STATE2_*),
 *   bits 16..   - the set designated for use after ESC N (STATE3_*).
 */

/*
 * The state is composed of one of the following values
 */
#define STATE_ASCII 0
#define STATE_TWOBYTE 1
/*
 * and one of the following values, << 8
 */
#define STATE2_NONE 0
#define STATE2_DESIGNATED_GB2312 1
#define STATE2_DESIGNATED_CNS11643_1 2
/*
 * and one of the following values, << 16
 */
#define STATE3_NONE 0
#define STATE3_DESIGNATED_CNS11643_2 1

/*
 * SPLIT_STATE declares the locals state1, state2 and state3 from a variable
 * named `state` that must already be in scope.  It expands to a declaration
 * without the trailing semicolon.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16
/*
 * COMBINE_STATE packs state1, state2 and state3 back into `state`.  It
 * expands to an assignment expression without the trailing semicolon.
 */
#define COMBINE_STATE \
  state = (state3 << 16) | (state2 << 8) | state1

52: static int
1.1.1.2 ! misho 53: iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
1.1 misho 54: {
55: state_t state = conv->istate;
56: SPLIT_STATE;
57: int count = 0;
58: unsigned char c;
59: for (;;) {
60: c = *s;
61: if (c == ESC) {
62: if (n < count+4)
63: goto none;
64: if (s[1] == '$') {
65: if (s[2] == ')') {
66: if (s[3] == 'A') {
67: state2 = STATE2_DESIGNATED_GB2312;
68: s += 4; count += 4;
69: if (n < count+1)
70: goto none;
71: continue;
72: }
73: if (s[3] == 'G') {
74: state2 = STATE2_DESIGNATED_CNS11643_1;
75: s += 4; count += 4;
76: if (n < count+1)
77: goto none;
78: continue;
79: }
80: }
81: if (s[2] == '*') {
82: if (s[3] == 'H') {
83: state3 = STATE3_DESIGNATED_CNS11643_2;
84: s += 4; count += 4;
85: if (n < count+1)
86: goto none;
87: continue;
88: }
89: }
90: }
91: if (s[1] == 'N') {
92: switch (state3) {
93: case STATE3_NONE:
94: goto ilseq;
95: case STATE3_DESIGNATED_CNS11643_2:
96: if (s[2] < 0x80 && s[3] < 0x80) {
97: int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
98: if (ret == RET_ILSEQ)
99: goto ilseq;
100: if (ret != 2) abort();
101: COMBINE_STATE;
102: conv->istate = state;
103: return count+4;
104: } else
105: goto ilseq;
106: default: abort();
107: }
108: }
109: goto ilseq;
110: }
111: if (c == SO) {
112: if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
113: goto ilseq;
114: state1 = STATE_TWOBYTE;
115: s++; count++;
116: if (n < count+1)
117: goto none;
118: continue;
119: }
120: if (c == SI) {
121: state1 = STATE_ASCII;
122: s++; count++;
123: if (n < count+1)
124: goto none;
125: continue;
126: }
127: break;
128: }
129: switch (state1) {
130: case STATE_ASCII:
131: if (c < 0x80) {
132: int ret = ascii_mbtowc(conv,pwc,s,1);
133: if (ret == RET_ILSEQ)
134: goto ilseq;
135: if (ret != 1) abort();
136: if (*pwc == 0x000a || *pwc == 0x000d) {
137: state2 = STATE2_NONE; state3 = STATE3_NONE;
138: }
139: COMBINE_STATE;
140: conv->istate = state;
141: return count+1;
142: } else
143: goto ilseq;
144: case STATE_TWOBYTE:
145: if (n < count+2)
146: goto none;
147: if (s[0] < 0x80 && s[1] < 0x80) {
148: int ret;
149: switch (state2) {
150: case STATE2_NONE:
151: goto ilseq;
152: case STATE2_DESIGNATED_GB2312:
153: ret = gb2312_mbtowc(conv,pwc,s,2); break;
154: case STATE2_DESIGNATED_CNS11643_1:
155: ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
156: default: abort();
157: }
158: if (ret == RET_ILSEQ)
159: goto ilseq;
160: if (ret != 2) abort();
161: COMBINE_STATE;
162: conv->istate = state;
163: return count+2;
164: } else
165: goto ilseq;
166: default: abort();
167: }
168:
169: none:
170: COMBINE_STATE;
171: conv->istate = state;
172: return RET_TOOFEW(count);
173:
174: ilseq:
175: COMBINE_STATE;
176: conv->istate = state;
177: return RET_SHIFT_ILSEQ(count);
178: }
179:
180: static int
1.1.1.2 ! misho 181: iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
1.1 misho 182: {
183: state_t state = conv->ostate;
184: SPLIT_STATE;
185: unsigned char buf[3];
186: int ret;
187:
188: /* There is no need to handle Unicode 3.1 tag characters and to look for
189: "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
190:
191: /* Try ASCII. */
192: ret = ascii_wctomb(conv,buf,wc,1);
193: if (ret != RET_ILUNI) {
194: if (ret != 1) abort();
195: if (buf[0] < 0x80) {
196: int count = (state1 == STATE_ASCII ? 1 : 2);
197: if (n < count)
198: return RET_TOOSMALL;
199: if (state1 != STATE_ASCII) {
200: r[0] = SI;
201: r += 1;
202: state1 = STATE_ASCII;
203: }
204: r[0] = buf[0];
205: if (wc == 0x000a || wc == 0x000d) {
206: state2 = STATE2_NONE; state3 = STATE3_NONE;
207: }
208: COMBINE_STATE;
209: conv->ostate = state;
210: return count;
211: }
212: }
213:
214: /* Try GB 2312-1980. */
215: ret = gb2312_wctomb(conv,buf,wc,2);
216: if (ret != RET_ILUNI) {
217: if (ret != 2) abort();
218: if (buf[0] < 0x80 && buf[1] < 0x80) {
219: int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
220: if (n < count)
221: return RET_TOOSMALL;
222: if (state2 != STATE2_DESIGNATED_GB2312) {
223: r[0] = ESC;
224: r[1] = '$';
225: r[2] = ')';
226: r[3] = 'A';
227: r += 4;
228: state2 = STATE2_DESIGNATED_GB2312;
229: }
230: if (state1 != STATE_TWOBYTE) {
231: r[0] = SO;
232: r += 1;
233: state1 = STATE_TWOBYTE;
234: }
235: r[0] = buf[0];
236: r[1] = buf[1];
237: COMBINE_STATE;
238: conv->ostate = state;
239: return count;
240: }
241: }
242:
243: ret = cns11643_wctomb(conv,buf,wc,3);
244: if (ret != RET_ILUNI) {
245: if (ret != 3) abort();
246:
247: /* Try CNS 11643-1992 Plane 1. */
248: if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
249: int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
250: if (n < count)
251: return RET_TOOSMALL;
252: if (state2 != STATE2_DESIGNATED_CNS11643_1) {
253: r[0] = ESC;
254: r[1] = '$';
255: r[2] = ')';
256: r[3] = 'G';
257: r += 4;
258: state2 = STATE2_DESIGNATED_CNS11643_1;
259: }
260: if (state1 != STATE_TWOBYTE) {
261: r[0] = SO;
262: r += 1;
263: state1 = STATE_TWOBYTE;
264: }
265: r[0] = buf[1];
266: r[1] = buf[2];
267: COMBINE_STATE;
268: conv->ostate = state;
269: return count;
270: }
271:
272: /* Try CNS 11643-1992 Plane 2. */
273: if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
274: int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
275: if (n < count)
276: return RET_TOOSMALL;
277: if (state3 != STATE3_DESIGNATED_CNS11643_2) {
278: r[0] = ESC;
279: r[1] = '$';
280: r[2] = '*';
281: r[3] = 'H';
282: r += 4;
283: state3 = STATE3_DESIGNATED_CNS11643_2;
284: }
285: r[0] = ESC;
286: r[1] = 'N';
287: r[2] = buf[1];
288: r[3] = buf[2];
289: COMBINE_STATE;
290: conv->ostate = state;
291: return count;
292: }
293: }
294:
295: return RET_ILUNI;
296: }
297:
298: static int
1.1.1.2 ! misho 299: iso2022_cn_reset (conv_t conv, unsigned char *r, size_t n)
1.1 misho 300: {
301: state_t state = conv->ostate;
302: SPLIT_STATE;
303: (void)state2;
304: (void)state3;
305: if (state1 != STATE_ASCII) {
306: if (n < 1)
307: return RET_TOOSMALL;
308: r[0] = SI;
309: /* conv->ostate = 0; will be done by the caller */
310: return 1;
311: } else
312: return 0;
313: }
314:
/*
 * Remove the state macros defined in this file.  ESC, SO and SI are not
 * undefined here and so remain defined.
 */
#undef COMBINE_STATE
#undef SPLIT_STATE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE3_NONE
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_NONE
#undef STATE_TWOBYTE
#undef STATE_ASCII
/* FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org> */