Annotation of embedaddon/libiconv/lib/iso2022_cn.h, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: /*
22: * ISO-2022-CN
23: */
24:
25: /* Specification: RFC 1922 */
26:
/*
 * Control bytes recognised by this converter:
 *   ESC introduces the four-byte sequences handled below (designations
 *       and the "ESC N" two-byte prefix),
 *   SO  switches to two-byte mode,
 *   SI  switches back to ASCII.
 */
#define ESC 0x1b
#define SO  0x0e
#define SI  0x0f
30:
/*
 * The conversion state packs three independent fields into one integer.
 *
 * Bits 0..7 (state1): the current shift mode.
 */
#define STATE_ASCII    0
#define STATE_TWOBYTE  1

/*
 * Bits 8..15 (state2): the character set selected by SO, if any.
 */
#define STATE2_NONE                   0
#define STATE2_DESIGNATED_GB2312      1
#define STATE2_DESIGNATED_CNS11643_1  2

/*
 * Bits 16 and up (state3): the character set selected by "ESC N", if any.
 */
#define STATE3_NONE                   0
#define STATE3_DESIGNATED_CNS11643_2  1

/*
 * SPLIT_STATE declares state1, state2 and state3 from a variable named
 * 'state' that must already be in scope.  COMBINE_STATE packs the three
 * fields back into 'state'.  Both are written without a trailing semicolon
 * and are meant to be used as "SPLIT_STATE;" / "COMBINE_STATE;".
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff; \
  unsigned int state2 = (state >> 8) & 0xff; \
  unsigned int state3 = state >> 16
#define COMBINE_STATE \
  state = state1 | (state2 << 8) | (state3 << 16)
52:
53: static int
54: iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
55: {
56: state_t state = conv->istate;
57: SPLIT_STATE;
58: int count = 0;
59: unsigned char c;
60: for (;;) {
61: c = *s;
62: if (c == ESC) {
63: if (n < count+4)
64: goto none;
65: if (s[1] == '$') {
66: if (s[2] == ')') {
67: if (s[3] == 'A') {
68: state2 = STATE2_DESIGNATED_GB2312;
69: s += 4; count += 4;
70: if (n < count+1)
71: goto none;
72: continue;
73: }
74: if (s[3] == 'G') {
75: state2 = STATE2_DESIGNATED_CNS11643_1;
76: s += 4; count += 4;
77: if (n < count+1)
78: goto none;
79: continue;
80: }
81: }
82: if (s[2] == '*') {
83: if (s[3] == 'H') {
84: state3 = STATE3_DESIGNATED_CNS11643_2;
85: s += 4; count += 4;
86: if (n < count+1)
87: goto none;
88: continue;
89: }
90: }
91: }
92: if (s[1] == 'N') {
93: switch (state3) {
94: case STATE3_NONE:
95: goto ilseq;
96: case STATE3_DESIGNATED_CNS11643_2:
97: if (s[2] < 0x80 && s[3] < 0x80) {
98: int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
99: if (ret == RET_ILSEQ)
100: goto ilseq;
101: if (ret != 2) abort();
102: COMBINE_STATE;
103: conv->istate = state;
104: return count+4;
105: } else
106: goto ilseq;
107: default: abort();
108: }
109: }
110: goto ilseq;
111: }
112: if (c == SO) {
113: if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
114: goto ilseq;
115: state1 = STATE_TWOBYTE;
116: s++; count++;
117: if (n < count+1)
118: goto none;
119: continue;
120: }
121: if (c == SI) {
122: state1 = STATE_ASCII;
123: s++; count++;
124: if (n < count+1)
125: goto none;
126: continue;
127: }
128: break;
129: }
130: switch (state1) {
131: case STATE_ASCII:
132: if (c < 0x80) {
133: int ret = ascii_mbtowc(conv,pwc,s,1);
134: if (ret == RET_ILSEQ)
135: goto ilseq;
136: if (ret != 1) abort();
137: if (*pwc == 0x000a || *pwc == 0x000d) {
138: state2 = STATE2_NONE; state3 = STATE3_NONE;
139: }
140: COMBINE_STATE;
141: conv->istate = state;
142: return count+1;
143: } else
144: goto ilseq;
145: case STATE_TWOBYTE:
146: if (n < count+2)
147: goto none;
148: if (s[0] < 0x80 && s[1] < 0x80) {
149: int ret;
150: switch (state2) {
151: case STATE2_NONE:
152: goto ilseq;
153: case STATE2_DESIGNATED_GB2312:
154: ret = gb2312_mbtowc(conv,pwc,s,2); break;
155: case STATE2_DESIGNATED_CNS11643_1:
156: ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
157: default: abort();
158: }
159: if (ret == RET_ILSEQ)
160: goto ilseq;
161: if (ret != 2) abort();
162: COMBINE_STATE;
163: conv->istate = state;
164: return count+2;
165: } else
166: goto ilseq;
167: default: abort();
168: }
169:
170: none:
171: COMBINE_STATE;
172: conv->istate = state;
173: return RET_TOOFEW(count);
174:
175: ilseq:
176: COMBINE_STATE;
177: conv->istate = state;
178: return RET_SHIFT_ILSEQ(count);
179: }
180:
181: static int
182: iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
183: {
184: state_t state = conv->ostate;
185: SPLIT_STATE;
186: unsigned char buf[3];
187: int ret;
188:
189: /* There is no need to handle Unicode 3.1 tag characters and to look for
190: "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
191:
192: /* Try ASCII. */
193: ret = ascii_wctomb(conv,buf,wc,1);
194: if (ret != RET_ILUNI) {
195: if (ret != 1) abort();
196: if (buf[0] < 0x80) {
197: int count = (state1 == STATE_ASCII ? 1 : 2);
198: if (n < count)
199: return RET_TOOSMALL;
200: if (state1 != STATE_ASCII) {
201: r[0] = SI;
202: r += 1;
203: state1 = STATE_ASCII;
204: }
205: r[0] = buf[0];
206: if (wc == 0x000a || wc == 0x000d) {
207: state2 = STATE2_NONE; state3 = STATE3_NONE;
208: }
209: COMBINE_STATE;
210: conv->ostate = state;
211: return count;
212: }
213: }
214:
215: /* Try GB 2312-1980. */
216: ret = gb2312_wctomb(conv,buf,wc,2);
217: if (ret != RET_ILUNI) {
218: if (ret != 2) abort();
219: if (buf[0] < 0x80 && buf[1] < 0x80) {
220: int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
221: if (n < count)
222: return RET_TOOSMALL;
223: if (state2 != STATE2_DESIGNATED_GB2312) {
224: r[0] = ESC;
225: r[1] = '$';
226: r[2] = ')';
227: r[3] = 'A';
228: r += 4;
229: state2 = STATE2_DESIGNATED_GB2312;
230: }
231: if (state1 != STATE_TWOBYTE) {
232: r[0] = SO;
233: r += 1;
234: state1 = STATE_TWOBYTE;
235: }
236: r[0] = buf[0];
237: r[1] = buf[1];
238: COMBINE_STATE;
239: conv->ostate = state;
240: return count;
241: }
242: }
243:
244: ret = cns11643_wctomb(conv,buf,wc,3);
245: if (ret != RET_ILUNI) {
246: if (ret != 3) abort();
247:
248: /* Try CNS 11643-1992 Plane 1. */
249: if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
250: int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
251: if (n < count)
252: return RET_TOOSMALL;
253: if (state2 != STATE2_DESIGNATED_CNS11643_1) {
254: r[0] = ESC;
255: r[1] = '$';
256: r[2] = ')';
257: r[3] = 'G';
258: r += 4;
259: state2 = STATE2_DESIGNATED_CNS11643_1;
260: }
261: if (state1 != STATE_TWOBYTE) {
262: r[0] = SO;
263: r += 1;
264: state1 = STATE_TWOBYTE;
265: }
266: r[0] = buf[1];
267: r[1] = buf[2];
268: COMBINE_STATE;
269: conv->ostate = state;
270: return count;
271: }
272:
273: /* Try CNS 11643-1992 Plane 2. */
274: if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
275: int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
276: if (n < count)
277: return RET_TOOSMALL;
278: if (state3 != STATE3_DESIGNATED_CNS11643_2) {
279: r[0] = ESC;
280: r[1] = '$';
281: r[2] = '*';
282: r[3] = 'H';
283: r += 4;
284: state3 = STATE3_DESIGNATED_CNS11643_2;
285: }
286: r[0] = ESC;
287: r[1] = 'N';
288: r[2] = buf[1];
289: r[3] = buf[2];
290: COMBINE_STATE;
291: conv->ostate = state;
292: return count;
293: }
294: }
295:
296: return RET_ILUNI;
297: }
298:
299: static int
300: iso2022_cn_reset (conv_t conv, unsigned char *r, int n)
301: {
302: state_t state = conv->ostate;
303: SPLIT_STATE;
304: (void)state2;
305: (void)state3;
306: if (state1 != STATE_ASCII) {
307: if (n < 1)
308: return RET_TOOSMALL;
309: r[0] = SI;
310: /* conv->ostate = 0; will be done by the caller */
311: return 1;
312: } else
313: return 0;
314: }
315:
/* Remove the state helpers defined for this converter, in the order in
   which they were defined. */
#undef STATE_ASCII
#undef STATE_TWOBYTE
#undef STATE2_NONE
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE3_NONE
#undef STATE3_DESIGNATED_CNS11643_2
#undef SPLIT_STATE
#undef COMBINE_STATE
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>