Annotation of embedaddon/libiconv/lib/iso2022_jp3.h, revision 1.1.1.2
1.1 misho 1: /*
1.1.1.2 ! misho 2: * Copyright (C) 1999-2004, 2008, 2016 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.2 ! misho 17: * If not, see <https://www.gnu.org/licenses/>.
1.1 misho 18: */
19:
20: /*
21: * ISO-2022-JP-3
22: */
23:
24: #include "jisx0213.h"
25:
26: #define ESC 0x1b /* escape byte that starts every sequence listed below */
27:
28: /*
29: * The state is composed of one of the following values
* (it is kept in the low 3 bits of conv->istate / conv->ostate; the comment
* after each value names the escape sequence(s) that select it).
30: */
31: #define STATE_ASCII 0 /* Esc ( B */
32: #define STATE_JISX0201ROMAN 1 /* Esc ( J */
33: #define STATE_JISX0201KATAKANA 2 /* Esc ( I */
34: #define STATE_JISX0208 3 /* Esc $ @ or Esc $ B */
35: #define STATE_JISX02131 4 /* Esc $ ( O or Esc $ ( Q */
36: #define STATE_JISX02132 5 /* Esc $ ( P */
37:
38: /*
39: * In the ISO-2022-JP-3 to UCS-4 direction, the state also holds the last
40: * character to be output, shifted by 3 bits.
41: */
42:
43: static int
1.1.1.2 ! misho 44: iso2022_jp3_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
1.1 misho 45: {
46: ucs4_t last_wc = conv->istate >> 3;
47: if (last_wc) {
48: /* Output the buffered character. */
49: conv->istate &= 7;
50: *pwc = last_wc;
51: return 0; /* Don't advance the input pointer. */
52: } else {
53: state_t state = conv->istate;
54: int count = 0;
55: unsigned char c;
56: for (;;) {
57: c = *s;
58: if (c == ESC) {
59: if (n < count+3)
60: goto none;
61: if (s[1] == '(') {
62: if (s[2] == 'B') {
63: state = STATE_ASCII;
64: s += 3; count += 3;
65: if (n < count+1)
66: goto none;
67: continue;
68: }
69: if (s[2] == 'J') {
70: state = STATE_JISX0201ROMAN;
71: s += 3; count += 3;
72: if (n < count+1)
73: goto none;
74: continue;
75: }
76: if (s[2] == 'I') {
77: state = STATE_JISX0201KATAKANA;
78: s += 3; count += 3;
79: if (n < count+1)
80: goto none;
81: continue;
82: }
83: goto ilseq;
84: }
85: if (s[1] == '$') {
86: if (s[2] == '@' || s[2] == 'B') {
87: /* We don't distinguish JIS X 0208-1978 and JIS X 0208-1983. */
88: state = STATE_JISX0208;
89: s += 3; count += 3;
90: if (n < count+1)
91: goto none;
92: continue;
93: }
94: if (s[2] == '(') {
95: if (n < count+4)
96: goto none;
97: if (s[3] == 'O' || s[3] == 'Q') {
98: state = STATE_JISX02131;
99: s += 4; count += 4;
100: if (n < count+1)
101: goto none;
102: continue;
103: }
104: if (s[3] == 'P') {
105: state = STATE_JISX02132;
106: s += 4; count += 4;
107: if (n < count+1)
108: goto none;
109: continue;
110: }
111: }
112: goto ilseq;
113: }
114: goto ilseq;
115: }
116: break;
117: }
118: switch (state) {
119: case STATE_ASCII:
120: if (c < 0x80) {
121: int ret = ascii_mbtowc(conv,pwc,s,1);
122: if (ret == RET_ILSEQ)
123: goto ilseq;
124: if (ret != 1) abort();
125: conv->istate = state;
126: return count+1;
127: } else
128: goto ilseq;
129: case STATE_JISX0201ROMAN:
130: if (c < 0x80) {
131: int ret = jisx0201_mbtowc(conv,pwc,s,1);
132: if (ret == RET_ILSEQ)
133: goto ilseq;
134: if (ret != 1) abort();
135: conv->istate = state;
136: return count+1;
137: } else
138: goto ilseq;
139: case STATE_JISX0201KATAKANA:
140: if (c < 0x80) {
141: unsigned char buf = c+0x80;
142: int ret = jisx0201_mbtowc(conv,pwc,&buf,1);
143: if (ret == RET_ILSEQ)
144: goto ilseq;
145: if (ret != 1) abort();
146: conv->istate = state;
147: return count+1;
148: } else
149: goto ilseq;
150: case STATE_JISX0208:
151: if (n < count+2)
152: goto none;
153: if (s[0] < 0x80 && s[1] < 0x80) {
154: int ret = jisx0208_mbtowc(conv,pwc,s,2);
155: if (ret == RET_ILSEQ)
156: goto ilseq;
157: if (ret != 2) abort();
158: conv->istate = state;
159: return count+2;
160: } else
161: goto ilseq;
162: case STATE_JISX02131:
163: case STATE_JISX02132:
164: if (n < count+2)
165: goto none;
166: if (s[0] < 0x80 && s[1] < 0x80) {
167: ucs4_t wc = jisx0213_to_ucs4(((state-STATE_JISX02131+1)<<8)+s[0],s[1]);
168: if (wc) {
169: if (wc < 0x80) {
170: /* It's a combining character. */
171: ucs4_t wc1 = jisx0213_to_ucs_combining[wc - 1][0];
172: ucs4_t wc2 = jisx0213_to_ucs_combining[wc - 1][1];
173: /* We cannot output two Unicode characters at once. So,
174: output the first character and buffer the second one. */
175: *pwc = wc1;
176: conv->istate = (wc2 << 3) | state;
177: } else {
178: *pwc = wc;
179: conv->istate = state;
180: }
181: return count+2;
182: }
183: }
184: goto ilseq;
185: default: abort();
186: }
187: none:
188: conv->istate = state;
189: return RET_TOOFEW(count);
190:
191: ilseq:
192: conv->istate = state;
193: return RET_SHIFT_ILSEQ(count);
194: }
195: }
196:
197: static int
198: iso2022_jp3_flushwc (conv_t conv, ucs4_t *pwc)
199: {
200: ucs4_t last_wc = conv->istate >> 3;
201: if (last_wc) {
202: /* Output the buffered character. */
203: conv->istate &= 7;
204: *pwc = last_wc;
205: return 1;
206: } else
207: return 0;
208: }
209:
210: /*
211: * In the UCS-4 to ISO-2022-JP-3 direction, the state also holds the last two
212: * bytes to be output, shifted by 3 bits, and the STATE_xxxxx value that was
213: * effective before this buffered character, shifted by 19 bits.
214: */
215:
216: /* Composition tables for each of the relevant combining characters.
   The array is the concatenation of one sub-table per combining character
   (U+02E5, U+02E9, U+0300, U+0301, U+309A); the matching
   iso2022_jp3_comp_table<xxxx>_idx / _len macros give the index of the
   first entry and the number of entries of each sub-table.
   In an entry, 'base' is the two-byte code of the buffered character and
   'composed' is the two-byte JIS X 0213 plane 1 code that
   iso2022_jp3_wctomb emits when that combining character follows it. */
217: static const struct { unsigned short base; unsigned short composed; } iso2022_jp3_comp_table_data[] = {
218: #define iso2022_jp3_comp_table02e5_idx 0
219: #define iso2022_jp3_comp_table02e5_len 1
220: { 0x2b64, 0x2b65 }, /* 0x12B65 = 0x12B64 U+02E5 */
221: #define iso2022_jp3_comp_table02e9_idx (iso2022_jp3_comp_table02e5_idx+iso2022_jp3_comp_table02e5_len)
222: #define iso2022_jp3_comp_table02e9_len 1
223: { 0x2b60, 0x2b66 }, /* 0x12B66 = 0x12B60 U+02E9 */
224: #define iso2022_jp3_comp_table0300_idx (iso2022_jp3_comp_table02e9_idx+iso2022_jp3_comp_table02e9_len)
225: #define iso2022_jp3_comp_table0300_len 5
226: { 0x295c, 0x2b44 }, /* 0x12B44 = 0x1295C U+0300 */
227: { 0x2b38, 0x2b48 }, /* 0x12B48 = 0x12B38 U+0300 */
228: { 0x2b37, 0x2b4a }, /* 0x12B4A = 0x12B37 U+0300 */
229: { 0x2b30, 0x2b4c }, /* 0x12B4C = 0x12B30 U+0300 */
230: { 0x2b43, 0x2b4e }, /* 0x12B4E = 0x12B43 U+0300 */
231: #define iso2022_jp3_comp_table0301_idx (iso2022_jp3_comp_table0300_idx+iso2022_jp3_comp_table0300_len)
232: #define iso2022_jp3_comp_table0301_len 4
233: { 0x2b38, 0x2b49 }, /* 0x12B49 = 0x12B38 U+0301 */
234: { 0x2b37, 0x2b4b }, /* 0x12B4B = 0x12B37 U+0301 */
235: { 0x2b30, 0x2b4d }, /* 0x12B4D = 0x12B30 U+0301 */
236: { 0x2b43, 0x2b4f }, /* 0x12B4F = 0x12B43 U+0301 */
237: #define iso2022_jp3_comp_table309a_idx (iso2022_jp3_comp_table0301_idx+iso2022_jp3_comp_table0301_len)
238: #define iso2022_jp3_comp_table309a_len 14
239: { 0x242b, 0x2477 }, /* 0x12477 = 0x1242B U+309A */
240: { 0x242d, 0x2478 }, /* 0x12478 = 0x1242D U+309A */
241: { 0x242f, 0x2479 }, /* 0x12479 = 0x1242F U+309A */
242: { 0x2431, 0x247a }, /* 0x1247A = 0x12431 U+309A */
243: { 0x2433, 0x247b }, /* 0x1247B = 0x12433 U+309A */
244: { 0x252b, 0x2577 }, /* 0x12577 = 0x1252B U+309A */
245: { 0x252d, 0x2578 }, /* 0x12578 = 0x1252D U+309A */
246: { 0x252f, 0x2579 }, /* 0x12579 = 0x1252F U+309A */
247: { 0x2531, 0x257a }, /* 0x1257A = 0x12531 U+309A */
248: { 0x2533, 0x257b }, /* 0x1257B = 0x12533 U+309A */
249: { 0x253b, 0x257c }, /* 0x1257C = 0x1253B U+309A */
250: { 0x2544, 0x257d }, /* 0x1257D = 0x12544 U+309A */
251: { 0x2548, 0x257e }, /* 0x1257E = 0x12548 U+309A */
252: { 0x2675, 0x2678 }, /* 0x12678 = 0x12675 U+309A */
253: };
254:
/* SPLIT_STATE expects a local variable 'state' holding the packed output
   state.  It declares 'lasttwo' (bits 3..18: the two buffered bytes, 0 if
   nothing is buffered) and 'prevstate' (bits 19 and up: the STATE_xxx value
   in effect before the buffered character) and reduces 'state' to its low
   3 bits.  Because it declares variables it must be used where
   declarations are allowed. */
255: #define SPLIT_STATE \
256: unsigned short lasttwo = state >> 3; state_t prevstate = state >> 19; state &= 7
/* COMBINE_STATE is the inverse: it ORs prevstate and lasttwo back into
   'state', which must contain only the 3-bit STATE_xxx value. */
257: #define COMBINE_STATE \
258: state |= (prevstate << 19) | (lasttwo << 3)
/* COMBINE_STATE_NO_LASTTWO expands to nothing; it only marks the places
   where 'state' is stored without a buffered character. */
259: #define COMBINE_STATE_NO_LASTTWO \
260: /* assume lasttwo == 0, then prevstate is ignored */
261:
262: static int
1.1.1.2 ! misho 263: iso2022_jp3_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
1.1 misho 264: {
265: int count = 0;
266: unsigned char buf[2];
267: unsigned short jch;
268: int ret;
269: state_t state = conv->ostate;
270: SPLIT_STATE;
271:
272: if (lasttwo) {
273: /* Attempt to combine the last character with this one. */
274: unsigned int idx;
275: unsigned int len;
276:
277: if (wc == 0x02e5)
278: idx = iso2022_jp3_comp_table02e5_idx,
279: len = iso2022_jp3_comp_table02e5_len;
280: else if (wc == 0x02e9)
281: idx = iso2022_jp3_comp_table02e9_idx,
282: len = iso2022_jp3_comp_table02e9_len;
283: else if (wc == 0x0300)
284: idx = iso2022_jp3_comp_table0300_idx,
285: len = iso2022_jp3_comp_table0300_len;
286: else if (wc == 0x0301)
287: idx = iso2022_jp3_comp_table0301_idx,
288: len = iso2022_jp3_comp_table0301_len;
289: else if (wc == 0x309a)
290: idx = iso2022_jp3_comp_table309a_idx,
291: len = iso2022_jp3_comp_table309a_len;
292: else
293: goto not_combining;
294:
295: do
296: if (iso2022_jp3_comp_table_data[idx].base == lasttwo)
297: break;
298: while (++idx, --len > 0);
299:
300: if (len > 0) {
301: /* Output the combined character. */
302: /* We know the combined character is in JISX0213 plane 1, but
303: the buffered character may have been in JISX0208 or in
304: JISX0213 plane 1. */
305: count = (state != STATE_JISX02131 ? 4 : 0) + 2;
306: if (n < count)
307: return RET_TOOSMALL;
308: if (state != STATE_JISX02131) {
309: r[0] = ESC;
310: r[1] = '$';
311: r[2] = '(';
312: r[3] = 'Q';
313: r += 4;
314: state = STATE_JISX02131;
315: }
316: lasttwo = iso2022_jp3_comp_table_data[idx].composed;
317: r[0] = (lasttwo >> 8) & 0xff;
318: r[1] = lasttwo & 0xff;
319: COMBINE_STATE_NO_LASTTWO;
320: conv->ostate = state;
321: return count;
322: }
323:
324: not_combining:
325: /* Output the buffered character. */
326: /* We know it is in JISX0208 or in JISX0213 plane 1. */
327: count = (prevstate != state ? 3 : 0) + 2;
328: if (n < count)
329: return RET_TOOSMALL;
330: if (prevstate != state) {
331: if (state != STATE_JISX0208) abort();
332: r[0] = ESC;
333: r[1] = '$';
334: r[2] = 'B';
335: r += 3;
336: }
337: r[0] = (lasttwo >> 8) & 0xff;
338: r[1] = lasttwo & 0xff;
339: r += 2;
340: }
341:
342: /* Try ASCII. */
343: ret = ascii_wctomb(conv,buf,wc,1);
344: if (ret != RET_ILUNI) {
345: if (ret != 1) abort();
346: if (buf[0] < 0x80) {
347: count += (state == STATE_ASCII ? 1 : 4);
348: if (n < count)
349: return RET_TOOSMALL;
350: if (state != STATE_ASCII) {
351: r[0] = ESC;
352: r[1] = '(';
353: r[2] = 'B';
354: r += 3;
355: state = STATE_ASCII;
356: }
357: r[0] = buf[0];
358: COMBINE_STATE_NO_LASTTWO;
359: conv->ostate = state;
360: return count;
361: }
362: }
363:
364: /* Try JIS X 0201-1976 Roman. */
365: ret = jisx0201_wctomb(conv,buf,wc,1);
366: if (ret != RET_ILUNI) {
367: if (ret != 1) abort();
368: if (buf[0] < 0x80) {
369: count += (state == STATE_JISX0201ROMAN ? 1 : 4);
370: if (n < count)
371: return RET_TOOSMALL;
372: if (state != STATE_JISX0201ROMAN) {
373: r[0] = ESC;
374: r[1] = '(';
375: r[2] = 'J';
376: r += 3;
377: state = STATE_JISX0201ROMAN;
378: }
379: r[0] = buf[0];
380: COMBINE_STATE_NO_LASTTWO;
381: conv->ostate = state;
382: return count;
383: }
384: }
385:
386: jch = ucs4_to_jisx0213(wc);
387:
388: /* Try JIS X 0208-1990 in place of JIS X 0208-1978 and JIS X 0208-1983. */
389: ret = jisx0208_wctomb(conv,buf,wc,2);
390: if (ret != RET_ILUNI) {
391: if (ret != 2) abort();
392: if (buf[0] < 0x80 && buf[1] < 0x80) {
393: if (jch & 0x0080) {
394: /* A possible match in comp_table_data. Buffer it. */
395: prevstate = state;
396: lasttwo = jch & 0x7f7f;
397: state = STATE_JISX0208;
398: COMBINE_STATE;
399: conv->ostate = state;
400: return count;
401: } else {
402: count += (state == STATE_JISX0208 ? 2 : 5);
403: if (n < count)
404: return RET_TOOSMALL;
405: if (state != STATE_JISX0208) {
406: r[0] = ESC;
407: r[1] = '$';
408: r[2] = 'B';
409: r += 3;
410: state = STATE_JISX0208;
411: }
412: r[0] = buf[0];
413: r[1] = buf[1];
414: COMBINE_STATE_NO_LASTTWO;
415: conv->ostate = state;
416: return count;
417: }
418: }
419: }
420:
421: /* Try JISX 0213 plane 1 and JISX 0213 plane 2. */
422: if (jch != 0) {
423: if (jch & 0x8000) {
424: /* JISX 0213 plane 2. */
425: if (state != STATE_JISX02132) {
426: count += 4;
427: if (n < count)
428: return RET_TOOSMALL;
429: r[0] = ESC;
430: r[1] = '$';
431: r[2] = '(';
432: r[3] = 'P';
433: r += 4;
434: state = STATE_JISX02132;
435: }
436: } else {
437: /* JISX 0213 plane 1. */
438: if (state != STATE_JISX02131) {
439: count += 4;
440: if (n < count)
441: return RET_TOOSMALL;
442: r[0] = ESC;
443: r[1] = '$';
444: r[2] = '(';
445: r[3] = 'Q';
446: r += 4;
447: state = STATE_JISX02131;
448: }
449: }
450: if (jch & 0x0080) {
451: /* A possible match in comp_table_data. We have to buffer it. */
452: /* We know it's a JISX 0213 plane 1 character. */
453: if (jch & 0x8000) abort();
454: prevstate = state;
455: lasttwo = jch & 0x7f7f;
456: COMBINE_STATE;
457: conv->ostate = state;
458: return count;
459: }
460: count += 2;
461: if (n < count)
462: return RET_TOOSMALL;
463: r[0] = (jch >> 8) & 0x7f;
464: r[1] = jch & 0x7f;
465: COMBINE_STATE_NO_LASTTWO;
466: conv->ostate = state;
467: return count;
468: }
469:
470: /* Try JIS X 0201-1976 Katakana. This is not officially part of
471: ISO-2022-JP-3. Therefore we try it after all other attempts. */
472: ret = jisx0201_wctomb(conv,buf,wc,1);
473: if (ret != RET_ILUNI) {
474: if (ret != 1) abort();
475: if (buf[0] >= 0x80) {
476: count += (state == STATE_JISX0201KATAKANA ? 1 : 4);
477: if (n < count)
478: return RET_TOOSMALL;
479: if (state != STATE_JISX0201KATAKANA) {
480: r[0] = ESC;
481: r[1] = '(';
482: r[2] = 'I';
483: r += 3;
484: state = STATE_JISX0201KATAKANA;
485: }
486: r[0] = buf[0]-0x80;
487: COMBINE_STATE_NO_LASTTWO;
488: conv->ostate = state;
489: return count;
490: }
491: }
492:
493: return RET_ILUNI;
494: }
495:
496: static int
1.1.1.2 ! misho 497: iso2022_jp3_reset (conv_t conv, unsigned char *r, size_t n)
1.1 misho 498: {
499: state_t state = conv->ostate;
500: SPLIT_STATE;
501: {
502: int count =
503: (lasttwo ? (prevstate != state ? 3 : 0) + 2 : 0)
504: + (state != STATE_ASCII ? 3 : 0);
505: if (n < count)
506: return RET_TOOSMALL;
507: if (lasttwo) {
508: if (prevstate != state) {
509: if (state != STATE_JISX0208) abort();
510: r[0] = ESC;
511: r[1] = '$';
512: r[2] = 'B';
513: r += 3;
514: }
515: r[0] = (lasttwo >> 8) & 0xff;
516: r[1] = lasttwo & 0xff;
517: r += 2;
518: }
519: if (state != STATE_ASCII) {
520: r[0] = ESC;
521: r[1] = '(';
522: r[2] = 'B';
523: }
524: /* conv->ostate = 0; will be done by the caller */
525: return count;
526: }
527: }
528:
/* Remove the state-handling macros and STATE_xxx values defined above.
   NOTE(review): ESC and the iso2022_jp3_comp_table*_idx/_len macros are not
   undefined in the lines shown here. */
529: #undef COMBINE_STATE_NO_LASTTWO
530: #undef COMBINE_STATE
531: #undef SPLIT_STATE
532: #undef STATE_JISX02132
533: #undef STATE_JISX02131
534: #undef STATE_JISX0208
535: #undef STATE_JISX0201KATAKANA
536: #undef STATE_JISX0201ROMAN
537: #undef STATE_ASCII
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>