Annotation of embedaddon/libiconv/lib/euc_jisx0213.h, revision 1.1
1.1 ! misho 1: /*
! 2: * Copyright (C) 1999-2002 Free Software Foundation, Inc.
! 3: * This file is part of the GNU LIBICONV Library.
! 4: *
! 5: * The GNU LIBICONV Library is free software; you can redistribute it
! 6: * and/or modify it under the terms of the GNU Library General Public
! 7: * License as published by the Free Software Foundation; either version 2
! 8: * of the License, or (at your option) any later version.
! 9: *
! 10: * The GNU LIBICONV Library is distributed in the hope that it will be
! 11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
! 12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 13: * Library General Public License for more details.
! 14: *
! 15: * You should have received a copy of the GNU Library General Public
! 16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
! 17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
! 18: * Fifth Floor, Boston, MA 02110-1301, USA.
! 19: */
! 20:
! 21: /*
! 22: * EUC-JISX0213
! 23: */
! 24:
! 25: /* The structure of EUC-JISX0213 is as follows:
! 26:
! 27: 0x00..0x7F: ASCII
! 28:
! 29: 0x8E{A1..FE}: JISX0201 Katakana, with prefix 0x8E, offset by +0x80 (the decoder below accepts only second bytes A1..DF).
! 30:
! 31: 0x8F{A1..FE}{A1..FE}: JISX0213 plane 2, with prefix 0x8F, offset by +0x8080.
! 32:
! 33: 0x{A1..FE}{A1..FE}: JISX0213 plane 1, offset by +0x8080.
! 34:
! 35: Note that some JISX0213 characters are not contained in Unicode 3.2
! 36: and are therefore best represented as sequences of Unicode characters.
! 37: */
! 38:
! 39: #include "jisx0213.h"
! 40: #include "flushwc.h"
! 41:
! 42: static int
! 43: euc_jisx0213_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
! 44: {
! 45: ucs4_t last_wc = conv->istate;
! 46: if (last_wc) {
! 47: /* Output the buffered character. */
! 48: conv->istate = 0;
! 49: *pwc = last_wc;
! 50: return 0; /* Don't advance the input pointer. */
! 51: } else {
! 52: unsigned char c = *s;
! 53: if (c < 0x80) {
! 54: /* Plain ASCII character. */
! 55: *pwc = (ucs4_t) c;
! 56: return 1;
! 57: } else {
! 58: if ((c >= 0xa1 && c <= 0xfe) || c == 0x8e || c == 0x8f) {
! 59: /* Two or three byte character. */
! 60: if (n >= 2) {
! 61: unsigned char c2 = s[1];
! 62: if (c2 >= 0xa1 && c2 <= 0xfe) {
! 63: if (c == 0x8e) {
! 64: /* Half-width katakana. */
! 65: if (c2 <= 0xdf) {
! 66: *pwc = c2 + 0xfec0;
! 67: return 2;
! 68: }
! 69: } else {
! 70: ucs4_t wc;
! 71: if (c == 0x8f) {
! 72: /* JISX 0213 plane 2. */
! 73: if (n >= 3) {
! 74: unsigned char c3 = s[2];
! 75: wc = jisx0213_to_ucs4(0x200-0x80+c2,c3^0x80);
! 76: } else
! 77: return RET_TOOFEW(0);
! 78: } else {
! 79: /* JISX 0213 plane 1. */
! 80: wc = jisx0213_to_ucs4(0x100-0x80+c,c2^0x80);
! 81: }
! 82: if (wc) {
! 83: if (wc < 0x80) {
! 84: /* It's a combining character. */
! 85: ucs4_t wc1 = jisx0213_to_ucs_combining[wc - 1][0];
! 86: ucs4_t wc2 = jisx0213_to_ucs_combining[wc - 1][1];
! 87: /* We cannot output two Unicode characters at once. So,
! 88: output the first character and buffer the second one. */
! 89: *pwc = wc1;
! 90: conv->istate = wc2;
! 91: } else
! 92: *pwc = wc;
! 93: return (c == 0x8f ? 3 : 2);
! 94: }
! 95: }
! 96: }
! 97: } else
! 98: return RET_TOOFEW(0);
! 99: }
! 100: return RET_ILSEQ;
! 101: }
! 102: }
! 103: }
! 104:
! 105: #define euc_jisx0213_flushwc normal_flushwc
! 106:
/*
 * Composition table used by the encoder.
 *
 * Each row pairs the two-byte EUC code of a buffered base character
 * ("base") with the two-byte EUC code to emit when that base is followed
 * by a particular combining code point ("composed").  Rows belonging to
 * the same combining code point are contiguous; the *_idx / *_len macros
 * below give the first row and the row count of each such range.
 */
#define euc_jisx0213_comp_table02e5_idx 0
#define euc_jisx0213_comp_table02e5_len 1
#define euc_jisx0213_comp_table02e9_idx (euc_jisx0213_comp_table02e5_idx+euc_jisx0213_comp_table02e5_len)
#define euc_jisx0213_comp_table02e9_len 1
#define euc_jisx0213_comp_table0300_idx (euc_jisx0213_comp_table02e9_idx+euc_jisx0213_comp_table02e9_len)
#define euc_jisx0213_comp_table0300_len 5
#define euc_jisx0213_comp_table0301_idx (euc_jisx0213_comp_table0300_idx+euc_jisx0213_comp_table0300_len)
#define euc_jisx0213_comp_table0301_len 4
#define euc_jisx0213_comp_table309a_idx (euc_jisx0213_comp_table0301_idx+euc_jisx0213_comp_table0301_len)
#define euc_jisx0213_comp_table309a_len 14

static const struct {
  unsigned short base;
  unsigned short composed;
} euc_jisx0213_comp_table_data[] = {
  /* Followed by U+02E5 (1 row). */
  { 0xabe4, 0xabe5 },

  /* Followed by U+02E9 (1 row). */
  { 0xabe0, 0xabe6 },

  /* Followed by U+0300 (5 rows). */
  { 0xa9dc, 0xabc4 },
  { 0xabb8, 0xabc8 },
  { 0xabb7, 0xabca },
  { 0xabb0, 0xabcc },
  { 0xabc3, 0xabce },

  /* Followed by U+0301 (4 rows). */
  { 0xabb8, 0xabc9 },
  { 0xabb7, 0xabcb },
  { 0xabb0, 0xabcd },
  { 0xabc3, 0xabcf },

  /* Followed by U+309A (14 rows). */
  { 0xa4ab, 0xa4f7 },
  { 0xa4ad, 0xa4f8 },
  { 0xa4af, 0xa4f9 },
  { 0xa4b1, 0xa4fa },
  { 0xa4b3, 0xa4fb },
  { 0xa5ab, 0xa5f7 },
  { 0xa5ad, 0xa5f8 },
  { 0xa5af, 0xa5f9 },
  { 0xa5b1, 0xa5fa },
  { 0xa5b3, 0xa5fb },
  { 0xa5bb, 0xa5fc },
  { 0xa5c4, 0xa5fd },
  { 0xa5c8, 0xa5fe },
  { 0xa6f5, 0xa6f8 },
};
! 145:
! 146: static int
! 147: euc_jisx0213_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
! 148: {
! 149: int count = 0;
! 150: unsigned short lasttwo = conv->ostate;
! 151:
! 152: if (lasttwo) {
! 153: /* Attempt to combine the last character with this one. */
! 154: unsigned int idx;
! 155: unsigned int len;
! 156:
! 157: if (wc == 0x02e5)
! 158: idx = euc_jisx0213_comp_table02e5_idx,
! 159: len = euc_jisx0213_comp_table02e5_len;
! 160: else if (wc == 0x02e9)
! 161: idx = euc_jisx0213_comp_table02e9_idx,
! 162: len = euc_jisx0213_comp_table02e9_len;
! 163: else if (wc == 0x0300)
! 164: idx = euc_jisx0213_comp_table0300_idx,
! 165: len = euc_jisx0213_comp_table0300_len;
! 166: else if (wc == 0x0301)
! 167: idx = euc_jisx0213_comp_table0301_idx,
! 168: len = euc_jisx0213_comp_table0301_len;
! 169: else if (wc == 0x309a)
! 170: idx = euc_jisx0213_comp_table309a_idx,
! 171: len = euc_jisx0213_comp_table309a_len;
! 172: else
! 173: goto not_combining;
! 174:
! 175: do
! 176: if (euc_jisx0213_comp_table_data[idx].base == lasttwo)
! 177: break;
! 178: while (++idx, --len > 0);
! 179:
! 180: if (len > 0) {
! 181: /* Output the combined character. */
! 182: if (n >= 2) {
! 183: lasttwo = euc_jisx0213_comp_table_data[idx].composed;
! 184: r[0] = (lasttwo >> 8) & 0xff;
! 185: r[1] = lasttwo & 0xff;
! 186: conv->ostate = 0;
! 187: return 2;
! 188: } else
! 189: return RET_TOOSMALL;
! 190: }
! 191:
! 192: not_combining:
! 193: /* Output the buffered character. */
! 194: if (n < 2)
! 195: return RET_TOOSMALL;
! 196: r[0] = (lasttwo >> 8) & 0xff;
! 197: r[1] = lasttwo & 0xff;
! 198: r += 2;
! 199: count = 2;
! 200: }
! 201:
! 202: if (wc < 0x80) {
! 203: /* Plain ASCII character. */
! 204: if (n > count) {
! 205: r[0] = (unsigned char) wc;
! 206: conv->ostate = 0;
! 207: return count+1;
! 208: } else
! 209: return RET_TOOSMALL;
! 210: } else if (wc >= 0xff61 && wc <= 0xff9f) {
! 211: /* Half-width katakana. */
! 212: if (n >= count+2) {
! 213: r[0] = 0x8e;
! 214: r[1] = wc - 0xfec0;
! 215: conv->ostate = 0;
! 216: return count+2;
! 217: } else
! 218: return RET_TOOSMALL;
! 219: } else {
! 220: unsigned short jch = ucs4_to_jisx0213(wc);
! 221: if (jch != 0) {
! 222: if (jch & 0x0080) {
! 223: /* A possible match in comp_table_data. We have to buffer it. */
! 224: /* We know it's a JISX 0213 plane 1 character. */
! 225: if (jch & 0x8000) abort();
! 226: conv->ostate = jch | 0x8080;
! 227: return count+0;
! 228: }
! 229: if (jch & 0x8000) {
! 230: /* JISX 0213 plane 2. */
! 231: if (n >= count+3) {
! 232: r[0] = 0x8f;
! 233: r[1] = (jch >> 8) | 0x80;
! 234: r[2] = (jch & 0xff) | 0x80;
! 235: conv->ostate = 0;
! 236: return count+3;
! 237: } else
! 238: return RET_TOOSMALL;
! 239: } else {
! 240: /* JISX 0213 plane 1. */
! 241: if (n >= count+2) {
! 242: r[0] = (jch >> 8) | 0x80;
! 243: r[1] = (jch & 0xff) | 0x80;
! 244: conv->ostate = 0;
! 245: return count+2;
! 246: } else
! 247: return RET_TOOSMALL;
! 248: }
! 249: }
! 250: return RET_ILUNI;
! 251: }
! 252: }
! 253:
! 254: static int
! 255: euc_jisx0213_reset (conv_t conv, unsigned char *r, int n)
! 256: {
! 257: state_t lasttwo = conv->ostate;
! 258:
! 259: if (lasttwo) {
! 260: if (n < 2)
! 261: return RET_TOOSMALL;
! 262: r[0] = (lasttwo >> 8) & 0xff;
! 263: r[1] = lasttwo & 0xff;
! 264: /* conv->ostate = 0; will be done by the caller */
! 265: return 2;
! 266: } else
! 267: return 0;
! 268: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>