Annotation of embedaddon/php/ext/mbstring/libmbfl/filters/mbfilter_sjis.c, revision 1.1.1.2
1.1 misho 1: /*
2: * "streamable kanji code filter and converter"
3: * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4: *
5: * LICENSE NOTICES
6: *
7: * This file is part of "streamable kanji code filter and converter",
8: * which is distributed under the terms of GNU Lesser General Public
9: * License (version 2) as published by the Free Software Foundation.
10: *
11: * This software is distributed in the hope that it will be useful,
12: * but WITHOUT ANY WARRANTY; without even the implied warranty of
13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14: * GNU Lesser General Public License for more details.
15: *
16: * You should have received a copy of the GNU Lesser General Public
17: * License along with "streamable kanji code filter and converter";
18: * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19: * Suite 330, Boston, MA 02111-1307 USA
20: *
21: * The author of this file:
22: *
23: */
24: /*
25: * The source code included in this file was separated from mbfilter_ja.c
26: * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27: *
28: */
29:
30: #ifdef HAVE_CONFIG_H
31: #include "config.h"
32: #endif
33:
34: #include "mbfilter.h"
35: #include "mbfilter_sjis.h"
36:
1.1.1.2 ! misho 37: #define UNICODE_TABLE_CP932_DEF
! 38: #define UNICODE_TABLE_JIS_DEF
! 39:
1.1 misho 40: #include "unicode_table_cp932_ext.h"
41: #include "unicode_table_jis.h"
42:
1.1.1.2 ! misho 43: int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
1.1 misho 44:
/*
 * Per-byte length table, indexed by the byte value 0x00-0xff.
 * Entries are 2 for 0x80-0x9f and 0xe0-0xff, and 1 for everything else.
 */
const unsigned char mblen_table_sjis[] = {
	/* 0x00-0x0f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10-0x1f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20-0x2f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30-0x3f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40-0x4f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50-0x5f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60-0x6f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70-0x7f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80-0x8f */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90-0x9f */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xa0-0xaf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xb0-0xbf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xc0-0xcf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xd0-0xdf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xe0-0xef */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xf0-0xff */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
63:
/* NULL-terminated list of alternative names for this encoding. */
static const char *mbfl_encoding_sjis_aliases[] = {
	"x-sjis",
	"SHIFT-JIS",
	NULL
};
65:
66: const mbfl_encoding mbfl_encoding_sjis = {
67: mbfl_no_encoding_sjis,
68: "SJIS",
69: "Shift_JIS",
70: (const char *(*)[])&mbfl_encoding_sjis_aliases,
71: mblen_table_sjis,
1.1.1.2 ! misho 72: MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
1.1 misho 73: };
74:
75: const struct mbfl_identify_vtbl vtbl_identify_sjis = {
76: mbfl_no_encoding_sjis,
77: mbfl_filt_ident_common_ctor,
78: mbfl_filt_ident_common_dtor,
79: mbfl_filt_ident_sjis
80: };
81:
82: const struct mbfl_convert_vtbl vtbl_sjis_wchar = {
83: mbfl_no_encoding_sjis,
84: mbfl_no_encoding_wchar,
85: mbfl_filt_conv_common_ctor,
86: mbfl_filt_conv_common_dtor,
87: mbfl_filt_conv_sjis_wchar,
88: mbfl_filt_conv_common_flush
89: };
90:
91: const struct mbfl_convert_vtbl vtbl_wchar_sjis = {
92: mbfl_no_encoding_wchar,
93: mbfl_no_encoding_sjis,
94: mbfl_filt_conv_common_ctor,
95: mbfl_filt_conv_common_dtor,
96: mbfl_filt_conv_wchar_sjis,
97: mbfl_filt_conv_common_flush
98: };
99:
/*
 * Evaluate `statement` exactly once; if its value is negative, return -1
 * from the enclosing function.
 */
#define CK(statement)				\
	do {					\
		if ((statement) < 0) {		\
			return (-1);		\
		}				\
	} while (0)
101:
/*
 * SJIS_ENCODE(c1, c2, s1, s2)
 *
 * Converts the two-byte code (c1, c2) -- at the call site in this file, the
 * high and low byte of a JIS code -- into the two Shift_JIS bytes, stored
 * in the lvalues s1 (first byte) and s2 (second byte).
 *
 *   s1 = ((c1 - 1) >> 1) + (c1 < 0x5f ? 0x71 : 0xb1)
 *   s2 = odd c1:  c2 + 0x20, minus one more when c2 < 0x60
 *        even c1: c2 + 0x7e
 *
 * Every parameter is parenthesized in the expansion so that lvalue
 * expressions such as `*p` work as outputs (`(*p)--`, not `*p--`).
 * c1 and c2 are evaluated more than once; do not pass expressions with
 * side effects, and do not alias s1/s2 with c1/c2.
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
		do {						\
			(s1) = (c1);				\
			(s1)--;					\
			(s1) >>= 1;				\
			if ((c1) < 0x5f) {			\
				(s1) += 0x71;			\
			} else {				\
				(s1) += 0xb1;			\
			}					\
			(s2) = (c2);				\
			if ((c1) & 1) {				\
				if ((c2) < 0x60) {		\
					(s2)--;			\
				}				\
				(s2) += 0x20;			\
			} else {				\
				(s2) += 0x7e;			\
			}					\
		} while (0)
122:
/*
 * SJIS_DECODE(c1, c2, s1, s2)
 *
 * Inverse of SJIS_ENCODE: converts the Shift_JIS byte pair (c1, c2) into a
 * two-byte code stored in the lvalues s1 (first byte) and s2 (second byte).
 *
 *   s1 = ((c1 - (c1 < 0xa0 ? 0x81 : 0xc1)) << 1) + 0x21, plus one when
 *        c2 >= 0x9f
 *   s2 = c2 <  0x9f: c2 - 0x20, plus one when c2 < 0x7f
 *        c2 >= 0x9f: c2 - 0x7e
 *
 * No range validation is performed here; callers check the bytes first.
 * Every parameter is parenthesized in the expansion so that lvalue
 * expressions such as `*p` work as outputs (`(*p)++`, not `*p++`).
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
		do {						\
			(s1) = (c1);				\
			if ((s1) < 0xa0) {			\
				(s1) -= 0x81;			\
			} else {				\
				(s1) -= 0xc1;			\
			}					\
			(s1) <<= 1;				\
			(s1) += 0x21;				\
			(s2) = (c2);				\
			if ((s2) < 0x9f) {			\
				if ((s2) < 0x7f) {		\
					(s2)++;			\
				}				\
				(s2) -= 0x20;			\
			} else {				\
				(s1)++;				\
				(s2) -= 0x7e;			\
			}					\
		} while (0)
144:
145:
146: /*
147: * SJIS => wchar
148: */
149: int
150: mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter)
151: {
152: int c1, s1, s2, w;
153:
154: switch (filter->status) {
155: case 0:
156: if (c >= 0 && c < 0x80) { /* latin */
157: CK((*filter->output_function)(c, filter->data));
158: } else if (c > 0xa0 && c < 0xe0) { /* kana */
159: CK((*filter->output_function)(0xfec0 + c, filter->data));
160: } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
161: filter->status = 1;
162: filter->cache = c;
163: } else {
164: w = c & MBFL_WCSGROUP_MASK;
165: w |= MBFL_WCSGROUP_THROUGH;
166: CK((*filter->output_function)(w, filter->data));
167: }
168: break;
169:
170: case 1: /* kanji second char */
171: filter->status = 0;
172: c1 = filter->cache;
173: if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
174: SJIS_DECODE(c1, c, s1, s2);
175: w = (s1 - 0x21)*94 + s2 - 0x21;
176: if (w >= 0 && w < jisx0208_ucs_table_size) {
177: w = jisx0208_ucs_table[w];
178: } else {
179: w = 0;
180: }
181: if (w <= 0) {
182: if (s1 < 0x7f && s2 < 0x7f) {
183: w = (s1 << 8) | s2;
184: w &= MBFL_WCSPLANE_MASK;
185: w |= MBFL_WCSPLANE_JIS0208;
186: } else {
187: w = (c1 << 8) | c;
188: w &= MBFL_WCSGROUP_MASK;
189: w |= MBFL_WCSGROUP_THROUGH;
190: }
191: }
192: CK((*filter->output_function)(w, filter->data));
193: } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
194: CK((*filter->output_function)(c, filter->data));
195: } else {
196: w = (c1 << 8) | c;
197: w &= MBFL_WCSGROUP_MASK;
198: w |= MBFL_WCSGROUP_THROUGH;
199: CK((*filter->output_function)(w, filter->data));
200: }
201: break;
202:
203: default:
204: filter->status = 0;
205: break;
206: }
207:
208: return c;
209: }
210:
211: /*
212: * wchar => SJIS
213: */
214: int
215: mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
216: {
217: int c1, c2, s1, s2;
218:
219: s1 = 0;
220: if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
221: s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
222: } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
223: s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
224: } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
225: s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
226: } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
227: s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
228: }
229: if (s1 <= 0) {
230: c1 = c & ~MBFL_WCSPLANE_MASK;
231: if (c1 == MBFL_WCSPLANE_JIS0208) {
232: s1 = c & MBFL_WCSPLANE_MASK;
233: } else if (c == 0xa5) { /* YEN SIGN */
234: s1 = 0x216f; /* FULLWIDTH YEN SIGN */
235: } else if (c == 0x203e) { /* OVER LINE */
236: s1 = 0x2131; /* FULLWIDTH MACRON */
237: } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
238: s1 = 0x2140;
239: } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
240: s1 = 0x2141;
241: } else if (c == 0x2225) { /* PARALLEL TO */
242: s1 = 0x2142;
243: } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
244: s1 = 0x215d;
245: } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
246: s1 = 0x2171;
247: } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
248: s1 = 0x2172;
249: } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
250: s1 = 0x224c;
251: }
252: if (c == 0) {
253: s1 = 0;
254: } else if (s1 <= 0) {
255: s1 = -1;
256: }
257: } else if (s1 >= 0x8080) {
258: s1 = -1;
259: }
260: if (s1 >= 0) {
261: if (s1 < 0x100) { /* latin or kana */
262: CK((*filter->output_function)(s1, filter->data));
263: } else { /* kanji */
264: c1 = (s1 >> 8) & 0xff;
265: c2 = s1 & 0xff;
266: SJIS_ENCODE(c1, c2, s1, s2);
267: CK((*filter->output_function)(s1, filter->data));
268: CK((*filter->output_function)(s2, filter->data));
269: }
270: } else {
271: if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
272: CK(mbfl_filt_conv_illegal_output(c, filter));
273: }
274: }
275:
276: return c;
277: }
278:
1.1.1.2 ! misho 279: int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter)
1.1 misho 280: {
281: if (filter->status) { /* kanji second char */
282: if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */
283: filter->flag = 1;
284: }
285: filter->status = 0;
286: } else if (c >= 0 && c < 0x80) { /* latin ok */
287: ;
288: } else if (c > 0xa0 && c < 0xe0) { /* kana ok */
289: ;
290: } else if (c > 0x80 && c < 0xf0 && c != 0xa0) { /* kanji first char */
291: filter->status = 1;
292: } else { /* bad */
293: filter->flag = 1;
294: }
295:
296: return c;
297: }
298:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>