Return to mbfilter_utf16.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / libmbfl / filters |
1.1 misho 1: /* 2: * "streamable kanji code filter and converter" 3: * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 4: * 5: * LICENSE NOTICES 6: * 7: * This file is part of "streamable kanji code filter and converter", 8: * which is distributed under the terms of GNU Lesser General Public 9: * License (version 2) as published by the Free Software Foundation. 10: * 11: * This software is distributed in the hope that it will be useful, 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14: * GNU Lesser General Public License for more details. 15: * 16: * You should have received a copy of the GNU Lesser General Public 17: * License along with "streamable kanji code filter and converter"; 18: * if not, write to the Free Software Foundation, Inc., 59 Temple Place, 19: * Suite 330, Boston, MA 02111-1307 USA 20: * 21: * The author of this file: 22: * 23: */ 24: /* 25: * The source code included in this files was separated from mbfilter.c 26: * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 27: * 28: */ 29: 30: #ifdef HAVE_CONFIG_H 31: #include "config.h" 32: #endif 33: 34: #include "mbfilter.h" 35: #include "mbfilter_utf16.h" 36: 37: static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL}; 38: 39: const mbfl_encoding mbfl_encoding_utf16 = { 40: mbfl_no_encoding_utf16, 41: "UTF-16", 42: "UTF-16", 43: (const char *(*)[])&mbfl_encoding_utf16_aliases, 44: NULL, 45: MBFL_ENCTYPE_MWC2BE 46: }; 47: 48: const mbfl_encoding mbfl_encoding_utf16be = { 49: mbfl_no_encoding_utf16be, 50: "UTF-16BE", 51: "UTF-16BE", 52: NULL, 53: NULL, 54: MBFL_ENCTYPE_MWC2BE 55: }; 56: 57: const mbfl_encoding mbfl_encoding_utf16le = { 58: mbfl_no_encoding_utf16le, 59: "UTF-16LE", 60: "UTF-16LE", 61: NULL, 62: NULL, 63: MBFL_ENCTYPE_MWC2LE 64: }; 65: 66: const struct mbfl_convert_vtbl vtbl_utf16_wchar = { 67: mbfl_no_encoding_utf16, 68: mbfl_no_encoding_wchar, 69: mbfl_filt_conv_common_ctor, 70: mbfl_filt_conv_common_dtor, 71: mbfl_filt_conv_utf16_wchar, 72: mbfl_filt_conv_common_flush 73: }; 74: 75: const struct mbfl_convert_vtbl vtbl_wchar_utf16 = { 76: mbfl_no_encoding_wchar, 77: mbfl_no_encoding_utf16, 78: mbfl_filt_conv_common_ctor, 79: mbfl_filt_conv_common_dtor, 80: mbfl_filt_conv_wchar_utf16be, 81: mbfl_filt_conv_common_flush 82: }; 83: 84: const struct mbfl_convert_vtbl vtbl_utf16be_wchar = { 85: mbfl_no_encoding_utf16be, 86: mbfl_no_encoding_wchar, 87: mbfl_filt_conv_common_ctor, 88: mbfl_filt_conv_common_dtor, 89: mbfl_filt_conv_utf16be_wchar, 90: mbfl_filt_conv_common_flush 91: }; 92: 93: const struct mbfl_convert_vtbl vtbl_wchar_utf16be = { 94: mbfl_no_encoding_wchar, 95: mbfl_no_encoding_utf16be, 96: mbfl_filt_conv_common_ctor, 97: mbfl_filt_conv_common_dtor, 98: mbfl_filt_conv_wchar_utf16be, 99: mbfl_filt_conv_common_flush 100: }; 101: 102: const struct mbfl_convert_vtbl vtbl_utf16le_wchar = { 103: mbfl_no_encoding_utf16le, 104: mbfl_no_encoding_wchar, 105: mbfl_filt_conv_common_ctor, 106: mbfl_filt_conv_common_dtor, 107: mbfl_filt_conv_utf16le_wchar, 108: mbfl_filt_conv_common_flush 109: }; 110: 111: const struct mbfl_convert_vtbl vtbl_wchar_utf16le = { 112: mbfl_no_encoding_wchar, 113: mbfl_no_encoding_utf16le, 114: mbfl_filt_conv_common_ctor, 115: mbfl_filt_conv_common_dtor, 116: mbfl_filt_conv_wchar_utf16le, 117: mbfl_filt_conv_common_flush 118: }; 119: 120: #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) 121: 122: /* 123: * UTF-16 => wchar 124: */ 125: int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter) 126: { 127: int n, endian; 128: 129: endian = filter->status & 0xff00; 130: switch (filter->status & 0x0f) { 131: case 0: 132: if (endian) { 133: n = c & 0xff; 134: } else { 135: n = (c & 0xff) << 8; 136: } 137: filter->cache |= n; 138: filter->status++; 139: break; 140: default: 141: if (endian) { 142: n = (c & 0xff) << 8; 143: } else { 144: n = c & 0xff; 145: } 146: n |= filter->cache & 0xffff; 147: filter->status &= ~0x0f; 148: if (n >= 0xd800 && n < 0xdc00) { 149: filter->cache = ((n & 0x3ff) << 16) + 0x400000; 150: } else if (n >= 0xdc00 && n < 0xe000) { 151: n &= 0x3ff; 152: n |= (filter->cache & 0xfff0000) >> 6; 153: filter->cache = 0; 154: if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) { 155: CK((*filter->output_function)(n, filter->data)); 156: } else { /* illegal character */ 157: n &= MBFL_WCSGROUP_MASK; 158: n |= MBFL_WCSGROUP_THROUGH; 159: CK((*filter->output_function)(n, filter->data)); 160: } 161: } else { 162: int is_first = filter->status & 0x10; 163: filter->cache = 0; 164: filter->status |= 0x10; 165: if (!is_first) { 166: if (n == 0xfffe) { 167: if (endian) { 168: filter->status &= ~0x100; /* big-endian */ 169: } else { 170: filter->status |= 0x100; /* little-endian */ 171: } 172: break; 173: } else if (n == 0xfeff) { 174: break; 175: } 176: } 177: CK((*filter->output_function)(n, filter->data)); 178: } 179: break; 180: } 181: 182: return c; 183: } 184: 185: /* 186: * UTF-16BE => wchar 187: */ 188: int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter) 189: { 190: int n; 191: 192: switch (filter->status) { 193: case 0: 194: filter->status = 1; 195: n = (c & 0xff) << 8; 196: filter->cache |= n; 197: break; 198: default: 199: filter->status = 0; 200: n = (filter->cache & 0xff00) | (c & 0xff); 201: if (n >= 0xd800 && n < 0xdc00) { 202: filter->cache = ((n & 0x3ff) << 16) + 0x400000; 203: } else if (n >= 0xdc00 && n < 0xe000) { 204: n &= 0x3ff; 205: n |= (filter->cache & 0xfff0000) >> 6; 206: filter->cache = 0; 207: if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) { 208: CK((*filter->output_function)(n, filter->data)); 209: } else { /* illegal character */ 210: n &= MBFL_WCSGROUP_MASK; 211: n |= MBFL_WCSGROUP_THROUGH; 212: CK((*filter->output_function)(n, filter->data)); 213: } 214: } else { 215: filter->cache = 0; 216: CK((*filter->output_function)(n, filter->data)); 217: } 218: break; 219: } 220: 221: return c; 222: } 223: 224: /* 225: * wchar => UTF-16BE 226: */ 227: int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter) 228: { 229: int n; 230: 231: if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) { 232: CK((*filter->output_function)((c >> 8) & 0xff, filter->data)); 233: CK((*filter->output_function)(c & 0xff, filter->data)); 234: } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) { 235: n = ((c >> 10) - 0x40) | 0xd800; 236: CK((*filter->output_function)((n >> 8) & 0xff, filter->data)); 237: CK((*filter->output_function)(n & 0xff, filter->data)); 238: n = (c & 0x3ff) | 0xdc00; 239: CK((*filter->output_function)((n >> 8) & 0xff, filter->data)); 240: CK((*filter->output_function)(n & 0xff, filter->data)); 241: } else { 242: if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 243: CK(mbfl_filt_conv_illegal_output(c, filter)); 244: } 245: } 246: 247: return c; 248: } 249: 250: /* 251: * UTF-16LE => wchar 252: */ 253: int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter) 254: { 255: int n; 256: 257: switch (filter->status) { 258: case 0: 259: filter->status = 1; 260: n = c & 0xff; 261: filter->cache |= n; 262: break; 263: default: 264: filter->status = 0; 265: n = (filter->cache & 0xff) | ((c & 0xff) << 8); 266: if (n >= 0xd800 && n < 0xdc00) { 267: filter->cache = ((n & 0x3ff) << 16) + 0x400000; 268: } else if (n >= 0xdc00 && n < 0xe000) { 269: n &= 0x3ff; 270: n |= (filter->cache & 0xfff0000) >> 6; 271: filter->cache = 0; 272: if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) { 273: CK((*filter->output_function)(n, filter->data)); 274: } else { /* illegal character */ 275: n &= MBFL_WCSGROUP_MASK; 276: n |= MBFL_WCSGROUP_THROUGH; 277: CK((*filter->output_function)(n, filter->data)); 278: } 279: } else { 280: filter->cache = 0; 281: CK((*filter->output_function)(n, filter->data)); 282: } 283: break; 284: } 285: 286: return c; 287: } 288: 289: /* 290: * wchar => UTF-16LE 291: */ 292: int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter) 293: { 294: int n; 295: 296: if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) { 297: CK((*filter->output_function)(c & 0xff, filter->data)); 298: CK((*filter->output_function)((c >> 8) & 0xff, filter->data)); 299: } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) { 300: n = ((c >> 10) - 0x40) | 0xd800; 301: CK((*filter->output_function)(n & 0xff, filter->data)); 302: CK((*filter->output_function)((n >> 8) & 0xff, filter->data)); 303: n = (c & 0x3ff) | 0xdc00; 304: CK((*filter->output_function)(n & 0xff, filter->data)); 305: CK((*filter->output_function)((n >> 8) & 0xff, filter->data)); 306: } else { 307: if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 308: CK(mbfl_filt_conv_illegal_output(c, filter)); 309: } 310: } 311: 312: return c; 313: } 314: 315: 316: