Return to iso8859_1.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / oniguruma / enc |
1.1 misho 1: /********************************************************************** 2: iso8859_1.c - Oniguruma (regular expression library) 3: **********************************************************************/ 4: /*- 5: * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6: * All rights reserved. 7: * 8: * Redistribution and use in source and binary forms, with or without 9: * modification, are permitted provided that the following conditions 10: * are met: 11: * 1. Redistributions of source code must retain the above copyright 12: * notice, this list of conditions and the following disclaimer. 13: * 2. Redistributions in binary form must reproduce the above copyright 14: * notice, this list of conditions and the following disclaimer in the 15: * documentation and/or other materials provided with the distribution. 16: * 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27: * SUCH DAMAGE. 28: */ 29: 30: #include "regenc.h" 31: 32: #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ 33: ((EncISO_8859_1_CtypeTable[code] & ctype) != 0) 34: 35: static const unsigned short EncISO_8859_1_CtypeTable[256] = { 36: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 37: 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 38: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 39: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 40: 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 41: 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 42: 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 43: 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 44: 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 45: 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 46: 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 47: 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, 48: 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 49: 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 50: 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 51: 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 52: 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 53: 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 54: 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 55: 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 56: 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 57: 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, 58: 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, 59: 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, 60: 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 61: 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 62: 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, 63: 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 64: 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 65: 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 66: 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 67: 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 68: }; 69: 70: static int 71: iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) 72: { 73: const UChar* p = *pp; 74: 75: if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && 76: ONIGENC_IS_MBC_ASCII(p)) || 77: ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && 78: !ONIGENC_IS_MBC_ASCII(p))) { 79: *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); 80: } 81: else { 82: *lower = *p; 83: } 84: (*pp)++; 85: return 1; /* return byte length of converted char to lower */ 86: } 87: 88: static int 89: iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag, 90: const UChar** pp, const UChar* end) 91: { 92: const UChar* p = *pp; 93: 94: (*pp)++; 95: if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && 96: ONIGENC_IS_MBC_ASCII(p)) || 97: ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && 98: !ONIGENC_IS_MBC_ASCII(p))) { 99: int v = (EncISO_8859_1_CtypeTable[*p] & 100: (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); 101: 102: if ((v | ONIGENC_CTYPE_LOWER) != 0) { 103: /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ 104: if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba)) 105: return FALSE; 106: else 107: return TRUE; 108: } 109: 110: return (v != 0 ? TRUE : FALSE); 111: } 112: return FALSE; 113: } 114: 115: static int 116: iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype) 117: { 118: if (code < 256) 119: return ENC_IS_ISO_8859_1_CTYPE(code, ctype); 120: else 121: return FALSE; 122: } 123: 124: OnigEncodingType OnigEncodingISO_8859_1 = { 125: onigenc_single_byte_mbc_enc_len, 126: "ISO-8859-1", /* name */ 127: 1, /* max enc length */ 128: 1, /* min enc length */ 129: (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | 130: ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), 131: { 132: (OnigCodePoint )'\\' /* esc */ 133: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 134: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 135: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 136: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 137: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 138: }, 139: onigenc_is_mbc_newline_0x0a, 140: onigenc_single_byte_mbc_to_code, 141: onigenc_single_byte_code_to_mbclen, 142: onigenc_single_byte_code_to_mbc, 143: iso_8859_1_mbc_to_normalize, 144: iso_8859_1_is_mbc_ambiguous, 145: onigenc_iso_8859_1_get_all_pair_ambig_codes, 146: onigenc_ess_tsett_get_all_comp_ambig_codes, 147: iso_8859_1_is_code_ctype, 148: onigenc_not_support_get_ctype_code_range, 149: onigenc_single_byte_left_adjust_char_head, 150: onigenc_always_true_is_allowed_reverse_match 151: };