Annotation of embedaddon/php/ext/mbstring/oniguruma/enc/iso8859_1.c, revision 1.1.1.1
1.1 misho 1: /**********************************************************************
2: iso8859_1.c - Oniguruma (regular expression library)
3: **********************************************************************/
4: /*-
5: * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include "regenc.h"
31:
/*
 * Test whether byte value `code` has any of the class bits in `ctype` set in
 * EncISO_8859_1_CtypeTable.  Evaluates to 1 or 0.
 *
 * Both arguments are parenthesized in the expansion so that a compound
 * argument (e.g. a mask built with `|`) is applied as a whole instead of
 * being split by operator precedence.  `code` indexes a 256-entry table and
 * must be in the range 0..255; the macro itself does no range check.
 */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
  ((EncISO_8859_1_CtypeTable[(code)] & (ctype)) != 0)

/*
 * Character-class bitmask for each of the 256 ISO-8859-1 byte values,
 * indexed by the byte.  Entries are tested against ONIGENC_CTYPE_* masks
 * elsewhere in this file.  The comment on each row gives the index of the
 * row's first entry.
 */
static const unsigned short EncISO_8859_1_CtypeTable[256] = {
  /* 0x00 */ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  /* 0x08 */ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
  /* 0x10 */ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  /* 0x18 */ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  /* 0x20 */ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  /* 0x28 */ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  /* 0x30 */ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
  /* 0x38 */ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  /* 0x40 */ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
  /* 0x48 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0x50 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0x58 */ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
  /* 0x60 */ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
  /* 0x68 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0x70 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0x78 */ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xa8 */ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
  /* 0xb0 */ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
  /* 0xb8 */ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  /* 0xc0 */ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
  /* 0xc8 */ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
  /* 0xd0 */ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
  /* 0xd8 */ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
  /* 0xe0 */ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
  /* 0xe8 */ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
  /* 0xf0 */ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
  /* 0xf8 */ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
69:
70: static int
71: iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
72: {
73: const UChar* p = *pp;
74:
75: if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
76: ONIGENC_IS_MBC_ASCII(p)) ||
77: ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
78: !ONIGENC_IS_MBC_ASCII(p))) {
79: *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
80: }
81: else {
82: *lower = *p;
83: }
84: (*pp)++;
85: return 1; /* return byte length of converted char to lower */
86: }
87:
88: static int
89: iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
90: const UChar** pp, const UChar* end)
91: {
92: const UChar* p = *pp;
93:
94: (*pp)++;
95: if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
96: ONIGENC_IS_MBC_ASCII(p)) ||
97: ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
98: !ONIGENC_IS_MBC_ASCII(p))) {
99: int v = (EncISO_8859_1_CtypeTable[*p] &
100: (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
101:
102: if ((v | ONIGENC_CTYPE_LOWER) != 0) {
103: /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
104: if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
105: return FALSE;
106: else
107: return TRUE;
108: }
109:
110: return (v != 0 ? TRUE : FALSE);
111: }
112: return FALSE;
113: }
114:
115: static int
116: iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype)
117: {
118: if (code < 256)
119: return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
120: else
121: return FALSE;
122: }
123:
124: OnigEncodingType OnigEncodingISO_8859_1 = {
125: onigenc_single_byte_mbc_enc_len,
126: "ISO-8859-1", /* name */
127: 1, /* max enc length */
128: 1, /* min enc length */
129: (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
130: ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
131: {
132: (OnigCodePoint )'\\' /* esc */
133: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
134: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
135: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
136: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
137: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
138: },
139: onigenc_is_mbc_newline_0x0a,
140: onigenc_single_byte_mbc_to_code,
141: onigenc_single_byte_code_to_mbclen,
142: onigenc_single_byte_code_to_mbc,
143: iso_8859_1_mbc_to_normalize,
144: iso_8859_1_is_mbc_ambiguous,
145: onigenc_iso_8859_1_get_all_pair_ambig_codes,
146: onigenc_ess_tsett_get_all_comp_ambig_codes,
147: iso_8859_1_is_code_ctype,
148: onigenc_not_support_get_ctype_code_range,
149: onigenc_single_byte_left_adjust_char_head,
150: onigenc_always_true_is_allowed_reverse_match
151: };
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>