Annotation of embedaddon/php/ext/mbstring/oniguruma/regenc.c, revision 1.1.1.1
1.1 misho 1: /**********************************************************************
2: regenc.c - Oniguruma (regular expression library)
3: **********************************************************************/
4: /*-
5: * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include "regint.h"
31:
/* Default encoding for the library; returned by onigenc_get_default_encoding()
 * and replaced by onigenc_set_default_encoding(). */
32: OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
33:
/*
 * Encoding-layer initialization hook.
 * Performs no work and always reports success (0).
 */
extern int
onigenc_init(void)
{
  const int status = 0;  /* nothing to set up */

  return status;
}
39:
40: extern OnigEncoding
41: onigenc_get_default_encoding(void)
42: {
43: return OnigEncDefaultCharEncoding;
44: }
45:
46: extern int
47: onigenc_set_default_encoding(OnigEncoding enc)
48: {
49: OnigEncDefaultCharEncoding = enc;
50: return 0;
51: }
52:
53: extern UChar*
54: onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
55: {
56: UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
57: if (p < s) {
58: p += enc_len(enc, p);
59: }
60: return p;
61: }
62:
63: extern UChar*
64: onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
65: const UChar* start, const UChar* s, const UChar** prev)
66: {
67: UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
68:
69: if (p < s) {
70: if (prev) *prev = (const UChar* )p;
71: p += enc_len(enc, p);
72: }
73: else {
74: if (prev) *prev = (const UChar* )NULL; /* Sorry */
75: }
76: return p;
77: }
78:
79: extern UChar*
80: onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
81: {
82: if (s <= start)
83: return (UChar* )NULL;
84:
85: return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
86: }
87:
88: extern UChar*
89: onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
90: {
91: while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
92: if (s <= start)
93: return (UChar* )NULL;
94:
95: s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
96: }
97: return (UChar* )s;
98: }
99:
100: extern UChar*
101: onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
102: {
103: UChar* q = (UChar* )p;
104: while (n-- > 0) {
105: q += ONIGENC_MBC_ENC_LEN(enc, q);
106: }
107: return (q <= end ? q : NULL);
108: }
109:
110: extern int
111: onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
112: {
113: int n = 0;
114: UChar* q = (UChar* )p;
115:
116: while (q < end) {
117: q += ONIGENC_MBC_ENC_LEN(enc, q);
118: n++;
119: }
120: return n;
121: }
122:
123: extern int
124: onigenc_strlen_null(OnigEncoding enc, const UChar* s)
125: {
126: int n = 0;
127: UChar* p = (UChar* )s;
128:
129: while (1) {
130: if (*p == '\0') {
131: UChar* q;
132: int len = ONIGENC_MBC_MINLEN(enc);
133:
134: if (len == 1) return n;
135: q = p + 1;
136: while (len > 1) {
137: if (*q != '\0') break;
138: q++;
139: len--;
140: }
141: if (len == 1) return n;
142: }
143: p += ONIGENC_MBC_ENC_LEN(enc, p);
144: n++;
145: }
146: }
147:
148: extern int
149: onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
150: {
151: UChar* start = (UChar* )s;
152: UChar* p = (UChar* )s;
153:
154: while (1) {
155: if (*p == '\0') {
156: UChar* q;
157: int len = ONIGENC_MBC_MINLEN(enc);
158:
159: if (len == 1) return (int )(p - start);
160: q = p + 1;
161: while (len > 1) {
162: if (*q != '\0') break;
163: q++;
164: len--;
165: }
166: if (len == 1) return (int )(p - start);
167: }
168: p += ONIGENC_MBC_ENC_LEN(enc, p);
169: }
170: }
171:
172: #ifndef ONIG_RUBY_M17N
173:
174: #ifndef NOT_RUBY
175:
176: #define USE_APPLICATION_TO_LOWER_CASE_TABLE
177:
/* Per-byte character-type flag words, one entry for each of the 256 byte
 * values; unlike OnigEncAsciiCtypeTable, entries 0x80-0xff are non-zero.
 * Compiled only when NOT_RUBY is not defined.
 * NOTE(review): the meaning of the individual bits is defined outside this
 * file - confirm against the ONIGENC_CTYPE_* definitions. */
178: const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
179: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
180: 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
181: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
182: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
183: 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
184: 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
185: 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
186: 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
187: 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
188: 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
189: 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
190: 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
191: 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
192: 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
193: 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
194: 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
195: 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
196: 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
197: 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
198: 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
199: 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
200: 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
201: 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
202: 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
203: 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
204: 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
205: 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
206: 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
207: 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
208: 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
209: 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
210: 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
211: };
212: #endif
213:
/* Active to-lower-case lookup table; starts out null and is assigned by
 * onigenc_set_default_caseconv_table(). */
214: const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
215:
216: #ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
/* Built-in byte-to-lower-case map: 'A'-'Z' (\101-\132) map to 'a'-'z'
 * (\141-\172); every other byte value maps to itself. Compiled only when
 * USE_APPLICATION_TO_LOWER_CASE_TABLE is not defined, in which case
 * onigenc_set_default_caseconv_table() falls back to it for a null argument. */
217: static const UChar BuiltInAsciiToLowerCaseTable[] = {
218: '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
219: '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
220: '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
221: '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
222: '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
223: '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
224: '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
225: '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
226: '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
227: '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
228: '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
229: '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
230: '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
231: '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
232: '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
233: '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
234: '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
235: '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
236: '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
237: '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
238: '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
239: '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
240: '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
241: '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
242: '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
243: '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
244: '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
245: '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
246: '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
247: '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
248: '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
249: '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
250: };
251: #endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
252:
253: #ifdef USE_UPPER_CASE_TABLE
/* Byte-to-upper-case map: 'a'-'z' (\141-\172) map to 'A'-'Z' (\101-\132);
 * every other byte value maps to itself. Compiled only when
 * USE_UPPER_CASE_TABLE is defined. */
254: const UChar OnigEncAsciiToUpperCaseTable[256] = {
255: '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
256: '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
257: '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
258: '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
259: '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
260: '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
261: '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
262: '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
263: '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
264: '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
265: '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
266: '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
267: '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
268: '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
269: '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
270: '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
271: '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
272: '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
273: '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
274: '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
275: '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
276: '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
277: '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
278: '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
279: '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
280: '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
281: '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
282: '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
283: '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
284: '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
285: '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
286: '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
287: };
288: #endif
289:
/* Per-byte character-type flag words for ASCII: entries 0x00-0x7f carry
 * flags, entries 0x80-0xff are all zero.
 * NOTE(review): the meaning of the individual bits is defined outside this
 * file - confirm against the ONIGENC_CTYPE_* definitions. */
290: const unsigned short OnigEncAsciiCtypeTable[256] = {
291: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
292: 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
293: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
294: 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
295: 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
296: 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
297: 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
298: 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
299: 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
300: 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
301: 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
302: 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
303: 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
304: 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
305: 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
306: 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
307:
308: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
309: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
310: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
311: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
312: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
313: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
314: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
315: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
316: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
317: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
318: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
319: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
320: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
321: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
322: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
323: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
324: };
325:
/* ISO-8859-1 byte-to-lower-case map: 'A'-'Z' map to 'a'-'z', and
 * \300-\336 map to \340-\376 except \327, which maps to itself.
 * \337 and every other byte value map to themselves. */
326: const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
327: '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
328: '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
329: '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
330: '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
331: '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
332: '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
333: '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
334: '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
335: '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
336: '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
337: '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
338: '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
339: '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
340: '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
341: '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
342: '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
343: '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
344: '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
345: '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
346: '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
347: '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
348: '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
349: '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
350: '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
351: '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
352: '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
353: '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
354: '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
355: '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
356: '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
357: '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
358: '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
359: };
360:
361: #ifdef USE_UPPER_CASE_TABLE
/* ISO-8859-1 byte-to-upper-case map: 'a'-'z' map to 'A'-'Z', and
 * \340-\376 map to \300-\336 except \367, which maps to itself.
 * \377 and every other byte value map to themselves.
 * Compiled only when USE_UPPER_CASE_TABLE is defined. */
362: const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
363: '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
364: '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
365: '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
366: '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
367: '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
368: '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
369: '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
370: '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
371: '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
372: '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
373: '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
374: '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
375: '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
376: '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
377: '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
378: '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
379: '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
380: '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
381: '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
382: '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
383: '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
384: '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
385: '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
386: '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
387: '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
388: '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
389: '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
390: '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
391: '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
392: '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
393: '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
394: '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
395: };
396: #endif
397:
398: extern void
399: onigenc_set_default_caseconv_table(const UChar* table)
400: {
401: if (table == (const UChar* )0) {
402: #ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
403: table = BuiltInAsciiToLowerCaseTable;
404: #else
405: return ;
406: #endif
407: }
408:
409: if (table != OnigEncAsciiToLowerCaseTable) {
410: OnigEncAsciiToLowerCaseTable = table;
411: }
412: }
413:
414: extern UChar*
415: onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
416: {
417: return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
418: }
419:
/* ASCII case pairs in both directions: first 'A'-'Z' (0x41-0x5a) paired
 * with 'a'-'z' (0x61-0x7a), then the same pairs reversed (52 entries). */
420: const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
421: { 0x41, 0x61 },
422: { 0x42, 0x62 },
423: { 0x43, 0x63 },
424: { 0x44, 0x64 },
425: { 0x45, 0x65 },
426: { 0x46, 0x66 },
427: { 0x47, 0x67 },
428: { 0x48, 0x68 },
429: { 0x49, 0x69 },
430: { 0x4a, 0x6a },
431: { 0x4b, 0x6b },
432: { 0x4c, 0x6c },
433: { 0x4d, 0x6d },
434: { 0x4e, 0x6e },
435: { 0x4f, 0x6f },
436: { 0x50, 0x70 },
437: { 0x51, 0x71 },
438: { 0x52, 0x72 },
439: { 0x53, 0x73 },
440: { 0x54, 0x74 },
441: { 0x55, 0x75 },
442: { 0x56, 0x76 },
443: { 0x57, 0x77 },
444: { 0x58, 0x78 },
445: { 0x59, 0x79 },
446: { 0x5a, 0x7a },
447:
448: { 0x61, 0x41 },
449: { 0x62, 0x42 },
450: { 0x63, 0x43 },
451: { 0x64, 0x44 },
452: { 0x65, 0x45 },
453: { 0x66, 0x46 },
454: { 0x67, 0x47 },
455: { 0x68, 0x48 },
456: { 0x69, 0x49 },
457: { 0x6a, 0x4a },
458: { 0x6b, 0x4b },
459: { 0x6c, 0x4c },
460: { 0x6d, 0x4d },
461: { 0x6e, 0x4e },
462: { 0x6f, 0x4f },
463: { 0x70, 0x50 },
464: { 0x71, 0x51 },
465: { 0x72, 0x52 },
466: { 0x73, 0x53 },
467: { 0x74, 0x54 },
468: { 0x75, 0x55 },
469: { 0x76, 0x56 },
470: { 0x77, 0x57 },
471: { 0x78, 0x58 },
472: { 0x79, 0x59 },
473: { 0x7a, 0x5a }
474: };
475:
476: extern int
477: onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
478: const OnigPairAmbigCodes** ccs)
479: {
480: if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
481: *ccs = OnigAsciiPairAmbigCodes;
482: return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
483: }
484: else {
485: return 0;
486: }
487: }
488:
489: extern int
490: onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
491: const OnigCompAmbigCodes** ccs)
492: {
493: return 0;
494: }
495:
496: extern int
497: onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
498: const OnigPairAmbigCodes** ccs)
499: {
500: static const OnigPairAmbigCodes cc[] = {
501: { 0xc0, 0xe0 },
502: { 0xc1, 0xe1 },
503: { 0xc2, 0xe2 },
504: { 0xc3, 0xe3 },
505: { 0xc4, 0xe4 },
506: { 0xc5, 0xe5 },
507: { 0xc6, 0xe6 },
508: { 0xc7, 0xe7 },
509: { 0xc8, 0xe8 },
510: { 0xc9, 0xe9 },
511: { 0xca, 0xea },
512: { 0xcb, 0xeb },
513: { 0xcc, 0xec },
514: { 0xcd, 0xed },
515: { 0xce, 0xee },
516: { 0xcf, 0xef },
517:
518: { 0xd0, 0xf0 },
519: { 0xd1, 0xf1 },
520: { 0xd2, 0xf2 },
521: { 0xd3, 0xf3 },
522: { 0xd4, 0xf4 },
523: { 0xd5, 0xf5 },
524: { 0xd6, 0xf6 },
525: { 0xd8, 0xf8 },
526: { 0xd9, 0xf9 },
527: { 0xda, 0xfa },
528: { 0xdb, 0xfb },
529: { 0xdc, 0xfc },
530: { 0xdd, 0xfd },
531: { 0xde, 0xfe },
532:
533: { 0xe0, 0xc0 },
534: { 0xe1, 0xc1 },
535: { 0xe2, 0xc2 },
536: { 0xe3, 0xc3 },
537: { 0xe4, 0xc4 },
538: { 0xe5, 0xc5 },
539: { 0xe6, 0xc6 },
540: { 0xe7, 0xc7 },
541: { 0xe8, 0xc8 },
542: { 0xe9, 0xc9 },
543: { 0xea, 0xca },
544: { 0xeb, 0xcb },
545: { 0xec, 0xcc },
546: { 0xed, 0xcd },
547: { 0xee, 0xce },
548: { 0xef, 0xcf },
549:
550: { 0xf0, 0xd0 },
551: { 0xf1, 0xd1 },
552: { 0xf2, 0xd2 },
553: { 0xf3, 0xd3 },
554: { 0xf4, 0xd4 },
555: { 0xf5, 0xd5 },
556: { 0xf6, 0xd6 },
557: { 0xf8, 0xd8 },
558: { 0xf9, 0xd9 },
559: { 0xfa, 0xda },
560: { 0xfb, 0xdb },
561: { 0xfc, 0xdc },
562: { 0xfd, 0xdd },
563: { 0xfe, 0xde }
564: };
565:
566: if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
567: *ccs = OnigAsciiPairAmbigCodes;
568: return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
569: }
570: else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
571: *ccs = cc;
572: return sizeof(cc) / sizeof(OnigPairAmbigCodes);
573: }
574: else
575: return 0;
576: }
577:
578: extern int
579: onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
580: const OnigCompAmbigCodes** ccs)
581: {
582: static const OnigCompAmbigCodes folds[] = {
583: { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
584: };
585:
586: if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
587: *ccs = folds;
588: return sizeof(folds) / sizeof(OnigCompAmbigCodes);
589: }
590: else
591: return 0;
592: }
593:
594: extern int
595: onigenc_not_support_get_ctype_code_range(int ctype,
596: const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
597: {
598: return ONIG_NO_SUPPORT_CONFIG;
599: }
600:
601: extern int
602: onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
603: {
604: if (p < end) {
605: if (*p == 0x0a) return 1;
606: }
607: return 0;
608: }
609:
610: /* for single byte encodings */
611: extern int
612: onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
613: UChar* lower)
614: {
615: if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
616: *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
617: }
618: else {
619: *lower = **p;
620: }
621:
622: (*p)++;
623: return 1; /* return byte length of converted char to lower */
624: }
625:
626: extern int
627: onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
628: const UChar** pp, const UChar* end)
629: {
630: const UChar* p = *pp;
631:
632: (*pp)++;
633: if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
634: return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
635: }
636: else {
637: return FALSE;
638: }
639: }
640:
641: extern int
642: onigenc_single_byte_mbc_enc_len(const UChar* p)
643: {
644: return 1;
645: }
646:
647: extern OnigCodePoint
648: onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end)
649: {
650: return (OnigCodePoint )(*p);
651: }
652:
653: extern int
654: onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
655: {
656: return 1;
657: }
658:
659: extern int
660: onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
661: {
662: return (code & 0xff);
663: }
664:
665: extern int
666: onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
667: {
668: *buf = (UChar )(code & 0xff);
669: return 1;
670: }
671:
672: extern UChar*
673: onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s)
674: {
675: return (UChar* )s;
676: }
677:
678: extern int
679: onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end)
680: {
681: return TRUE;
682: }
683:
684: extern int
685: onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end)
686: {
687: return FALSE;
688: }
689:
690: extern OnigCodePoint
691: onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
692: {
693: int c, i, len;
694: OnigCodePoint n;
695:
696: len = enc_len(enc, p);
697: n = (OnigCodePoint )(*p++);
698: if (len == 1) return n;
699:
700: for (i = 1; i < len; i++) {
701: if (p >= end) break;
702: c = *p++;
703: n <<= 8; n += c;
704: }
705: return n;
706: }
707:
708: extern int
709: onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
710: const UChar** pp, const UChar* end, UChar* lower)
711: {
712: int len;
713: const UChar *p = *pp;
714:
715: if (ONIGENC_IS_MBC_ASCII(p)) {
716: if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
717: *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
718: }
719: else {
720: *lower = *p;
721: }
722: (*pp)++;
723: return 1;
724: }
725: else {
726: len = enc_len(enc, p);
727: if (lower != p) {
728: int i;
729: for (i = 0; i < len; i++) {
730: *lower++ = *p++;
731: }
732: }
733: (*pp) += len;
734: return len; /* return byte length of converted to lower char */
735: }
736: }
737:
738: extern int
739: onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
740: const UChar** pp, const UChar* end)
741: {
742: const UChar* p = *pp;
743:
744: if (ONIGENC_IS_MBC_ASCII(p)) {
745: (*pp)++;
746: if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
747: return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
748: }
749: else {
750: return FALSE;
751: }
752: }
753:
754: (*pp) += enc_len(enc, p);
755: return FALSE;
756: }
757:
758: extern int
759: onigenc_mb2_code_to_mbclen(OnigCodePoint code)
760: {
761: if ((code & 0xff00) != 0) return 2;
762: else return 1;
763: }
764:
765: extern int
766: onigenc_mb4_code_to_mbclen(OnigCodePoint code)
767: {
768: if ((code & 0xff000000) != 0) return 4;
769: else if ((code & 0xff0000) != 0) return 3;
770: else if ((code & 0xff00) != 0) return 2;
771: else return 1;
772: }
773:
774: extern int
775: onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
776: {
777: int first;
778:
779: if ((code & 0xff00) != 0) {
780: first = (code >> 8) & 0xff;
781: }
782: else {
783: return (int )code;
784: }
785: return first;
786: }
787:
788: extern int
789: onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
790: {
791: int first;
792:
793: if ((code & 0xff000000) != 0) {
794: first = (code >> 24) & 0xff;
795: }
796: else if ((code & 0xff0000) != 0) {
797: first = (code >> 16) & 0xff;
798: }
799: else if ((code & 0xff00) != 0) {
800: first = (code >> 8) & 0xff;
801: }
802: else {
803: return (int )code;
804: }
805: return first;
806: }
807:
808: extern int
809: onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
810: {
811: UChar *p = buf;
812:
813: if ((code & 0xff00) != 0) {
814: *p++ = (UChar )((code >> 8) & 0xff);
815: }
816: *p++ = (UChar )(code & 0xff);
817:
818: #if 1
819: if (enc_len(enc, buf) != (p - buf))
820: return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
821: #endif
822: return p - buf;
823: }
824:
825: extern int
826: onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
827: {
828: UChar *p = buf;
829:
830: if ((code & 0xff000000) != 0) {
831: *p++ = (UChar )((code >> 24) & 0xff);
832: }
833: if ((code & 0xff0000) != 0 || p != buf) {
834: *p++ = (UChar )((code >> 16) & 0xff);
835: }
836: if ((code & 0xff00) != 0 || p != buf) {
837: *p++ = (UChar )((code >> 8) & 0xff);
838: }
839: *p++ = (UChar )(code & 0xff);
840:
841: #if 1
842: if (enc_len(enc, buf) != (p - buf))
843: return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
844: #endif
845: return p - buf;
846: }
847:
848: extern int
849: onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
850: unsigned int ctype)
851: {
852: if (code < 128)
853: return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
854: else {
855: if ((ctype & (ONIGENC_CTYPE_WORD |
856: ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
857: return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
858: }
859: }
860:
861: return FALSE;
862: }
863:
864: extern int
865: onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
866: unsigned int ctype)
867: {
868: if (code < 128)
869: return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
870: else {
871: if ((ctype & (ONIGENC_CTYPE_WORD |
872: ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
873: return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
874: }
875: }
876:
877: return FALSE;
878: }
879:
880: extern int
881: onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
882: const UChar* sascii /* ascii */, int n)
883: {
884: int x, c;
885:
886: while (n-- > 0) {
887: if (p >= end) return (int )(*sascii);
888:
889: c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
890: x = *sascii - c;
891: if (x) return x;
892:
893: sascii++;
894: p += enc_len(enc, p);
895: }
896: return 0;
897: }
898:
899: #else /* ONIG_RUBY_M17N */
900:
901: extern int
902: onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
903: {
904: switch (ctype) {
905: case ONIGENC_CTYPE_NEWLINE:
906: if (code == 0x0a) return 1;
907: break;
908:
909: case ONIGENC_CTYPE_ALPHA:
910: return m17n_isalpha(enc, code);
911: break;
912: case ONIGENC_CTYPE_BLANK:
913: return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
914: break;
915: case ONIGENC_CTYPE_CNTRL:
916: return m17n_iscntrl(enc, code);
917: break;
918: case ONIGENC_CTYPE_DIGIT:
919: return m17n_isdigit(enc, code);
920: break;
921: case ONIGENC_CTYPE_GRAPH:
922: return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
923: break;
924: case ONIGENC_CTYPE_LOWER:
925: return m17n_islower(enc, code);
926: break;
927: case ONIGENC_CTYPE_PRINT:
928: return m17n_isprint(enc, code);
929: break;
930: case ONIGENC_CTYPE_PUNCT:
931: return m17n_ispunct(enc, code);
932: break;
933: case ONIGENC_CTYPE_SPACE:
934: return m17n_isspace(enc, code);
935: break;
936: case ONIGENC_CTYPE_UPPER:
937: return m17n_isupper(enc, code);
938: break;
939: case ONIGENC_CTYPE_XDIGIT:
940: return m17n_isxdigit(enc, code);
941: break;
942: case ONIGENC_CTYPE_WORD:
943: return m17n_iswchar(enc, code);
944: break;
945: case ONIGENC_CTYPE_ASCII:
946: return (code < 128 ? TRUE : FALSE);
947: break;
948: case ONIGENC_CTYPE_ALNUM:
949: return m17n_isalnum(enc, code);
950: break;
951: default:
952: break;
953: }
954:
955: return 0;
956: }
957:
958: extern int
959: onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
960: {
961: int c, len;
962:
963: m17n_mbcput(enc, code, buf);
964: c = m17n_firstbyte(enc, code);
965: len = enc_len(enc, c);
966: return len;
967: }
968:
969: extern int
970: onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
971: {
972: unsigned int c, low;
973:
974: c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
975: low = m17n_tolower(enc, c);
976: m17n_mbcput(enc, low, buf);
977:
978: return m17n_codelen(enc, low);
979: }
980:
981: extern int
982: onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
983: UChar** pp, UChar* end)
984: {
985: int len;
986: unsigned int c;
987: UChar* p = *pp;
988:
989: len = enc_len(enc, *p);
990: (*pp) += len;
991: c = m17n_codepoint(enc, p, p + len);
992:
993: if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
994: if (m17n_isupper(enc, c) || m17n_islower(enc, c))
995: return TRUE;
996: }
997:
998: return FALSE;
999: }
1000:
1001: extern UChar*
1002: onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
1003: {
1004: UChar *p;
1005: int len;
1006:
1007: if (s <= start) return s;
1008: p = s;
1009:
1010: while (!m17n_islead(enc, *p) && p > start) p--;
1011: while (p + (len = enc_len(enc, *p)) < s) {
1012: p += len;
1013: }
1014: if (p + len == s) return s;
1015: return p;
1016: }
1017:
1018: extern int
1019: onigenc_is_allowed_reverse_match(OnigEncoding enc,
1020: const UChar* s, const UChar* end)
1021: {
1022: return ONIGENC_IS_SINGLEBYTE(enc);
1023: }
1024:
1025: extern void
1026: onigenc_set_default_caseconv_table(UChar* table) { }
1027:
1028: #endif /* ONIG_RUBY_M17N */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>