Annotation of embedaddon/php/ext/mbstring/oniguruma/enc/mktable.c, revision 1.1.1.1
1.1 misho 1: /**********************************************************************
2: mktable.c
3: **********************************************************************/
4: /*-
5: * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include <stdlib.h>
31: #include <stdio.h>
32:
33: #define NOT_RUBY
34: #include "regenc.h"
35:
/*
 * Encoding identifiers.  Each value selects one branch of the switch
 * statements in the Is*() classifiers below and is also stored in the
 * matching Info[] entry.  The numbering is consecutive; ISO_8859_13 is 12
 * because no ISO_8859_12 identifier is defined here.
 */
#define UNICODE_ISO_8859_1 0
#define ISO_8859_1 1
#define ISO_8859_2 2
#define ISO_8859_3 3
#define ISO_8859_4 4
#define ISO_8859_5 5
#define ISO_8859_6 6
#define ISO_8859_7 7
#define ISO_8859_8 8
#define ISO_8859_9 9
#define ISO_8859_10 10
#define ISO_8859_11 11
#define ISO_8859_13 12
#define ISO_8859_14 13
#define ISO_8859_15 14
#define ISO_8859_16 15
#define KOI8 16
#define KOI8_R 17

/*
 * One table to generate: the encoding identifier and the name that is
 * spliced into the emitted array identifier ("Enc<name>_CtypeTable").
 * The name points at a string literal, so it is declared const.
 */
typedef struct {
  int num;
  const char* name;
} ENC_INFO;

/* Every encoding for which main() emits a table, in output order. */
static ENC_INFO Info[] = {
  { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" },
  { ISO_8859_1, "ISO_8859_1" },
  { ISO_8859_2, "ISO_8859_2" },
  { ISO_8859_3, "ISO_8859_3" },
  { ISO_8859_4, "ISO_8859_4" },
  { ISO_8859_5, "ISO_8859_5" },
  { ISO_8859_6, "ISO_8859_6" },
  { ISO_8859_7, "ISO_8859_7" },
  { ISO_8859_8, "ISO_8859_8" },
  { ISO_8859_9, "ISO_8859_9" },
  { ISO_8859_10, "ISO_8859_10" },
  { ISO_8859_11, "ISO_8859_11" },
  { ISO_8859_13, "ISO_8859_13" },
  { ISO_8859_14, "ISO_8859_14" },
  { ISO_8859_15, "ISO_8859_15" },
  { ISO_8859_16, "ISO_8859_16" },
  { KOI8, "KOI8" },
  { KOI8_R, "KOI8_R" }
};
80:
81:
82: static int IsAlpha(int enc, int c)
83: {
84: if (c >= 0x41 && c <= 0x5a) return 1;
85: if (c >= 0x61 && c <= 0x7a) return 1;
86:
87: switch (enc) {
88: case UNICODE_ISO_8859_1:
89: case ISO_8859_1:
90: case ISO_8859_9:
91: if (c == 0xaa) return 1;
92: if (c == 0xb5) return 1;
93: if (c == 0xba) return 1;
94: if (c >= 0xc0 && c <= 0xd6) return 1;
95: if (c >= 0xd8 && c <= 0xf6) return 1;
96: if (c >= 0xf8 && c <= 0xff) return 1;
97: break;
98:
99: case ISO_8859_2:
100: if (c == 0xa1 || c == 0xa3) return 1;
101: if (c == 0xa5 || c == 0xa6) return 1;
102: if (c >= 0xa9 && c <= 0xac) return 1;
103: if (c >= 0xae && c <= 0xaf) return 1;
104: if (c == 0xb1 || c == 0xb3) return 1;
105: if (c == 0xb5 || c == 0xb6) return 1;
106: if (c >= 0xb9 && c <= 0xbc) return 1;
107: if (c >= 0xbe && c <= 0xbf) return 1;
108: if (c >= 0xc0 && c <= 0xd6) return 1;
109: if (c >= 0xd8 && c <= 0xf6) return 1;
110: if (c >= 0xf8 && c <= 0xfe) return 1;
111: break;
112:
113: case ISO_8859_3:
114: if (c == 0xa1) return 1;
115: if (c == 0xa6) return 1;
116: if (c >= 0xa9 && c <= 0xac) return 1;
117: if (c == 0xaf) return 1;
118: if (c == 0xb1) return 1;
119: if (c == 0xb5 || c == 0xb6) return 1;
120: if (c >= 0xb9 && c <= 0xbc) return 1;
121: if (c == 0xbf) return 1;
122: if (c >= 0xc0 && c <= 0xc2) return 1;
123: if (c >= 0xc4 && c <= 0xcf) return 1;
124: if (c >= 0xd1 && c <= 0xd6) return 1;
125: if (c >= 0xd8 && c <= 0xe2) return 1;
126: if (c >= 0xe4 && c <= 0xef) return 1;
127: if (c >= 0xf1 && c <= 0xf6) return 1;
128: if (c >= 0xf8 && c <= 0xfe) return 1;
129: break;
130:
131: case ISO_8859_4:
132: if (c >= 0xa1 && c <= 0xa3) return 1;
133: if (c == 0xa5 || c == 0xa6) return 1;
134: if (c >= 0xa9 && c <= 0xac) return 1;
135: if (c == 0xae) return 1;
136: if (c == 0xb1 || c == 0xb3) return 1;
137: if (c == 0xb5 || c == 0xb6) return 1;
138: if (c >= 0xb9 && c <= 0xbf) return 1;
139: if (c >= 0xc0 && c <= 0xd6) return 1;
140: if (c >= 0xd8 && c <= 0xf6) return 1;
141: if (c >= 0xf8 && c <= 0xfe) return 1;
142: break;
143:
144: case ISO_8859_5:
145: if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
146: if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
147: break;
148:
149: case ISO_8859_6:
150: if (c >= 0xc1 && c <= 0xda) return 1;
151: if (c >= 0xe0 && c <= 0xf2) return 1;
152: break;
153:
154: case ISO_8859_7:
155: if (c == 0xb6) return 1;
156: if (c >= 0xb8 && c <= 0xba) return 1;
157: if (c == 0xbc) return 1;
158: if (c >= 0xbe && c <= 0xbf) return 1;
159: if (c == 0xc0) return 1;
160: if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
161: if (c >= 0xdc && c <= 0xfe) return 1;
162: break;
163:
164: case ISO_8859_8:
165: if (c == 0xb5) return 1;
166: if (c >= 0xe0 && c <= 0xfa) return 1;
167: break;
168:
169: case ISO_8859_10:
170: if (c >= 0xa1 && c <= 0xa6) return 1;
171: if (c >= 0xa8 && c <= 0xac) return 1;
172: if (c == 0xae || c == 0xaf) return 1;
173: if (c >= 0xb1 && c <= 0xb6) return 1;
174: if (c >= 0xb8 && c <= 0xbc) return 1;
175: if (c >= 0xbe && c <= 0xff) return 1;
176: break;
177:
178: case ISO_8859_11:
179: if (c >= 0xa1 && c <= 0xda) return 1;
180: if (c >= 0xdf && c <= 0xfb) return 1;
181: break;
182:
183: case ISO_8859_13:
184: if (c == 0xa8) return 1;
185: if (c == 0xaa) return 1;
186: if (c == 0xaf) return 1;
187: if (c == 0xb5) return 1;
188: if (c == 0xb8) return 1;
189: if (c == 0xba) return 1;
190: if (c >= 0xbf && c <= 0xd6) return 1;
191: if (c >= 0xd8 && c <= 0xf6) return 1;
192: if (c >= 0xf8 && c <= 0xfe) return 1;
193: break;
194:
195: case ISO_8859_14:
196: if (c == 0xa1 || c == 0xa2) return 1;
197: if (c == 0xa4 || c == 0xa5) return 1;
198: if (c == 0xa6 || c == 0xa8) return 1;
199: if (c >= 0xaa && c <= 0xac) return 1;
200: if (c >= 0xaf && c <= 0xb5) return 1;
201: if (c >= 0xb7 && c <= 0xff) return 1;
202: break;
203:
204: case ISO_8859_15:
205: if (c == 0xaa) return 1;
206: if (c == 0xb5) return 1;
207: if (c == 0xba) return 1;
208: if (c >= 0xc0 && c <= 0xd6) return 1;
209: if (c >= 0xd8 && c <= 0xf6) return 1;
210: if (c >= 0xf8 && c <= 0xff) return 1;
211: if (c == 0xa6) return 1;
212: if (c == 0xa8) return 1;
213: if (c == 0xb4) return 1;
214: if (c == 0xb8) return 1;
215: if (c == 0xbc) return 1;
216: if (c == 0xbd) return 1;
217: if (c == 0xbe) return 1;
218: break;
219:
220: case ISO_8859_16:
221: if (c == 0xa1) return 1;
222: if (c == 0xa2) return 1;
223: if (c == 0xa3) return 1;
224: if (c == 0xa6) return 1;
225: if (c == 0xa8) return 1;
226: if (c == 0xaa) return 1;
227: if (c == 0xac) return 1;
228: if (c == 0xae) return 1;
229: if (c == 0xaf) return 1;
230: if (c == 0xb2) return 1;
231: if (c == 0xb3) return 1;
232: if (c == 0xb4) return 1;
233: if (c >= 0xb8 && c <= 0xba) return 1;
234: if (c == 0xbc) return 1;
235: if (c == 0xbd) return 1;
236: if (c == 0xbe) return 1;
237: if (c == 0xbf) return 1;
238: if (c >= 0xc0 && c <= 0xde) return 1;
239: if (c >= 0xdf && c <= 0xff) return 1;
240: break;
241:
242: case KOI8_R:
243: if (c == 0xa3 || c == 0xb3) return 1;
244: /* fall */
245: case KOI8:
246: if (c >= 0xc0 && c <= 0xff) return 1;
247: break;
248:
249: default:
250: exit(-1);
251: }
252:
253: return 0;
254: }
255:
256: static int IsBlank(int enc, int c)
257: {
258: if (c == 0x09 || c == 0x20) return 1;
259:
260: switch (enc) {
261: case UNICODE_ISO_8859_1:
262: case ISO_8859_1:
263: case ISO_8859_2:
264: case ISO_8859_3:
265: case ISO_8859_4:
266: case ISO_8859_5:
267: case ISO_8859_6:
268: case ISO_8859_7:
269: case ISO_8859_8:
270: case ISO_8859_9:
271: case ISO_8859_10:
272: case ISO_8859_11:
273: case ISO_8859_13:
274: case ISO_8859_14:
275: case ISO_8859_15:
276: case ISO_8859_16:
277: case KOI8:
278: if (c == 0xa0) return 1;
279: break;
280:
281: case KOI8_R:
282: if (c == 0x9a) return 1;
283: break;
284:
285: default:
286: exit(-1);
287: }
288:
289: return 0;
290: }
291:
292: static int IsCntrl(int enc, int c)
293: {
294: if (c >= 0x00 && c <= 0x1F) return 1;
295:
296: switch (enc) {
297: case UNICODE_ISO_8859_1:
298: if (c == 0xad) return 1;
299: /* fall */
300: case ISO_8859_1:
301: case ISO_8859_2:
302: case ISO_8859_3:
303: case ISO_8859_4:
304: case ISO_8859_5:
305: case ISO_8859_6:
306: case ISO_8859_7:
307: case ISO_8859_8:
308: case ISO_8859_9:
309: case ISO_8859_10:
310: case ISO_8859_11:
311: case ISO_8859_13:
312: case ISO_8859_14:
313: case ISO_8859_15:
314: case ISO_8859_16:
315: case KOI8:
316: if (c >= 0x7f && c <= 0x9F) return 1;
317: break;
318:
319:
320: case KOI8_R:
321: if (c == 0x7f) return 1;
322: break;
323:
324: default:
325: exit(-1);
326: }
327:
328: return 0;
329: }
330:
/*
 * IsDigit - decide whether byte value c is a decimal digit.
 *
 * enc: encoding identifier; accepted for signature parity with the other
 *      classifiers but not consulted, since only 0x30-0x39 qualify.
 * c:   byte value to classify.
 *
 * Returns 1 for 0x30-0x39, otherwise 0.
 */
static int IsDigit(int enc, int c)
{
  (void)enc;  /* intentionally unused */

  if (c >= 0x30 && c <= 0x39) return 1;
  return 0;
}
336:
337: static int IsGraph(int enc, int c)
338: {
339: if (c >= 0x21 && c <= 0x7e) return 1;
340:
341: switch (enc) {
342: case UNICODE_ISO_8859_1:
343: case ISO_8859_1:
344: case ISO_8859_2:
345: case ISO_8859_4:
346: case ISO_8859_5:
347: case ISO_8859_9:
348: case ISO_8859_10:
349: case ISO_8859_13:
350: case ISO_8859_14:
351: case ISO_8859_15:
352: case ISO_8859_16:
353: if (c >= 0xa1 && c <= 0xff) return 1;
354: break;
355:
356: case ISO_8859_3:
357: if (c >= 0xa1) {
358: if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
359: c == 0xe3 || c == 0xf0)
360: return 0;
361: else
362: return 1;
363: }
364: break;
365:
366: case ISO_8859_6:
367: if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
368: return 1;
369: if (c >= 0xc1 && c <= 0xda) return 1;
370: if (c >= 0xe0 && c <= 0xf2) return 1;
371: break;
372:
373: case ISO_8859_7:
374: if (c >= 0xa1 && c <= 0xfe &&
375: c != 0xa4 && c != 0xa5 && c != 0xaa &&
376: c != 0xae && c != 0xd2) return 1;
377: break;
378:
379: case ISO_8859_8:
380: if (c >= 0xa2 && c <= 0xfa) {
381: if (c >= 0xbf && c <= 0xde) return 0;
382: return 1;
383: }
384: break;
385:
386: case ISO_8859_11:
387: if (c >= 0xa1 && c <= 0xda) return 1;
388: if (c >= 0xdf && c <= 0xfb) return 1;
389: break;
390:
391: case KOI8:
392: if (c >= 0xc0 && c <= 0xff) return 1;
393: break;
394:
395: case KOI8_R:
396: if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1;
397: break;
398:
399: default:
400: exit(-1);
401: }
402:
403: return 0;
404: }
405:
406: static int IsLower(int enc, int c)
407: {
408: if (c >= 0x61 && c <= 0x7a) return 1;
409:
410: switch (enc) {
411: case UNICODE_ISO_8859_1:
412: case ISO_8859_1:
413: case ISO_8859_9:
414: if (c == 0xaa) return 1;
415: if (c == 0xb5) return 1;
416: if (c == 0xba) return 1;
417: if (c >= 0xdf && c <= 0xf6) return 1;
418: if (c >= 0xf8 && c <= 0xff) return 1;
419: break;
420:
421: case ISO_8859_2:
422: if (c == 0xb1 || c == 0xb3) return 1;
423: if (c == 0xb5 || c == 0xb6) return 1;
424: if (c >= 0xb9 && c <= 0xbc) return 1;
425: if (c >= 0xbe && c <= 0xbf) return 1;
426: if (c >= 0xdf && c <= 0xf6) return 1;
427: if (c >= 0xf8 && c <= 0xfe) return 1;
428: break;
429:
430: case ISO_8859_3:
431: if (c == 0xb1) return 1;
432: if (c == 0xb5 || c == 0xb6) return 1;
433: if (c >= 0xb9 && c <= 0xbc) return 1;
434: if (c == 0xbf) return 1;
435: if (c == 0xdf) return 1;
436: if (c >= 0xe0 && c <= 0xe2) return 1;
437: if (c >= 0xe4 && c <= 0xef) return 1;
438: if (c >= 0xf1 && c <= 0xf6) return 1;
439: if (c >= 0xf8 && c <= 0xfe) return 1;
440: break;
441:
442: case ISO_8859_4:
443: if (c == 0xa2) return 1;
444: if (c == 0xb1 || c == 0xb3) return 1;
445: if (c == 0xb5 || c == 0xb6) return 1;
446: if (c >= 0xb9 && c <= 0xbc) return 1;
447: if (c >= 0xbe && c <= 0xbf) return 1;
448: if (c == 0xdf) return 1;
449: if (c >= 0xe0 && c <= 0xf6) return 1;
450: if (c >= 0xf8 && c <= 0xfe) return 1;
451: break;
452:
453: case ISO_8859_5:
454: if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
455: break;
456:
457: case ISO_8859_6:
458: break;
459:
460: case ISO_8859_7:
461: if (c == 0xc0) return 1;
462: if (c >= 0xdc && c <= 0xfe) return 1;
463: break;
464:
465: case ISO_8859_8:
466: if (c == 0xb5) return 1;
467: break;
468:
469: case ISO_8859_10:
470: if (c >= 0xb1 && c <= 0xb6) return 1;
471: if (c >= 0xb8 && c <= 0xbc) return 1;
472: if (c == 0xbe || c == 0xbf) return 1;
473: if (c >= 0xdf && c <= 0xff) return 1;
474: break;
475:
476: case ISO_8859_11:
477: break;
478:
479: case ISO_8859_13:
480: if (c == 0xb5) return 1;
481: if (c == 0xb8) return 1;
482: if (c == 0xba) return 1;
483: if (c == 0xbf) return 1;
484: if (c >= 0xdf && c <= 0xf6) return 1;
485: if (c >= 0xf8 && c <= 0xfe) return 1;
486: break;
487:
488: case ISO_8859_14:
489: if (c == 0xa2) return 1;
490: if (c == 0xa5) return 1;
491: if (c == 0xab) return 1;
492: if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
493: if (c >= 0xb8 && c <= 0xba) return 1;
494: if (c == 0xbc) return 1;
495: if (c == 0xbe || c == 0xbf) return 1;
496: if (c >= 0xdf && c <= 0xff) return 1;
497: break;
498:
499: case ISO_8859_15:
500: if (c == 0xaa) return 1;
501: if (c == 0xb5) return 1;
502: if (c == 0xba) return 1;
503: if (c >= 0xdf && c <= 0xf6) return 1;
504: if (c >= 0xf8 && c <= 0xff) return 1;
505: if (c == 0xa8) return 1;
506: if (c == 0xb8) return 1;
507: if (c == 0xbd) return 1;
508: break;
509:
510: case ISO_8859_16:
511: if (c == 0xa2) return 1;
512: if (c == 0xa8) return 1;
513: if (c == 0xae) return 1;
514: if (c == 0xb3) return 1;
515: if (c >= 0xb8 && c <= 0xba) return 1;
516: if (c == 0xbd) return 1;
517: if (c == 0xbf) return 1;
518: if (c >= 0xdf && c <= 0xff) return 1;
519: break;
520:
521: case KOI8_R:
522: if (c == 0xa3) return 1;
523: /* fall */
524: case KOI8:
525: if (c >= 0xc0 && c <= 0xdf) return 1;
526: break;
527:
528: default:
529: exit(-1);
530: }
531:
532: return 0;
533: }
534:
535: static int IsPrint(int enc, int c)
536: {
537: if (c >= 0x20 && c <= 0x7e) return 1;
538:
539: switch (enc) {
540: case UNICODE_ISO_8859_1:
541: if (c >= 0x09 && c <= 0x0d) return 1;
542: if (c == 0x85) return 1;
543: /* fall */
544: case ISO_8859_1:
545: case ISO_8859_2:
546: case ISO_8859_4:
547: case ISO_8859_5:
548: case ISO_8859_9:
549: case ISO_8859_10:
550: case ISO_8859_13:
551: case ISO_8859_14:
552: case ISO_8859_15:
553: case ISO_8859_16:
554: if (c >= 0xa0 && c <= 0xff) return 1;
555: break;
556:
557: case ISO_8859_3:
558: if (c >= 0xa0) {
559: if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
560: c == 0xe3 || c == 0xf0)
561: return 0;
562: else
563: return 1;
564: }
565: break;
566:
567: case ISO_8859_6:
568: if (c == 0xa0) return 1;
569: if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
570: return 1;
571: if (c >= 0xc1 && c <= 0xda) return 1;
572: if (c >= 0xe0 && c <= 0xf2) return 1;
573: break;
574:
575: case ISO_8859_7:
576: if (c >= 0xa0 && c <= 0xfe &&
577: c != 0xa4 && c != 0xa5 && c != 0xaa &&
578: c != 0xae && c != 0xd2) return 1;
579: break;
580:
581: case ISO_8859_8:
582: if (c >= 0xa0 && c <= 0xfa) {
583: if (c >= 0xbf && c <= 0xde) return 0;
584: if (c == 0xa1) return 0;
585: return 1;
586: }
587: break;
588:
589: case ISO_8859_11:
590: if (c >= 0xa0 && c <= 0xda) return 1;
591: if (c >= 0xdf && c <= 0xfb) return 1;
592: break;
593:
594: case KOI8:
595: if (c == 0xa0) return 1;
596: if (c >= 0xc0 && c <= 0xff) return 1;
597: break;
598:
599: case KOI8_R:
600: if (c >= 0x80 && c <= 0xff) return 1;
601: break;
602:
603: default:
604: exit(-1);
605: }
606:
607: return 0;
608: }
609:
610: static int IsPunct(int enc, int c)
611: {
612: if (enc == UNICODE_ISO_8859_1) {
613: if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
614: c == 0x7c || c == 0x7e) return 1;
615: if (c >= 0x3c && c <= 0x3e) return 1;
616: }
617:
618: if (c >= 0x21 && c <= 0x2f) return 1;
619: if (c >= 0x3a && c <= 0x40) return 1;
620: if (c >= 0x5b && c <= 0x60) return 1;
621: if (c >= 0x7b && c <= 0x7e) return 1;
622:
623: switch (enc) {
624: case ISO_8859_1:
625: case ISO_8859_9:
626: case ISO_8859_15:
627: if (c == 0xad) return 1;
628: /* fall */
629: case UNICODE_ISO_8859_1:
630: if (c == 0xa1) return 1;
631: if (c == 0xab) return 1;
632: if (c == 0xb7) return 1;
633: if (c == 0xbb) return 1;
634: if (c == 0xbf) return 1;
635: break;
636:
637: case ISO_8859_2:
638: case ISO_8859_4:
639: case ISO_8859_5:
640: case ISO_8859_14:
641: if (c == 0xad) return 1;
642: break;
643:
644: case ISO_8859_3:
645: case ISO_8859_10:
646: if (c == 0xad) return 1;
647: if (c == 0xb7) return 1;
648: if (c == 0xbd) return 1;
649: break;
650:
651: case ISO_8859_6:
652: if (c == 0xac) return 1;
653: if (c == 0xad) return 1;
654: if (c == 0xbb) return 1;
655: if (c == 0xbf) return 1;
656: break;
657:
658: case ISO_8859_7:
659: if (c == 0xa1 || c == 0xa2) return 1;
660: if (c == 0xab) return 1;
661: if (c == 0xaf) return 1;
662: if (c == 0xad) return 1;
663: if (c == 0xb7 || c == 0xbb) return 1;
664: break;
665:
666: case ISO_8859_8:
667: if (c == 0xab) return 1;
668: if (c == 0xad) return 1;
669: if (c == 0xb7) return 1;
670: if (c == 0xbb) return 1;
671: if (c == 0xdf) return 1;
672: break;
673:
674: case ISO_8859_13:
675: if (c == 0xa1 || c == 0xa5) return 1;
676: if (c == 0xab || c == 0xad) return 1;
677: if (c == 0xb4 || c == 0xb7) return 1;
678: if (c == 0xbb) return 1;
679: if (c == 0xff) return 1;
680: break;
681:
682: case ISO_8859_16:
683: if (c == 0xa5) return 1;
684: if (c == 0xab) return 1;
685: if (c == 0xad) return 1;
686: if (c == 0xb5) return 1;
687: if (c == 0xb7) return 1;
688: if (c == 0xbb) return 1;
689: break;
690:
691: case KOI8_R:
692: if (c == 0x9e) return 1;
693: break;
694:
695: case ISO_8859_11:
696: case KOI8:
697: break;
698:
699: default:
700: exit(-1);
701: }
702:
703: return 0;
704: }
705:
706: static int IsSpace(int enc, int c)
707: {
708: if (c >= 0x09 && c <= 0x0d) return 1;
709: if (c == 0x20) return 1;
710:
711: switch (enc) {
712: case UNICODE_ISO_8859_1:
713: if (c == 0x85) return 1;
714: /* fall */
715: case ISO_8859_1:
716: case ISO_8859_2:
717: case ISO_8859_3:
718: case ISO_8859_4:
719: case ISO_8859_5:
720: case ISO_8859_6:
721: case ISO_8859_7:
722: case ISO_8859_8:
723: case ISO_8859_9:
724: case ISO_8859_10:
725: case ISO_8859_11:
726: case ISO_8859_13:
727: case ISO_8859_14:
728: case ISO_8859_15:
729: case ISO_8859_16:
730: case KOI8:
731: if (c == 0xa0) return 1;
732: break;
733:
734: case KOI8_R:
735: if (c == 0x9a) return 1;
736: break;
737:
738: default:
739: exit(-1);
740: }
741:
742: return 0;
743: }
744:
745: static int IsUpper(int enc, int c)
746: {
747: if (c >= 0x41 && c <= 0x5a) return 1;
748:
749: switch (enc) {
750: case UNICODE_ISO_8859_1:
751: case ISO_8859_1:
752: case ISO_8859_9:
753: if (c >= 0xc0 && c <= 0xd6) return 1;
754: if (c >= 0xd8 && c <= 0xde) return 1;
755: break;
756:
757: case ISO_8859_2:
758: if (c == 0xa1 || c == 0xa3) return 1;
759: if (c == 0xa5 || c == 0xa6) return 1;
760: if (c >= 0xa9 && c <= 0xac) return 1;
761: if (c >= 0xae && c <= 0xaf) return 1;
762: if (c >= 0xc0 && c <= 0xd6) return 1;
763: if (c >= 0xd8 && c <= 0xde) return 1;
764: break;
765:
766: case ISO_8859_3:
767: if (c == 0xa1) return 1;
768: if (c == 0xa6) return 1;
769: if (c >= 0xa9 && c <= 0xac) return 1;
770: if (c == 0xaf) return 1;
771: if (c >= 0xc0 && c <= 0xc2) return 1;
772: if (c >= 0xc4 && c <= 0xcf) return 1;
773: if (c >= 0xd1 && c <= 0xd6) return 1;
774: if (c >= 0xd8 && c <= 0xde) return 1;
775: break;
776:
777: case ISO_8859_4:
778: if (c == 0xa1 || c == 0xa3) return 1;
779: if (c == 0xa5 || c == 0xa6) return 1;
780: if (c >= 0xa9 && c <= 0xac) return 1;
781: if (c == 0xae) return 1;
782: if (c == 0xbd) return 1;
783: if (c >= 0xc0 && c <= 0xd6) return 1;
784: if (c >= 0xd8 && c <= 0xde) return 1;
785: break;
786:
787: case ISO_8859_5:
788: if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
789: break;
790:
791: case ISO_8859_6:
792: break;
793:
794: case ISO_8859_7:
795: if (c == 0xb6) return 1;
796: if (c >= 0xb8 && c <= 0xba) return 1;
797: if (c == 0xbc) return 1;
798: if (c >= 0xbe && c <= 0xbf) return 1;
799: if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
800: break;
801:
802: case ISO_8859_8:
803: case ISO_8859_11:
804: break;
805:
806: case ISO_8859_10:
807: if (c >= 0xa1 && c <= 0xa6) return 1;
808: if (c >= 0xa8 && c <= 0xac) return 1;
809: if (c == 0xae || c == 0xaf) return 1;
810: if (c >= 0xc0 && c <= 0xde) return 1;
811: break;
812:
813: case ISO_8859_13:
814: if (c == 0xa8) return 1;
815: if (c == 0xaa) return 1;
816: if (c == 0xaf) return 1;
817: if (c >= 0xc0 && c <= 0xd6) return 1;
818: if (c >= 0xd8 && c <= 0xde) return 1;
819: break;
820:
821: case ISO_8859_14:
822: if (c == 0xa1) return 1;
823: if (c == 0xa4 || c == 0xa6) return 1;
824: if (c == 0xa8) return 1;
825: if (c == 0xaa || c == 0xac) return 1;
826: if (c == 0xaf || c == 0xb0) return 1;
827: if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1;
828: if (c == 0xbb || c == 0xbd) return 1;
829: if (c >= 0xc0 && c <= 0xde) return 1;
830: break;
831:
832: case ISO_8859_15:
833: if (c >= 0xc0 && c <= 0xd6) return 1;
834: if (c >= 0xd8 && c <= 0xde) return 1;
835: if (c == 0xa6) return 1;
836: if (c == 0xb4) return 1;
837: if (c == 0xbc) return 1;
838: if (c == 0xbe) return 1;
839: break;
840:
841: case ISO_8859_16:
842: if (c == 0xa1) return 1;
843: if (c == 0xa3) return 1;
844: if (c == 0xa6) return 1;
845: if (c == 0xaa) return 1;
846: if (c == 0xac) return 1;
847: if (c == 0xaf) return 1;
848: if (c == 0xb2) return 1;
849: if (c == 0xb4) return 1;
850: if (c == 0xbc) return 1;
851: if (c == 0xbe) return 1;
852: if (c >= 0xc0 && c <= 0xde) return 1;
853: break;
854:
855: case KOI8_R:
856: if (c == 0xb3) return 1;
857: /* fall */
858: case KOI8:
859: if (c >= 0xe0 && c <= 0xff) return 1;
860: break;
861:
862: default:
863: exit(-1);
864: }
865:
866: return 0;
867: }
868:
/*
 * IsXDigit - decide whether byte value c is a hexadecimal digit.
 *
 * enc: encoding identifier; accepted for signature parity with the other
 *      classifiers but not consulted.
 * c:   byte value to classify.
 *
 * Returns 1 for 0x30-0x39, 0x41-0x46 and 0x61-0x66, otherwise 0.
 */
static int IsXDigit(int enc, int c)
{
  (void)enc;  /* intentionally unused */

  if (c >= 0x30 && c <= 0x39) return 1;
  if (c >= 0x41 && c <= 0x46) return 1;
  if (c >= 0x61 && c <= 0x66) return 1;
  return 0;
}
876:
877: static int IsWord(int enc, int c)
878: {
879: if (c >= 0x30 && c <= 0x39) return 1;
880: if (c >= 0x41 && c <= 0x5a) return 1;
881: if (c == 0x5f) return 1;
882: if (c >= 0x61 && c <= 0x7a) return 1;
883:
884: switch (enc) {
885: case UNICODE_ISO_8859_1:
886: case ISO_8859_1:
887: case ISO_8859_9:
888: if (c == 0xaa) return 1;
889: if (c >= 0xb2 && c <= 0xb3) return 1;
890: if (c == 0xb5) return 1;
891: if (c >= 0xb9 && c <= 0xba) return 1;
892: if (c >= 0xbc && c <= 0xbe) return 1;
893: if (c >= 0xc0 && c <= 0xd6) return 1;
894: if (c >= 0xd8 && c <= 0xf6) return 1;
895: if (c >= 0xf8 && c <= 0xff) return 1;
896: break;
897:
898: case ISO_8859_2:
899: if (c == 0xa1 || c == 0xa3) return 1;
900: if (c == 0xa5 || c == 0xa6) return 1;
901: if (c >= 0xa9 && c <= 0xac) return 1;
902: if (c >= 0xae && c <= 0xaf) return 1;
903: if (c == 0xb1 || c == 0xb3) return 1;
904: if (c == 0xb5 || c == 0xb6) return 1;
905: if (c >= 0xb9 && c <= 0xbc) return 1;
906: if (c >= 0xbe && c <= 0xbf) return 1;
907: if (c >= 0xc0 && c <= 0xd6) return 1;
908: if (c >= 0xd8 && c <= 0xf6) return 1;
909: if (c >= 0xf8 && c <= 0xfe) return 1;
910: break;
911:
912: case ISO_8859_3:
913: if (c == 0xa1) return 1;
914: if (c == 0xa6) return 1;
915: if (c >= 0xa9 && c <= 0xac) return 1;
916: if (c == 0xaf) return 1;
917: if (c >= 0xb1 && c <= 0xb3) return 1;
918: if (c == 0xb5 || c == 0xb6) return 1;
919: if (c >= 0xb9 && c <= 0xbd) return 1;
920: if (c == 0xbf) return 1;
921: if (c >= 0xc0 && c <= 0xc2) return 1;
922: if (c >= 0xc4 && c <= 0xcf) return 1;
923: if (c >= 0xd1 && c <= 0xd6) return 1;
924: if (c >= 0xd8 && c <= 0xe2) return 1;
925: if (c >= 0xe4 && c <= 0xef) return 1;
926: if (c >= 0xf1 && c <= 0xf6) return 1;
927: if (c >= 0xf8 && c <= 0xfe) return 1;
928: break;
929:
930: case ISO_8859_4:
931: if (c >= 0xa1 && c <= 0xa3) return 1;
932: if (c == 0xa5 || c == 0xa6) return 1;
933: if (c >= 0xa9 && c <= 0xac) return 1;
934: if (c == 0xae) return 1;
935: if (c == 0xb1 || c == 0xb3) return 1;
936: if (c == 0xb5 || c == 0xb6) return 1;
937: if (c >= 0xb9 && c <= 0xbf) return 1;
938: if (c >= 0xc0 && c <= 0xd6) return 1;
939: if (c >= 0xd8 && c <= 0xf6) return 1;
940: if (c >= 0xf8 && c <= 0xfe) return 1;
941: break;
942:
943: case ISO_8859_5:
944: if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
945: if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
946: break;
947:
948: case ISO_8859_6:
949: if (c >= 0xc1 && c <= 0xda) return 1;
950: if (c >= 0xe0 && c <= 0xea) return 1;
951: if (c >= 0xeb && c <= 0xf2) return 1;
952: break;
953:
954: case ISO_8859_7:
955: if (c == 0xb2 || c == 0xb3) return 1;
956: if (c == 0xb6) return 1;
957: if (c >= 0xb8 && c <= 0xba) return 1;
958: if (c >= 0xbc && c <= 0xbf) return 1;
959: if (c == 0xc0) return 1;
960: if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
961: if (c >= 0xdc && c <= 0xfe) return 1;
962: break;
963:
964: case ISO_8859_8:
965: if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
966: if (c >= 0xbc && c <= 0xbe) return 1;
967: if (c >= 0xe0 && c <= 0xfa) return 1;
968: break;
969:
970: case ISO_8859_10:
971: if (c >= 0xa1 && c <= 0xff) {
972: if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd)
973: return 1;
974: }
975: break;
976:
977: case ISO_8859_11:
978: if (c >= 0xa1 && c <= 0xda) return 1;
979: if (c >= 0xdf && c <= 0xfb) return 1;
980: break;
981:
982: case ISO_8859_13:
983: if (c == 0xa8) return 1;
984: if (c == 0xaa) return 1;
985: if (c == 0xaf) return 1;
986: if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
987: if (c >= 0xbc && c <= 0xbe) return 1;
988: if (c == 0xb8) return 1;
989: if (c == 0xba) return 1;
990: if (c >= 0xbf && c <= 0xd6) return 1;
991: if (c >= 0xd8 && c <= 0xf6) return 1;
992: if (c >= 0xf8 && c <= 0xfe) return 1;
993: break;
994:
995: case ISO_8859_14:
996: if (c >= 0xa1 && c <= 0xff) {
997: if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae ||
998: c == 0xb6) return 0;
999: return 1;
1000: }
1001: break;
1002:
1003: case ISO_8859_15:
1004: if (c == 0xaa) return 1;
1005: if (c >= 0xb2 && c <= 0xb3) return 1;
1006: if (c == 0xb5) return 1;
1007: if (c >= 0xb9 && c <= 0xba) return 1;
1008: if (c >= 0xbc && c <= 0xbe) return 1;
1009: if (c >= 0xc0 && c <= 0xd6) return 1;
1010: if (c >= 0xd8 && c <= 0xf6) return 1;
1011: if (c >= 0xf8 && c <= 0xff) return 1;
1012: if (c == 0xa6) return 1;
1013: if (c == 0xa8) return 1;
1014: if (c == 0xb4) return 1;
1015: if (c == 0xb8) return 1;
1016: break;
1017:
1018: case ISO_8859_16:
1019: if (c == 0xa1) return 1;
1020: if (c == 0xa2) return 1;
1021: if (c == 0xa3) return 1;
1022: if (c == 0xa6) return 1;
1023: if (c == 0xa8) return 1;
1024: if (c == 0xaa) return 1;
1025: if (c == 0xac) return 1;
1026: if (c == 0xae) return 1;
1027: if (c == 0xaf) return 1;
1028: if (c == 0xb2) return 1;
1029: if (c == 0xb3) return 1;
1030: if (c == 0xb4) return 1;
1031: if (c >= 0xb8 && c <= 0xba) return 1;
1032: if (c == 0xbc) return 1;
1033: if (c == 0xbd) return 1;
1034: if (c == 0xbe) return 1;
1035: if (c == 0xbf) return 1;
1036: if (c >= 0xc0 && c <= 0xde) return 1;
1037: if (c >= 0xdf && c <= 0xff) return 1;
1038: break;
1039:
1040: case KOI8_R:
1041: if (c == 0x9d) return 1;
1042: if (c == 0xa3 || c == 0xb3) return 1;
1043: /* fall */
1044: case KOI8:
1045: if (c >= 0xc0 && c <= 0xff) return 1;
1046: break;
1047:
1048: default:
1049: exit(-1);
1050: }
1051:
1052: return 0;
1053: }
1054:
/*
 * IsAscii - decide whether byte value c lies in the 7-bit range.
 *
 * enc: encoding identifier; accepted for signature parity with the other
 *      classifiers but not consulted.
 * c:   byte value to classify.
 *
 * Returns 1 for 0x00-0x7f, otherwise 0.
 */
static int IsAscii(int enc, int c)
{
  (void)enc;  /* intentionally unused */

  if (c >= 0x00 && c <= 0x7f) return 1;
  return 0;
}
1060:
/*
 * IsNewline - decide whether byte value c is the newline byte.
 *
 * enc: encoding identifier; accepted for signature parity with the other
 *      classifiers but not consulted.
 * c:   byte value to classify.
 *
 * Returns 1 only for 0x0a, otherwise 0.
 */
static int IsNewline(int enc, int c)
{
  (void)enc;  /* intentionally unused */

  if (c == 0x0a) return 1;
  return 0;
}
1066:
1067: static int exec(FILE* fp, ENC_INFO* einfo)
1068: {
1069: #define NCOL 8
1070:
1071: int c, val, enc;
1072:
1073: enc = einfo->num;
1074:
1075: fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n",
1076: einfo->name);
1077:
1078: for (c = 0; c < 256; c++) {
1079: val = 0;
1080: if (IsNewline(enc, c)) val |= ONIGENC_CTYPE_NEWLINE;
1081: if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA;
1082: if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK;
1083: if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL;
1084: if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT;
1085: if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH;
1086: if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER;
1087: if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT;
1088: if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT;
1089: if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE;
1090: if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER;
1091: if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT;
1092: if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD;
1093: if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII;
1094:
1095: if (c % NCOL == 0) fputs(" ", fp);
1096: fprintf(fp, "0x%04x", val);
1097: if (c != 255) fputs(",", fp);
1098: if (c != 0 && c % NCOL == (NCOL-1))
1099: fputs("\n", fp);
1100: else
1101: fputs(" ", fp);
1102: }
1103: fprintf(fp, "};\n");
1104: return 0;
1105: }
1106:
1107: extern int main(int argc, char* argv[])
1108: {
1109: int i;
1110: FILE* fp = stdout;
1111:
1112: for (i = 0; i < sizeof(Info)/sizeof(ENC_INFO); i++) {
1113: exec(fp, &Info[i]);
1114: }
1115: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>