![]() ![]() | ![]() |
1.1 misho 1: /*
2: * charset=UTF-8
3: * vim600: encoding=utf-8
4: */
5:
6: /*
7: * "streamable kanji code filter and converter"
8: *
9: * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
10: *
11: * This software is released under the GNU Lesser General Public License.
12: * (Version 2.1, February 1999)
13: * Please read the following detail of the licence (in japanese).
14: *
15: * ◆使用許諾条件◆
16: *
17: * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
18: * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
19: * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
20: * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
21: * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
22: * することはできません。
23: *
24: * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
25: * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
26: * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
27: * による許諾を得る必要があります。
28: *
29: * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
30: * ます。「GNU Lesser General Public License」とは、これまでLibrary General
31: * Public Licenseと呼ばれていたものです。
32: * http://www.gnu.org/ --- GNUウェブサイト
33: * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
34: * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
35: *
36: * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
37: * はありません。
38: *
39: * ◆保証内容◆
40: *
41: * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
42: * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
43: * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
44: * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
45: * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
46: * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
47: * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
48: * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
49: * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
50: * 契約・規定に優先します。
51: *
52: * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
53: *
54: * 〒102-0073
55: * 東京都千代田区九段北1-13-5日本地所第一ビル4F
56: * 株式会社ハッピーサイズ
57: * Phone: 03-3512-3655, Fax: 03-3512-3656
58: * Email: sales@happysize.co.jp
59: * Web: http://happysize.com/
60: *
61: * ◆著者◆
62: *
63: * 金本 茂 <sgk@happysize.co.jp>
64: *
65: * ◆履歴◆
66: *
67: * 1998/11/10 sgk implementation in C++
68: * 1999/4/25 sgk Cで書きなおし。
69: * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
70: * 1999/6/?? Unicodeサポート。
71: * 1999/6/22 sgk ライセンスをLGPLに変更。
72: *
73: */
74:
75: /*
76: * Unicode support
77: *
78: * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
79: * All rights reserved.
80: *
81: */
82:
83:
84: #ifdef HAVE_CONFIG_H
85: #include "config.h"
86: #endif
87:
88: #include <stddef.h>
89:
90: #ifdef HAVE_STRING_H
91: #include <string.h>
92: #endif
93:
94: #ifdef HAVE_STRINGS_H
95: #include <strings.h>
96: #endif
97:
98: #ifdef HAVE_STDDEF_H
99: #include <stddef.h>
100: #endif
101:
102: #include "mbfilter.h"
103: #include "mbfl_filter_output.h"
104: #include "mbfilter_pass.h"
105: #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
106:
107: #include "eaw_table.h"
108:
109: /* hex character table "0123456789ABCDEF" */
/* Maps a nibble value (0..15) to the ASCII code of its uppercase hex digit. */
static char mbfl_hexchar_table[] = {
	/* '0' .. '7' */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	/* '8', '9', 'A' .. 'F' */
	0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46
};
113:
114:
115:
116: /*
117: * encoding filter
118: */
/*
 * Evaluate `statement`; when it yields a negative value, make the enclosing
 * function return -1 immediately.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return -1; \
		} \
	} while (0)
120:
121:
122: /*
123: * buffering converter
124: */
125: mbfl_buffer_converter *
126: mbfl_buffer_converter_new(
127: enum mbfl_no_encoding from,
128: enum mbfl_no_encoding to,
129: int buf_initsz)
130: {
1.1.1.2 misho 131: const mbfl_encoding *_from = mbfl_no2encoding(from);
132: const mbfl_encoding *_to = mbfl_no2encoding(to);
133:
134: return mbfl_buffer_converter_new2(_from ? _from: &mbfl_encoding_pass, _to ? _to: &mbfl_encoding_pass, buf_initsz);
135: }
136:
137: mbfl_buffer_converter *
138: mbfl_buffer_converter_new2(
139: const mbfl_encoding *from,
140: const mbfl_encoding *to,
141: int buf_initsz)
142: {
1.1 misho 143: mbfl_buffer_converter *convd;
144:
145: /* allocate */
146: convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter));
147: if (convd == NULL) {
148: return NULL;
149: }
150:
151: /* initialize */
1.1.1.2 misho 152: convd->from = from;
153: convd->to = to;
1.1 misho 154:
155: /* create convert filter */
156: convd->filter1 = NULL;
157: convd->filter2 = NULL;
158: if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) {
159: convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
160: } else {
161: convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
162: if (convd->filter2 != NULL) {
163: convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding,
164: mbfl_no_encoding_wchar,
165: (int (*)(int, void*))convd->filter2->filter_function,
166: (int (*)(void*))convd->filter2->filter_flush,
167: convd->filter2);
168: if (convd->filter1 == NULL) {
169: mbfl_convert_filter_delete(convd->filter2);
170: }
171: }
172: }
173: if (convd->filter1 == NULL) {
174: return NULL;
175: }
176:
177: mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
178:
179: return convd;
180: }
181:
1.1.1.2 misho 182:
1.1 misho 183: void
184: mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
185: {
186: if (convd != NULL) {
187: if (convd->filter1) {
188: mbfl_convert_filter_delete(convd->filter1);
189: }
190: if (convd->filter2) {
191: mbfl_convert_filter_delete(convd->filter2);
192: }
193: mbfl_memory_device_clear(&convd->device);
194: mbfl_free((void*)convd);
195: }
196: }
197:
198: void
199: mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
200: {
201: mbfl_memory_device_reset(&convd->device);
202: }
203:
204: int
205: mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
206: {
207: if (convd != NULL) {
208: if (convd->filter2 != NULL) {
209: convd->filter2->illegal_mode = mode;
210: } else if (convd->filter1 != NULL) {
211: convd->filter1->illegal_mode = mode;
212: } else {
213: return 0;
214: }
215: }
216:
217: return 1;
218: }
219:
220: int
221: mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
222: {
223: if (convd != NULL) {
224: if (convd->filter2 != NULL) {
225: convd->filter2->illegal_substchar = substchar;
226: } else if (convd->filter1 != NULL) {
227: convd->filter1->illegal_substchar = substchar;
228: } else {
229: return 0;
230: }
231: }
232:
233: return 1;
234: }
235:
236: int
237: mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n)
238: {
239: mbfl_convert_filter *filter;
240: int (*filter_function)(int c, mbfl_convert_filter *filter);
241:
242: if (convd != NULL && p != NULL) {
243: filter = convd->filter1;
244: if (filter != NULL) {
245: filter_function = filter->filter_function;
246: while (n > 0) {
247: if ((*filter_function)(*p++, filter) < 0) {
248: break;
249: }
250: n--;
251: }
252: }
253: }
254:
255: return n;
256: }
257:
258: int
259: mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
260: {
1.1.1.2 misho 261: return mbfl_buffer_converter_feed2(convd, string, NULL);
262: }
263:
264: int
265: mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc)
266: {
1.1 misho 267: int n;
268: unsigned char *p;
269: mbfl_convert_filter *filter;
270: int (*filter_function)(int c, mbfl_convert_filter *filter);
271:
272: if (convd == NULL || string == NULL) {
273: return -1;
274: }
275: mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
276: /* feed data */
277: n = string->len;
278: p = string->val;
1.1.1.2 misho 279:
1.1 misho 280: filter = convd->filter1;
281: if (filter != NULL) {
282: filter_function = filter->filter_function;
283: while (n > 0) {
284: if ((*filter_function)(*p++, filter) < 0) {
1.1.1.2 misho 285: if (loc) {
286: *loc = p - string->val;
287: }
1.1 misho 288: return -1;
289: }
290: n--;
291: }
292: }
1.1.1.2 misho 293: if (loc) {
294: *loc = p - string->val;
295: }
1.1 misho 296: return 0;
297: }
298:
1.1.1.2 misho 299:
1.1 misho 300: int
301: mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
302: {
303: if (convd == NULL) {
304: return -1;
305: }
306:
307: if (convd->filter1 != NULL) {
308: mbfl_convert_filter_flush(convd->filter1);
309: }
310: if (convd->filter2 != NULL) {
311: mbfl_convert_filter_flush(convd->filter2);
312: }
313:
314: return 0;
315: }
316:
317: mbfl_string *
318: mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
319: {
320: if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
321: result->no_encoding = convd->to->no_encoding;
322: result->val = convd->device.buffer;
323: result->len = convd->device.pos;
324: } else {
325: result = NULL;
326: }
327:
328: return result;
329: }
330:
331: mbfl_string *
332: mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
333: {
334: if (convd == NULL || result == NULL) {
335: return NULL;
336: }
337: result->no_encoding = convd->to->no_encoding;
338: return mbfl_memory_device_result(&convd->device, result);
339: }
340:
341: mbfl_string *
342: mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
343: mbfl_string *result)
344: {
345: if (convd == NULL || string == NULL || result == NULL) {
346: return NULL;
347: }
348: mbfl_buffer_converter_feed(convd, string);
349: if (convd->filter1 != NULL) {
350: mbfl_convert_filter_flush(convd->filter1);
351: }
352: if (convd->filter2 != NULL) {
353: mbfl_convert_filter_flush(convd->filter2);
354: }
355: result->no_encoding = convd->to->no_encoding;
356: return mbfl_memory_device_result(&convd->device, result);
357: }
358:
359: int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
360: {
361: int num_illegalchars = 0;
362:
363: if (convd == NULL) {
364: return 0;
365: }
366:
367: if (convd->filter1 != NULL) {
368: num_illegalchars += convd->filter1->num_illegalchar;
369: }
370:
371: if (convd->filter2 != NULL) {
372: num_illegalchars += convd->filter2->num_illegalchar;
373: }
374:
375: return (num_illegalchars);
376: }
377:
378: /*
379: * encoding detector
380: */
381: mbfl_encoding_detector *
382: mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
383: {
384: mbfl_encoding_detector *identd;
385:
386: int i, num;
387: mbfl_identify_filter *filter;
388:
389: if (elist == NULL || elistsz <= 0) {
390: return NULL;
391: }
392:
393: /* allocate */
394: identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
395: if (identd == NULL) {
396: return NULL;
397: }
398: identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
399: if (identd->filter_list == NULL) {
400: mbfl_free(identd);
401: return NULL;
402: }
403:
404: /* create filters */
405: i = 0;
406: num = 0;
407: while (i < elistsz) {
408: filter = mbfl_identify_filter_new(elist[i]);
409: if (filter != NULL) {
410: identd->filter_list[num] = filter;
411: num++;
412: }
413: i++;
414: }
415: identd->filter_list_size = num;
416:
417: /* set strict flag */
418: identd->strict = strict;
419:
420: return identd;
421: }
422:
1.1.1.2 misho 423: mbfl_encoding_detector *
424: mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict)
425: {
426: mbfl_encoding_detector *identd;
427:
428: int i, num;
429: mbfl_identify_filter *filter;
430:
431: if (elist == NULL || elistsz <= 0) {
432: return NULL;
433: }
434:
435: /* allocate */
436: identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
437: if (identd == NULL) {
438: return NULL;
439: }
440: identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
441: if (identd->filter_list == NULL) {
442: mbfl_free(identd);
443: return NULL;
444: }
445:
446: /* create filters */
447: i = 0;
448: num = 0;
449: while (i < elistsz) {
450: filter = mbfl_identify_filter_new2(elist[i]);
451: if (filter != NULL) {
452: identd->filter_list[num] = filter;
453: num++;
454: }
455: i++;
456: }
457: identd->filter_list_size = num;
458:
459: /* set strict flag */
460: identd->strict = strict;
461:
462: return identd;
463: }
464:
465:
1.1 misho 466: void
467: mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
468: {
469: int i;
470:
471: if (identd != NULL) {
472: if (identd->filter_list != NULL) {
473: i = identd->filter_list_size;
474: while (i > 0) {
475: i--;
476: mbfl_identify_filter_delete(identd->filter_list[i]);
477: }
478: mbfl_free((void *)identd->filter_list);
479: }
480: mbfl_free((void *)identd);
481: }
482: }
483:
484: int
485: mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
486: {
487: int i, n, num, bad, res;
488: unsigned char *p;
489: mbfl_identify_filter *filter;
490:
491: res = 0;
492: /* feed data */
493: if (identd != NULL && string != NULL && string->val != NULL) {
494: num = identd->filter_list_size;
495: n = string->len;
496: p = string->val;
497: bad = 0;
498: while (n > 0) {
499: for (i = 0; i < num; i++) {
500: filter = identd->filter_list[i];
501: if (!filter->flag) {
502: (*filter->filter_function)(*p, filter);
503: if (filter->flag) {
504: bad++;
505: }
506: }
507: }
508: if ((num - 1) <= bad) {
509: res = 1;
510: break;
511: }
512: p++;
513: n--;
514: }
515: }
516:
517: return res;
518: }
519:
1.1.1.2 misho 520: const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd)
1.1 misho 521: {
522: mbfl_identify_filter *filter;
1.1.1.2 misho 523: const mbfl_encoding *encoding = NULL;
1.1 misho 524: int n;
525:
526: /* judge */
527: if (identd != NULL) {
528: n = identd->filter_list_size - 1;
529: while (n >= 0) {
530: filter = identd->filter_list[n];
531: if (!filter->flag) {
532: if (!identd->strict || !filter->status) {
1.1.1.2 misho 533: encoding = filter->encoding;
1.1 misho 534: }
535: }
536: n--;
537: }
538:
539: /* fallback judge */
1.1.1.2 misho 540: if (!encoding) {
1.1 misho 541: n = identd->filter_list_size - 1;
542: while (n >= 0) {
543: filter = identd->filter_list[n];
544: if (!filter->flag) {
1.1.1.2 misho 545: encoding = filter->encoding;
1.1 misho 546: }
547: n--;
548: }
549: }
550: }
551:
552: return encoding;
553: }
554:
1.1.1.2 misho 555: enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
556: {
557: const mbfl_encoding *encoding = mbfl_encoding_detector_judge2(identd);
558: return !encoding ? mbfl_no_encoding_invalid: encoding->no_encoding;
559: }
560:
1.1 misho 561:
562: /*
563: * encoding converter
564: */
565: mbfl_string *
566: mbfl_convert_encoding(
567: mbfl_string *string,
568: mbfl_string *result,
569: enum mbfl_no_encoding toenc)
570: {
571: int n;
572: unsigned char *p;
573: const mbfl_encoding *encoding;
574: mbfl_memory_device device;
575: mbfl_convert_filter *filter1;
576: mbfl_convert_filter *filter2;
577:
578: /* initialize */
579: encoding = mbfl_no2encoding(toenc);
580: if (encoding == NULL || string == NULL || result == NULL) {
581: return NULL;
582: }
583:
584: filter1 = NULL;
585: filter2 = NULL;
586: if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) {
587: filter1 = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device);
588: } else {
589: filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
590: if (filter2 != NULL) {
591: filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
592: if (filter1 == NULL) {
593: mbfl_convert_filter_delete(filter2);
594: }
595: }
596: }
597: if (filter1 == NULL) {
598: return NULL;
599: }
600:
601: if (filter2 != NULL) {
602: filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
603: filter2->illegal_substchar = 0x3f; /* '?' */
604: }
605:
606: mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
607:
608: /* feed data */
609: n = string->len;
610: p = string->val;
611: if (p != NULL) {
612: while (n > 0) {
613: if ((*filter1->filter_function)(*p++, filter1) < 0) {
614: break;
615: }
616: n--;
617: }
618: }
619:
620: mbfl_convert_filter_flush(filter1);
621: mbfl_convert_filter_delete(filter1);
622: if (filter2 != NULL) {
623: mbfl_convert_filter_flush(filter2);
624: mbfl_convert_filter_delete(filter2);
625: }
626:
627: return mbfl_memory_device_result(&device, result);
628: }
629:
630:
631: /*
632: * identify encoding
633: */
634: const mbfl_encoding *
635: mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
636: {
637: int i, n, num, bad;
638: unsigned char *p;
639: mbfl_identify_filter *flist, *filter;
640: const mbfl_encoding *encoding;
641:
642: /* flist is an array of mbfl_identify_filter instances */
643: flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
644: if (flist == NULL) {
645: return NULL;
646: }
647:
648: num = 0;
649: if (elist != NULL) {
650: for (i = 0; i < elistsz; i++) {
651: if (!mbfl_identify_filter_init(&flist[num], elist[i])) {
652: num++;
653: }
654: }
655: }
656:
657: /* feed data */
658: n = string->len;
659: p = string->val;
660:
661: if (p != NULL) {
662: bad = 0;
663: while (n > 0) {
664: for (i = 0; i < num; i++) {
665: filter = &flist[i];
666: if (!filter->flag) {
667: (*filter->filter_function)(*p, filter);
668: if (filter->flag) {
669: bad++;
670: }
671: }
672: }
673: if ((num - 1) <= bad && !strict) {
674: break;
675: }
676: p++;
677: n--;
678: }
679: }
680:
681: /* judge */
682: encoding = NULL;
683:
684: for (i = 0; i < num; i++) {
685: filter = &flist[i];
686: if (!filter->flag) {
687: if (strict && filter->status) {
688: continue;
689: }
690: encoding = filter->encoding;
691: break;
692: }
693: }
694:
695: /* fall-back judge */
696: if (!encoding) {
697: for (i = 0; i < num; i++) {
698: filter = &flist[i];
699: if (!filter->flag && (!strict || !filter->status)) {
700: encoding = filter->encoding;
701: break;
702: }
703: }
704: }
705:
706: /* cleanup */
707: /* dtors should be called in reverse order */
708: i = num; while (--i >= 0) {
709: mbfl_identify_filter_cleanup(&flist[i]);
710: }
711:
712: mbfl_free((void *)flist);
713:
714: return encoding;
715: }
716:
1.1.1.2 misho 717: const mbfl_encoding *
718: mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
1.1 misho 719: {
1.1.1.2 misho 720: int i, n, num, bad;
721: unsigned char *p;
722: mbfl_identify_filter *flist, *filter;
1.1 misho 723: const mbfl_encoding *encoding;
724:
1.1.1.2 misho 725: /* flist is an array of mbfl_identify_filter instances */
726: flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
727: if (flist == NULL) {
1.1 misho 728: return NULL;
729: }
730:
1.1.1.2 misho 731: num = 0;
732: if (elist != NULL) {
733: for (i = 0; i < elistsz; i++) {
734: if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
735: num++;
736: }
737: }
738: }
1.1 misho 739:
1.1.1.2 misho 740: /* feed data */
741: n = string->len;
742: p = string->val;
743:
744: if (p != NULL) {
745: bad = 0;
746: while (n > 0) {
747: for (i = 0; i < num; i++) {
748: filter = &flist[i];
749: if (!filter->flag) {
750: (*filter->filter_function)(*p, filter);
751: if (filter->flag) {
752: bad++;
753: }
754: }
755: }
756: if ((num - 1) <= bad && !strict) {
757: break;
758: }
759: p++;
760: n--;
761: }
1.1 misho 762: }
763:
1.1.1.2 misho 764: /* judge */
765: encoding = NULL;
766:
767: for (i = 0; i < num; i++) {
768: filter = &flist[i];
769: if (!filter->flag) {
770: if (strict && filter->status) {
771: continue;
772: }
773: encoding = filter->encoding;
774: break;
775: }
776: }
777:
778: /* fall-back judge */
779: if (!encoding) {
780: for (i = 0; i < num; i++) {
781: filter = &flist[i];
782: if (!filter->flag && (!strict || !filter->status)) {
783: encoding = filter->encoding;
784: break;
785: }
786: }
787: }
788:
789: /* cleanup */
790: /* dtors should be called in reverse order */
791: i = num; while (--i >= 0) {
792: mbfl_identify_filter_cleanup(&flist[i]);
793: }
794:
795: mbfl_free((void *)flist);
796:
797: return encoding;
798: }
1.1 misho 799:
800: /*
801: * strlen
802: */
/*
 * Output callback that counts characters: `data` points to an int counter
 * which is incremented once per call. The character is returned unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	int *counter = (int *)data;

	*counter += 1;
	return c;
}
809:
810: int
811: mbfl_strlen(mbfl_string *string)
812: {
813: int len, n, m, k;
814: unsigned char *p;
815: const unsigned char *mbtab;
816: const mbfl_encoding *encoding;
817:
818: encoding = mbfl_no2encoding(string->no_encoding);
819: if (encoding == NULL || string == NULL) {
820: return -1;
821: }
822:
823: len = 0;
824: if (encoding->flag & MBFL_ENCTYPE_SBCS) {
825: len = string->len;
826: } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
827: len = string->len/2;
828: } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
829: len = string->len/4;
830: } else if (encoding->mblen_table != NULL) {
831: mbtab = encoding->mblen_table;
832: n = 0;
833: p = string->val;
834: k = string->len;
835: /* count */
836: if (p != NULL) {
837: while (n < k) {
838: m = mbtab[*p];
839: n += m;
840: p += m;
841: len++;
842: };
843: }
844: } else {
845: /* wchar filter */
846: mbfl_convert_filter *filter = mbfl_convert_filter_new(
847: string->no_encoding,
848: mbfl_no_encoding_wchar,
849: filter_count_output, 0, &len);
850: if (filter == NULL) {
851: return -1;
852: }
853: /* count */
854: n = string->len;
855: p = string->val;
856: if (p != NULL) {
857: while (n > 0) {
858: (*filter->filter_function)(*p++, filter);
859: n--;
860: }
861: }
862: mbfl_convert_filter_delete(filter);
863: }
864:
865: return len;
866: }
867:
868:
869: /*
870: * strpos
871: */
872: struct collector_strpos_data {
873: mbfl_convert_filter *next_filter;
874: mbfl_wchar_device needle;
875: int needle_len;
876: int start;
877: int output;
878: int found_pos;
879: int needle_pos;
880: int matched_pos;
881: };
882:
883: static int
884: collector_strpos(int c, void* data)
885: {
886: int *p, *h, *m, n;
887: struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
888:
889: if (pc->output >= pc->start) {
890: if (c == (int)pc->needle.buffer[pc->needle_pos]) {
891: if (pc->needle_pos == 0) {
892: pc->found_pos = pc->output; /* found position */
893: }
894: pc->needle_pos++; /* needle pointer */
895: if (pc->needle_pos >= pc->needle_len) {
896: pc->matched_pos = pc->found_pos; /* matched position */
897: pc->needle_pos--;
898: goto retry;
899: }
900: } else if (pc->needle_pos != 0) {
901: retry:
902: h = (int *)pc->needle.buffer;
903: h++;
904: for (;;) {
905: pc->found_pos++;
906: p = h;
907: m = (int *)pc->needle.buffer;
908: n = pc->needle_pos - 1;
909: while (n > 0 && *p == *m) {
910: n--;
911: p++;
912: m++;
913: }
914: if (n <= 0) {
915: if (*m != c) {
916: pc->needle_pos = 0;
917: }
918: break;
919: } else {
920: h++;
921: pc->needle_pos--;
922: }
923: }
924: }
925: }
926:
927: pc->output++;
928: return c;
929: }
930:
931: /*
932: * oddlen
933: */
934: int
935: mbfl_oddlen(mbfl_string *string)
936: {
937: int len, n, m, k;
938: unsigned char *p;
939: const unsigned char *mbtab;
940: const mbfl_encoding *encoding;
941:
942:
943: if (string == NULL) {
944: return -1;
945: }
946: encoding = mbfl_no2encoding(string->no_encoding);
947: if (encoding == NULL) {
948: return -1;
949: }
950:
951: len = 0;
952: if (encoding->flag & MBFL_ENCTYPE_SBCS) {
953: return 0;
954: } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
955: return len % 2;
956: } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
957: return len % 4;
958: } else if (encoding->mblen_table != NULL) {
959: mbtab = encoding->mblen_table;
960: n = 0;
961: p = string->val;
962: k = string->len;
963: /* count */
964: if (p != NULL) {
965: while (n < k) {
966: m = mbtab[*p];
967: n += m;
968: p += m;
969: };
970: }
971: return n-k;
972: } else {
973: /* how can i do ? */
974: return 0;
975: }
976: /* NOT REACHED */
977: }
978:
979: int
980: mbfl_strpos(
981: mbfl_string *haystack,
982: mbfl_string *needle,
983: int offset,
984: int reverse)
985: {
986: int result;
987: mbfl_string _haystack_u8, _needle_u8;
1.1.1.3 ! misho 988: const mbfl_string *haystack_u8, *needle_u8 = NULL;
1.1 misho 989: const unsigned char *u8_tbl;
990:
991: if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
992: return -8;
993: }
994:
995: {
996: const mbfl_encoding *u8_enc;
997: u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
998: if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
999: return -8;
1000: }
1001: u8_tbl = u8_enc->mblen_table;
1002: }
1003:
1004: if (haystack->no_encoding != mbfl_no_encoding_utf8) {
1005: mbfl_string_init(&_haystack_u8);
1006: haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
1007: if (haystack_u8 == NULL) {
1008: result = -4;
1009: goto out;
1010: }
1011: } else {
1012: haystack_u8 = haystack;
1013: }
1014:
1015: if (needle->no_encoding != mbfl_no_encoding_utf8) {
1016: mbfl_string_init(&_needle_u8);
1017: needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
1018: if (needle_u8 == NULL) {
1019: result = -4;
1020: goto out;
1021: }
1022: } else {
1023: needle_u8 = needle;
1024: }
1025:
1026: if (needle_u8->len < 1) {
1027: result = -8;
1028: goto out;
1029: }
1030:
1031: result = -1;
1032: if (haystack_u8->len < needle_u8->len) {
1033: goto out;
1034: }
1035:
1036: if (!reverse) {
1037: unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
1038: unsigned int needle_u8_len = needle_u8->len;
1039: unsigned int i;
1040: const unsigned char *p, *q, *e;
1041: const unsigned char *haystack_u8_val = haystack_u8->val,
1042: *needle_u8_val = needle_u8->val;
1043: for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
1044: jtbl[i] = needle_u8_len + 1;
1045: }
1046: for (i = 0; i < needle_u8_len - 1; ++i) {
1047: jtbl[needle_u8_val[i]] = needle_u8_len - i;
1048: }
1049: e = haystack_u8_val + haystack_u8->len;
1050: p = haystack_u8_val;
1051: while (--offset >= 0) {
1052: if (p >= e) {
1053: result = -16;
1054: goto out;
1055: }
1056: p += u8_tbl[*p];
1057: }
1058: p += needle_u8_len;
1059: if (p > e) {
1060: goto out;
1061: }
1062: while (p <= e) {
1063: const unsigned char *pv = p;
1064: q = needle_u8_val + needle_u8_len;
1065: for (;;) {
1066: if (q == needle_u8_val) {
1067: result = 0;
1068: while (p > haystack_u8_val) {
1069: unsigned char c = *--p;
1070: if (c < 0x80) {
1071: ++result;
1072: } else if ((c & 0xc0) != 0x80) {
1073: ++result;
1074: }
1075: }
1076: goto out;
1077: }
1078: if (*--q != *--p) {
1079: break;
1080: }
1081: }
1082: p += jtbl[*p];
1083: if (p <= pv) {
1084: p = pv + 1;
1085: }
1086: }
1087: } else {
1088: unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
1089: unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
1090: unsigned int i;
1091: const unsigned char *p, *e, *q, *qe;
1092: const unsigned char *haystack_u8_val = haystack_u8->val,
1093: *needle_u8_val = needle_u8->val;
1094: for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
1095: jtbl[i] = needle_u8_len;
1096: }
1097: for (i = needle_u8_len - 1; i > 0; --i) {
1098: unsigned char c = needle_u8_val[i];
1099: jtbl[c] = i;
1100: if (c < 0x80) {
1101: ++needle_len;
1102: } else if ((c & 0xc0) != 0x80) {
1103: ++needle_len;
1104: }
1105: }
1106: {
1107: unsigned char c = needle_u8_val[0];
1108: if (c < 0x80) {
1109: ++needle_len;
1110: } else if ((c & 0xc0) != 0x80) {
1111: ++needle_len;
1112: }
1113: }
1114: e = haystack_u8_val;
1115: p = e + haystack_u8->len;
1116: qe = needle_u8_val + needle_u8_len;
1117: if (offset < 0) {
1118: if (-offset > needle_len) {
1119: offset += needle_len;
1120: while (offset < 0) {
1121: unsigned char c;
1122: if (p <= e) {
1123: result = -16;
1124: goto out;
1125: }
1126: c = *(--p);
1127: if (c < 0x80) {
1128: ++offset;
1129: } else if ((c & 0xc0) != 0x80) {
1130: ++offset;
1131: }
1132: }
1133: }
1134: } else {
1135: const unsigned char *ee = haystack_u8_val + haystack_u8->len;
1136: while (--offset >= 0) {
1137: if (e >= ee) {
1138: result = -16;
1139: goto out;
1140: }
1141: e += u8_tbl[*e];
1142: }
1143: }
1144: if (p < e + needle_u8_len) {
1145: goto out;
1146: }
1147: p -= needle_u8_len;
1148: while (p >= e) {
1149: const unsigned char *pv = p;
1150: q = needle_u8_val;
1151: for (;;) {
1152: if (q == qe) {
1153: result = 0;
1154: p -= needle_u8_len;
1155: while (p > haystack_u8_val) {
1156: unsigned char c = *--p;
1157: if (c < 0x80) {
1158: ++result;
1159: } else if ((c & 0xc0) != 0x80) {
1160: ++result;
1161: }
1162: }
1163: goto out;
1164: }
1165: if (*q != *p) {
1166: break;
1167: }
1168: ++p, ++q;
1169: }
1170: p -= jtbl[*p];
1171: if (p >= pv) {
1172: p = pv - 1;
1173: }
1174: }
1175: }
1176: out:
1177: if (haystack_u8 == &_haystack_u8) {
1178: mbfl_string_clear(&_haystack_u8);
1179: }
1180: if (needle_u8 == &_needle_u8) {
1181: mbfl_string_clear(&_needle_u8);
1182: }
1183: return result;
1184: }
1185:
1186: /*
1187: * substr_count
1188: */
1189:
1190: int
1191: mbfl_substr_count(
1192: mbfl_string *haystack,
1193: mbfl_string *needle
1194: )
1195: {
1196: int n, result = 0;
1197: unsigned char *p;
1198: mbfl_convert_filter *filter;
1199: struct collector_strpos_data pc;
1200:
1201: if (haystack == NULL || needle == NULL) {
1202: return -8;
1203: }
1204: /* needle is converted into wchar */
1205: mbfl_wchar_device_init(&pc.needle);
1206: filter = mbfl_convert_filter_new(
1207: needle->no_encoding,
1208: mbfl_no_encoding_wchar,
1209: mbfl_wchar_device_output, 0, &pc.needle);
1210: if (filter == NULL) {
1211: return -4;
1212: }
1213: p = needle->val;
1214: n = needle->len;
1215: if (p != NULL) {
1216: while (n > 0) {
1217: if ((*filter->filter_function)(*p++, filter) < 0) {
1218: break;
1219: }
1220: n--;
1221: }
1222: }
1223: mbfl_convert_filter_flush(filter);
1224: mbfl_convert_filter_delete(filter);
1225: pc.needle_len = pc.needle.pos;
1226: if (pc.needle.buffer == NULL) {
1227: return -4;
1228: }
1229: if (pc.needle_len <= 0) {
1230: mbfl_wchar_device_clear(&pc.needle);
1231: return -2;
1232: }
1233: /* initialize filter and collector data */
1234: filter = mbfl_convert_filter_new(
1235: haystack->no_encoding,
1236: mbfl_no_encoding_wchar,
1237: collector_strpos, 0, &pc);
1238: if (filter == NULL) {
1239: mbfl_wchar_device_clear(&pc.needle);
1240: return -4;
1241: }
1242: pc.start = 0;
1243: pc.output = 0;
1244: pc.needle_pos = 0;
1245: pc.found_pos = 0;
1246: pc.matched_pos = -1;
1247:
1248: /* feed data */
1249: p = haystack->val;
1250: n = haystack->len;
1251: if (p != NULL) {
1252: while (n > 0) {
1253: if ((*filter->filter_function)(*p++, filter) < 0) {
1254: pc.matched_pos = -4;
1255: break;
1256: }
1257: if (pc.matched_pos >= 0) {
1258: ++result;
1259: pc.matched_pos = -1;
1260: pc.needle_pos = 0;
1261: }
1262: n--;
1263: }
1264: }
1265: mbfl_convert_filter_flush(filter);
1266: mbfl_convert_filter_delete(filter);
1267: mbfl_wchar_device_clear(&pc.needle);
1268:
1269: return result;
1270: }
1271:
1272: /*
1273: * substr
1274: */
1275: struct collector_substr_data {
1276: mbfl_convert_filter *next_filter;
1277: int start;
1278: int stop;
1279: int output;
1280: };
1281:
1282: static int
1283: collector_substr(int c, void* data)
1284: {
1285: struct collector_substr_data *pc = (struct collector_substr_data*)data;
1286:
1287: if (pc->output >= pc->stop) {
1288: return -1;
1289: }
1290:
1291: if (pc->output >= pc->start) {
1292: (*pc->next_filter->filter_function)(c, pc->next_filter);
1293: }
1294:
1295: pc->output++;
1296:
1297: return c;
1298: }
1299:
1300: mbfl_string *
1301: mbfl_substr(
1302: mbfl_string *string,
1303: mbfl_string *result,
1304: int from,
1305: int length)
1306: {
1307: const mbfl_encoding *encoding;
1308: int n, m, k, len, start, end;
1309: unsigned char *p, *w;
1310: const unsigned char *mbtab;
1311:
1312: encoding = mbfl_no2encoding(string->no_encoding);
1313: if (encoding == NULL || string == NULL || result == NULL) {
1314: return NULL;
1315: }
1316: mbfl_string_init(result);
1317: result->no_language = string->no_language;
1318: result->no_encoding = string->no_encoding;
1319:
1320: if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1321: encoding->mblen_table != NULL) {
1322: len = string->len;
1323: start = from;
1324: end = from + length;
1325: if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1326: start *= 2;
1327: end = start + length*2;
1328: } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1329: start *= 4;
1330: end = start + length*4;
1331: } else if (encoding->mblen_table != NULL) {
1332: mbtab = encoding->mblen_table;
1333: start = 0;
1334: end = 0;
1335: n = 0;
1336: k = 0;
1337: p = string->val;
1338: if (p != NULL) {
1339: /* search start position */
1340: while (k <= from) {
1341: start = n;
1342: if (n >= len) {
1343: break;
1344: }
1345: m = mbtab[*p];
1346: n += m;
1347: p += m;
1348: k++;
1349: }
1350: /* detect end position */
1351: k = 0;
1352: end = start;
1353: while (k < length) {
1354: end = n;
1355: if (n >= len) {
1356: break;
1357: }
1358: m = mbtab[*p];
1359: n += m;
1360: p += m;
1361: k++;
1362: }
1363: }
1364: }
1365:
1366: if (start > len) {
1367: start = len;
1368: }
1369: if (start < 0) {
1370: start = 0;
1371: }
1372: if (end > len) {
1373: end = len;
1374: }
1375: if (end < 0) {
1376: end = 0;
1377: }
1378: if (start > end) {
1379: start = end;
1380: }
1381:
1382: /* allocate memory and copy */
1383: n = end - start;
1384: result->len = 0;
1385: result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
1386: if (w != NULL) {
1387: p = string->val;
1388: if (p != NULL) {
1389: p += start;
1390: result->len = n;
1391: while (n > 0) {
1392: *w++ = *p++;
1393: n--;
1394: }
1395: }
1396: *w++ = '\0';
1397: *w++ = '\0';
1398: *w++ = '\0';
1399: *w = '\0';
1400: } else {
1401: result = NULL;
1402: }
1403: } else {
1404: mbfl_memory_device device;
1405: struct collector_substr_data pc;
1406: mbfl_convert_filter *decoder;
1407: mbfl_convert_filter *encoder;
1408:
1409: mbfl_memory_device_init(&device, length + 1, 0);
1410: mbfl_string_init(result);
1411: result->no_language = string->no_language;
1412: result->no_encoding = string->no_encoding;
1413: /* output code filter */
1414: decoder = mbfl_convert_filter_new(
1415: mbfl_no_encoding_wchar,
1416: string->no_encoding,
1417: mbfl_memory_device_output, 0, &device);
1418: /* wchar filter */
1419: encoder = mbfl_convert_filter_new(
1420: string->no_encoding,
1421: mbfl_no_encoding_wchar,
1422: collector_substr, 0, &pc);
1423: if (decoder == NULL || encoder == NULL) {
1424: mbfl_convert_filter_delete(encoder);
1425: mbfl_convert_filter_delete(decoder);
1426: return NULL;
1427: }
1428: pc.next_filter = decoder;
1429: pc.start = from;
1430: pc.stop = from + length;
1431: pc.output = 0;
1432:
1433: /* feed data */
1434: p = string->val;
1435: n = string->len;
1436: if (p != NULL) {
1437: while (n > 0) {
1438: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1439: break;
1440: }
1441: n--;
1442: }
1443: }
1444:
1445: mbfl_convert_filter_flush(encoder);
1446: mbfl_convert_filter_flush(decoder);
1447: result = mbfl_memory_device_result(&device, result);
1448: mbfl_convert_filter_delete(encoder);
1449: mbfl_convert_filter_delete(decoder);
1450: }
1451:
1452: return result;
1453: }
1454:
1455: /*
1456: * strcut
1457: */
1458: mbfl_string *
1459: mbfl_strcut(
1460: mbfl_string *string,
1461: mbfl_string *result,
1462: int from,
1463: int length)
1464: {
1465: const mbfl_encoding *encoding;
1466: mbfl_memory_device device;
1467:
1468: /* validate the parameters */
1469: if (string == NULL || string->val == NULL || result == NULL) {
1470: return NULL;
1471: }
1472:
1473: if (from < 0 || length < 0) {
1474: return NULL;
1475: }
1476:
1477: if (from >= string->len) {
1478: from = string->len;
1479: }
1480:
1481: encoding = mbfl_no2encoding(string->no_encoding);
1482: if (encoding == NULL) {
1483: return NULL;
1484: }
1485:
1486: mbfl_string_init(result);
1487: result->no_language = string->no_language;
1488: result->no_encoding = string->no_encoding;
1489:
1490: if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1491: | MBFL_ENCTYPE_WCS2BE
1492: | MBFL_ENCTYPE_WCS2LE
1493: | MBFL_ENCTYPE_WCS4BE
1494: | MBFL_ENCTYPE_WCS4LE))
1495: || encoding->mblen_table != NULL) {
1496: const unsigned char *start = NULL;
1497: const unsigned char *end = NULL;
1498: unsigned char *w;
1499: unsigned int sz;
1500:
1501: if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1502: from &= -2;
1503:
1504: if (from + length >= string->len) {
1505: length = string->len - from;
1506: }
1507:
1508: start = string->val + from;
1509: end = start + (length & -2);
1510: } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1511: from &= -4;
1512:
1513: if (from + length >= string->len) {
1514: length = string->len - from;
1515: }
1516:
1517: start = string->val + from;
1518: end = start + (length & -4);
1519: } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1520: if (from + length >= string->len) {
1521: length = string->len - from;
1522: }
1523:
1524: start = string->val + from;
1525: end = start + length;
1526: } else if (encoding->mblen_table != NULL) {
1527: const unsigned char *mbtab = encoding->mblen_table;
1528: const unsigned char *p, *q;
1529: int m;
1530:
1531: /* search start position */
1532: for (m = 0, p = string->val, q = p + from;
1533: p < q; p += (m = mbtab[*p]));
1534:
1535: if (p > q) {
1536: p -= m;
1537: }
1538:
1539: start = p;
1540:
1541: /* search end position */
1542: if ((start - string->val) + length >= (int)string->len) {
1543: end = string->val + string->len;
1544: } else {
1545: for (q = p + length; p < q; p += (m = mbtab[*p]));
1546:
1547: if (p > q) {
1548: p -= m;
1549: }
1550: end = p;
1551: }
1552: } else {
1553: /* never reached */
1554: return NULL;
1555: }
1556:
1557: /* allocate memory and copy string */
1558: sz = end - start;
1559: if ((w = (unsigned char*)mbfl_calloc(sz + 8,
1560: sizeof(unsigned char))) == NULL) {
1561: return NULL;
1562: }
1563:
1564: memcpy(w, start, sz);
1565: w[sz] = '\0';
1566: w[sz + 1] = '\0';
1567: w[sz + 2] = '\0';
1568: w[sz + 3] = '\0';
1569:
1570: result->val = w;
1571: result->len = sz;
1572: } else {
1573: mbfl_convert_filter *encoder = NULL;
1574: mbfl_convert_filter *decoder = NULL;
1575: const unsigned char *p, *q, *r;
1576: struct {
1577: mbfl_convert_filter encoder;
1578: mbfl_convert_filter decoder;
1579: const unsigned char *p;
1580: int pos;
1581: } bk, _bk;
1582:
1583: /* output code filter */
1584: if (!(decoder = mbfl_convert_filter_new(
1585: mbfl_no_encoding_wchar,
1586: string->no_encoding,
1587: mbfl_memory_device_output, 0, &device))) {
1588: return NULL;
1589: }
1590:
1591: /* wchar filter */
1592: if (!(encoder = mbfl_convert_filter_new(
1593: string->no_encoding,
1594: mbfl_no_encoding_wchar,
1595: mbfl_filter_output_null,
1596: NULL, NULL))) {
1597: mbfl_convert_filter_delete(decoder);
1598: return NULL;
1599: }
1600:
1601: mbfl_memory_device_init(&device, length + 8, 0);
1602:
1603: p = string->val;
1604:
1605: /* search start position */
1606: for (q = string->val + from; p < q; p++) {
1607: (*encoder->filter_function)(*p, encoder);
1608: }
1609:
1610: /* switch the drain direction */
1611: encoder->output_function = (int(*)(int,void *))decoder->filter_function;
1612: encoder->flush_function = (int(*)(void *))decoder->filter_flush;
1613: encoder->data = decoder;
1614:
1615: q = string->val + string->len;
1616:
1617: /* save the encoder, decoder state and the pointer */
1618: mbfl_convert_filter_copy(decoder, &_bk.decoder);
1619: mbfl_convert_filter_copy(encoder, &_bk.encoder);
1620: _bk.p = p;
1621: _bk.pos = device.pos;
1622:
1623: if (length > q - p) {
1624: length = q - p;
1625: }
1626:
1627: if (length >= 20) {
1628: /* output a little shorter than "length" */
1629: /* XXX: the constant "20" was determined purely on the heuristics. */
1630: for (r = p + length - 20; p < r; p++) {
1631: (*encoder->filter_function)(*p, encoder);
1632: }
1633:
1634: /* if the offset of the resulting string exceeds the length,
1635: * then restore the state */
1636: if (device.pos > length) {
1637: p = _bk.p;
1638: device.pos = _bk.pos;
1639: decoder->filter_dtor(decoder);
1640: encoder->filter_dtor(encoder);
1641: mbfl_convert_filter_copy(&_bk.decoder, decoder);
1642: mbfl_convert_filter_copy(&_bk.encoder, encoder);
1643: bk = _bk;
1644: } else {
1645: /* save the encoder, decoder state and the pointer */
1646: mbfl_convert_filter_copy(decoder, &bk.decoder);
1647: mbfl_convert_filter_copy(encoder, &bk.encoder);
1648: bk.p = p;
1649: bk.pos = device.pos;
1650:
1651: /* flush the stream */
1652: (*encoder->filter_flush)(encoder);
1653:
1654: /* if the offset of the resulting string exceeds the length,
1655: * then restore the state */
1656: if (device.pos > length) {
1657: bk.decoder.filter_dtor(&bk.decoder);
1658: bk.encoder.filter_dtor(&bk.encoder);
1659:
1660: p = _bk.p;
1661: device.pos = _bk.pos;
1662: decoder->filter_dtor(decoder);
1663: encoder->filter_dtor(encoder);
1664: mbfl_convert_filter_copy(&_bk.decoder, decoder);
1665: mbfl_convert_filter_copy(&_bk.encoder, encoder);
1666: bk = _bk;
1667: } else {
1668: _bk.decoder.filter_dtor(&_bk.decoder);
1669: _bk.encoder.filter_dtor(&_bk.encoder);
1670:
1671: p = bk.p;
1672: device.pos = bk.pos;
1673: decoder->filter_dtor(decoder);
1674: encoder->filter_dtor(encoder);
1675: mbfl_convert_filter_copy(&bk.decoder, decoder);
1676: mbfl_convert_filter_copy(&bk.encoder, encoder);
1677: }
1678: }
1679: } else {
1680: bk = _bk;
1681: }
1682:
1683: /* detect end position */
1684: while (p < q) {
1685: (*encoder->filter_function)(*p, encoder);
1686:
1687: if (device.pos > length) {
1688: /* restore filter */
1689: p = bk.p;
1690: device.pos = bk.pos;
1691: decoder->filter_dtor(decoder);
1692: encoder->filter_dtor(encoder);
1693: mbfl_convert_filter_copy(&bk.decoder, decoder);
1694: mbfl_convert_filter_copy(&bk.encoder, encoder);
1695: break;
1696: }
1697:
1698: p++;
1699:
1700: /* backup current state */
1701: mbfl_convert_filter_copy(decoder, &_bk.decoder);
1702: mbfl_convert_filter_copy(encoder, &_bk.encoder);
1703: _bk.pos = device.pos;
1704: _bk.p = p;
1705:
1706: (*encoder->filter_flush)(encoder);
1707:
1708: if (device.pos > length) {
1709: _bk.decoder.filter_dtor(&_bk.decoder);
1710: _bk.encoder.filter_dtor(&_bk.encoder);
1711:
1712: /* restore filter */
1713: p = bk.p;
1714: device.pos = bk.pos;
1715: decoder->filter_dtor(decoder);
1716: encoder->filter_dtor(encoder);
1717: mbfl_convert_filter_copy(&bk.decoder, decoder);
1718: mbfl_convert_filter_copy(&bk.encoder, encoder);
1719: break;
1720: }
1721:
1722: bk.decoder.filter_dtor(&bk.decoder);
1723: bk.encoder.filter_dtor(&bk.encoder);
1724:
1725: p = _bk.p;
1726: device.pos = _bk.pos;
1727: decoder->filter_dtor(decoder);
1728: encoder->filter_dtor(encoder);
1729: mbfl_convert_filter_copy(&_bk.decoder, decoder);
1730: mbfl_convert_filter_copy(&_bk.encoder, encoder);
1731:
1732: bk = _bk;
1733: }
1734:
1735: (*encoder->filter_flush)(encoder);
1736:
1737: bk.decoder.filter_dtor(&bk.decoder);
1738: bk.encoder.filter_dtor(&bk.encoder);
1739:
1740: result = mbfl_memory_device_result(&device, result);
1741:
1742: mbfl_convert_filter_delete(encoder);
1743: mbfl_convert_filter_delete(decoder);
1744: }
1745:
1746: return result;
1747: }
1748:
1749:
1750: /*
1751: * strwidth
1752: */
1753: static int is_fullwidth(int c)
1754: {
1755: int i;
1756:
1757: if (c < mbfl_eaw_table[0].begin) {
1758: return 0;
1759: }
1760:
1761: for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1762: if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1763: return 1;
1764: }
1765: }
1766:
1767: return 0;
1768: }
1769:
/*
 * Output callback that accumulates display width: adds 2 to the int that
 * data points at for a character is_fullwidth() accepts, 1 for any other.
 * Returns c unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	int *total = (int *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1776:
1777: int
1778: mbfl_strwidth(mbfl_string *string)
1779: {
1780: int len, n;
1781: unsigned char *p;
1782: mbfl_convert_filter *filter;
1783:
1784: len = 0;
1785: if (string->len > 0 && string->val != NULL) {
1786: /* wchar filter */
1787: filter = mbfl_convert_filter_new(
1788: string->no_encoding,
1789: mbfl_no_encoding_wchar,
1790: filter_count_width, 0, &len);
1791: if (filter == NULL) {
1792: mbfl_convert_filter_delete(filter);
1793: return -1;
1794: }
1795:
1796: /* feed data */
1797: p = string->val;
1798: n = string->len;
1799: while (n > 0) {
1800: (*filter->filter_function)(*p++, filter);
1801: n--;
1802: }
1803:
1804: mbfl_convert_filter_flush(filter);
1805: mbfl_convert_filter_delete(filter);
1806: }
1807:
1808: return len;
1809: }
1810:
1811:
1812: /*
1813: * strimwidth
1814: */
1815: struct collector_strimwidth_data {
1816: mbfl_convert_filter *decoder;
1817: mbfl_convert_filter *decoder_backup;
1818: mbfl_memory_device device;
1819: int from;
1820: int width;
1821: int outwidth;
1822: int outchar;
1823: int status;
1824: int endpos;
1825: };
1826:
1827: static int
1828: collector_strimwidth(int c, void* data)
1829: {
1830: struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1831:
1832: switch (pc->status) {
1833: case 10:
1834: (*pc->decoder->filter_function)(c, pc->decoder);
1835: break;
1836: default:
1837: if (pc->outchar >= pc->from) {
1838: pc->outwidth += (is_fullwidth(c) ? 2: 1);
1839:
1840: if (pc->outwidth > pc->width) {
1841: if (pc->status == 0) {
1842: pc->endpos = pc->device.pos;
1843: mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1844: }
1845: pc->status++;
1846: (*pc->decoder->filter_function)(c, pc->decoder);
1847: c = -1;
1848: } else {
1849: (*pc->decoder->filter_function)(c, pc->decoder);
1850: }
1851: }
1852: pc->outchar++;
1853: break;
1854: }
1855:
1856: return c;
1857: }
1858:
1859: mbfl_string *
1860: mbfl_strimwidth(
1861: mbfl_string *string,
1862: mbfl_string *marker,
1863: mbfl_string *result,
1864: int from,
1865: int width)
1866: {
1867: struct collector_strimwidth_data pc;
1868: mbfl_convert_filter *encoder;
1869: int n, mkwidth;
1870: unsigned char *p;
1871:
1872: if (string == NULL || result == NULL) {
1873: return NULL;
1874: }
1875: mbfl_string_init(result);
1876: result->no_language = string->no_language;
1877: result->no_encoding = string->no_encoding;
1878: mbfl_memory_device_init(&pc.device, width, 0);
1879:
1880: /* output code filter */
1881: pc.decoder = mbfl_convert_filter_new(
1882: mbfl_no_encoding_wchar,
1883: string->no_encoding,
1884: mbfl_memory_device_output, 0, &pc.device);
1885: pc.decoder_backup = mbfl_convert_filter_new(
1886: mbfl_no_encoding_wchar,
1887: string->no_encoding,
1888: mbfl_memory_device_output, 0, &pc.device);
1889: /* wchar filter */
1890: encoder = mbfl_convert_filter_new(
1891: string->no_encoding,
1892: mbfl_no_encoding_wchar,
1893: collector_strimwidth, 0, &pc);
1894: if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1895: mbfl_convert_filter_delete(encoder);
1896: mbfl_convert_filter_delete(pc.decoder);
1897: mbfl_convert_filter_delete(pc.decoder_backup);
1898: return NULL;
1899: }
1900: mkwidth = 0;
1901: if (marker) {
1902: mkwidth = mbfl_strwidth(marker);
1903: }
1904: pc.from = from;
1905: pc.width = width - mkwidth;
1906: pc.outwidth = 0;
1907: pc.outchar = 0;
1908: pc.status = 0;
1909: pc.endpos = 0;
1910:
1911: /* feed data */
1912: p = string->val;
1913: n = string->len;
1914: if (p != NULL) {
1915: while (n > 0) {
1916: n--;
1917: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1918: break;
1919: }
1920: }
1921: mbfl_convert_filter_flush(encoder);
1922: if (pc.status != 0 && mkwidth > 0) {
1923: pc.width += mkwidth;
1924: while (n > 0) {
1925: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1926: break;
1927: }
1928: n--;
1929: }
1930: mbfl_convert_filter_flush(encoder);
1931: if (pc.status != 1) {
1932: pc.status = 10;
1933: pc.device.pos = pc.endpos;
1934: mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1935: mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar);
1936: p = marker->val;
1937: n = marker->len;
1938: while (n > 0) {
1939: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1940: break;
1941: }
1942: n--;
1943: }
1944: mbfl_convert_filter_flush(encoder);
1945: }
1946: } else if (pc.status != 0) {
1947: pc.device.pos = pc.endpos;
1948: mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1949: }
1950: mbfl_convert_filter_flush(pc.decoder);
1951: }
1952: result = mbfl_memory_device_result(&pc.device, result);
1953: mbfl_convert_filter_delete(encoder);
1954: mbfl_convert_filter_delete(pc.decoder);
1955: mbfl_convert_filter_delete(pc.decoder_backup);
1956:
1957: return result;
1958: }
1959:
1960: mbfl_string *
1961: mbfl_ja_jp_hantozen(
1962: mbfl_string *string,
1963: mbfl_string *result,
1964: int mode)
1965: {
1966: int n;
1967: unsigned char *p;
1968: const mbfl_encoding *encoding;
1969: mbfl_memory_device device;
1970: mbfl_convert_filter *decoder = NULL;
1971: mbfl_convert_filter *encoder = NULL;
1972: mbfl_convert_filter *tl_filter = NULL;
1973: mbfl_convert_filter *next_filter = NULL;
1974: mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1975:
1976: /* validate parameters */
1977: if (string == NULL || result == NULL) {
1978: return NULL;
1979: }
1980:
1981: encoding = mbfl_no2encoding(string->no_encoding);
1982: if (encoding == NULL) {
1983: return NULL;
1984: }
1985:
1986: mbfl_memory_device_init(&device, string->len, 0);
1987: mbfl_string_init(result);
1988:
1989: result->no_language = string->no_language;
1990: result->no_encoding = string->no_encoding;
1991:
1992: decoder = mbfl_convert_filter_new(
1993: mbfl_no_encoding_wchar,
1994: string->no_encoding,
1995: mbfl_memory_device_output, 0, &device);
1996: if (decoder == NULL) {
1997: goto out;
1998: }
1999: next_filter = decoder;
2000:
2001: param =
2002: (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
2003: if (param == NULL) {
2004: goto out;
2005: }
2006:
2007: param->mode = mode;
2008:
2009: tl_filter = mbfl_convert_filter_new2(
2010: &vtbl_tl_jisx0201_jisx0208,
2011: (int(*)(int, void*))next_filter->filter_function,
2012: (int(*)(void*))next_filter->filter_flush,
2013: next_filter);
2014: if (tl_filter == NULL) {
2015: mbfl_free(param);
2016: goto out;
2017: }
2018:
2019: tl_filter->opaque = param;
2020: next_filter = tl_filter;
2021:
2022: encoder = mbfl_convert_filter_new(
2023: string->no_encoding,
2024: mbfl_no_encoding_wchar,
2025: (int(*)(int, void*))next_filter->filter_function,
2026: (int(*)(void*))next_filter->filter_flush,
2027: next_filter);
2028: if (encoder == NULL) {
2029: goto out;
2030: }
2031:
2032: /* feed data */
2033: p = string->val;
2034: n = string->len;
2035: if (p != NULL) {
2036: while (n > 0) {
2037: if ((*encoder->filter_function)(*p++, encoder) < 0) {
2038: break;
2039: }
2040: n--;
2041: }
2042: }
2043:
2044: mbfl_convert_filter_flush(encoder);
2045: result = mbfl_memory_device_result(&device, result);
2046: out:
2047: if (tl_filter != NULL) {
2048: if (tl_filter->opaque != NULL) {
2049: mbfl_free(tl_filter->opaque);
2050: }
2051: mbfl_convert_filter_delete(tl_filter);
2052: }
2053:
2054: if (decoder != NULL) {
2055: mbfl_convert_filter_delete(decoder);
2056: }
2057:
2058: if (encoder != NULL) {
2059: mbfl_convert_filter_delete(encoder);
2060: }
2061:
2062: return result;
2063: }
2064:
2065:
2066: /*
2067: * MIME header encode
2068: */
2069: struct mime_header_encoder_data {
2070: mbfl_convert_filter *conv1_filter;
2071: mbfl_convert_filter *block_filter;
2072: mbfl_convert_filter *conv2_filter;
2073: mbfl_convert_filter *conv2_filter_backup;
2074: mbfl_convert_filter *encod_filter;
2075: mbfl_convert_filter *encod_filter_backup;
2076: mbfl_memory_device outdev;
2077: mbfl_memory_device tmpdev;
2078: int status1;
2079: int status2;
2080: int prevpos;
2081: int linehead;
2082: int firstindent;
2083: int encnamelen;
2084: int lwsplen;
2085: char encname[128];
2086: char lwsp[16];
2087: };
2088:
2089: static int
2090: mime_header_encoder_block_collector(int c, void *data)
2091: {
2092: int n;
2093: struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2094:
2095: switch (pe->status2) {
2096: case 1: /* encoded word */
2097: pe->prevpos = pe->outdev.pos;
2098: mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
2099: mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
2100: (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2101: (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2102: (*pe->encod_filter->filter_flush)(pe->encod_filter);
2103: n = pe->outdev.pos - pe->linehead + pe->firstindent;
2104: pe->outdev.pos = pe->prevpos;
2105: mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
2106: mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
2107: if (n >= 74) {
2108: (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2109: (*pe->encod_filter->filter_flush)(pe->encod_filter);
2110: mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2111: mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2112: pe->linehead = pe->outdev.pos;
2113: pe->firstindent = 0;
2114: mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2115: c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2116: } else {
2117: c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2118: }
2119: break;
2120:
2121: default:
2122: mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2123: c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2124: pe->status2 = 1;
2125: break;
2126: }
2127:
2128: return c;
2129: }
2130:
2131: static int
2132: mime_header_encoder_collector(int c, void *data)
2133: {
2134: static int qp_table[256] = {
2135: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2136: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2137: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
2138: 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
2139: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
2140: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
2141: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
2142: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
2143: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
2144: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
2145: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
2146: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
2147: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
2148: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
2149: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
2150: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0 */
2151: };
2152:
2153: int n;
2154: struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2155:
2156: switch (pe->status1) {
2157: case 11: /* encoded word */
2158: (*pe->block_filter->filter_function)(c, pe->block_filter);
2159: break;
2160:
2161: default: /* ASCII */
2162: if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
2163: mbfl_memory_device_output(c, &pe->tmpdev);
2164: pe->status1 = 1;
2165: } else if (pe->status1 == 0 && c == 0x20) { /* repeat SPACE */
2166: mbfl_memory_device_output(c, &pe->tmpdev);
2167: } else {
2168: if (pe->tmpdev.pos < 74 && c == 0x20) {
2169: n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
2170: if (n > 74) {
2171: mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
2172: pe->linehead = pe->outdev.pos;
2173: pe->firstindent = 0;
2174: } else if (pe->outdev.pos > 0) {
2175: mbfl_memory_device_output(0x20, &pe->outdev);
2176: }
2177: mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2178: mbfl_memory_device_reset(&pe->tmpdev);
2179: pe->status1 = 0;
2180: } else {
2181: n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
2182: if (n > 60) {
2183: mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
2184: pe->linehead = pe->outdev.pos;
2185: pe->firstindent = 0;
2186: } else if (pe->outdev.pos > 0) {
2187: mbfl_memory_device_output(0x20, &pe->outdev);
2188: }
2189: mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
2190: mbfl_memory_device_reset(&pe->tmpdev);
2191: (*pe->block_filter->filter_function)(c, pe->block_filter);
2192: pe->status1 = 11;
2193: }
2194: }
2195: break;
2196: }
2197:
2198: return c;
2199: }
2200:
2201: mbfl_string *
2202: mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2203: {
2204: if (pe->status1 >= 10) {
2205: (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2206: (*pe->encod_filter->filter_flush)(pe->encod_filter);
2207: mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2208: } else if (pe->tmpdev.pos > 0) {
2209: if (pe->outdev.pos > 0) {
2210: if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2211: mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2212: } else {
2213: mbfl_memory_device_output(0x20, &pe->outdev);
2214: }
2215: }
2216: mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2217: }
2218: mbfl_memory_device_reset(&pe->tmpdev);
2219: pe->prevpos = 0;
2220: pe->linehead = 0;
2221: pe->status1 = 0;
2222: pe->status2 = 0;
2223:
2224: return mbfl_memory_device_result(&pe->outdev, result);
2225: }
2226:
2227: struct mime_header_encoder_data*
2228: mime_header_encoder_new(
2229: enum mbfl_no_encoding incode,
2230: enum mbfl_no_encoding outcode,
2231: enum mbfl_no_encoding transenc)
2232: {
2233: int n;
2234: const char *s;
2235: const mbfl_encoding *outencoding;
2236: struct mime_header_encoder_data *pe;
2237:
2238: /* get output encoding and check MIME charset name */
2239: outencoding = mbfl_no2encoding(outcode);
2240: if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
2241: return NULL;
2242: }
2243:
2244: pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2245: if (pe == NULL) {
2246: return NULL;
2247: }
2248:
2249: mbfl_memory_device_init(&pe->outdev, 0, 0);
2250: mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2251: pe->prevpos = 0;
2252: pe->linehead = 0;
2253: pe->firstindent = 0;
2254: pe->status1 = 0;
2255: pe->status2 = 0;
2256:
2257: /* make the encoding description string exp. "=?ISO-2022-JP?B?" */
2258: n = 0;
2259: pe->encname[n++] = 0x3d;
2260: pe->encname[n++] = 0x3f;
2261: s = outencoding->mime_name;
2262: while (*s) {
2263: pe->encname[n++] = *s++;
2264: }
2265: pe->encname[n++] = 0x3f;
2266: if (transenc == mbfl_no_encoding_qprint) {
2267: pe->encname[n++] = 0x51;
2268: } else {
2269: pe->encname[n++] = 0x42;
2270: transenc = mbfl_no_encoding_base64;
2271: }
2272: pe->encname[n++] = 0x3f;
2273: pe->encname[n] = '\0';
2274: pe->encnamelen = n;
2275:
2276: n = 0;
2277: pe->lwsp[n++] = 0x0d;
2278: pe->lwsp[n++] = 0x0a;
2279: pe->lwsp[n++] = 0x20;
2280: pe->lwsp[n] = '\0';
2281: pe->lwsplen = n;
2282:
2283: /* transfer encode filter */
2284: pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2285: pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2286:
2287: /* Output code filter */
2288: pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2289: pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2290:
2291: /* encoded block filter */
2292: pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2293:
2294: /* Input code filter */
2295: pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);
2296:
2297: if (pe->encod_filter == NULL ||
2298: pe->encod_filter_backup == NULL ||
2299: pe->conv2_filter == NULL ||
2300: pe->conv2_filter_backup == NULL ||
2301: pe->conv1_filter == NULL) {
2302: mime_header_encoder_delete(pe);
2303: return NULL;
2304: }
2305:
2306: if (transenc == mbfl_no_encoding_qprint) {
2307: pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2308: pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2309: } else {
2310: pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2311: pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2312: }
2313:
2314: return pe;
2315: }
2316:
2317: void
2318: mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2319: {
2320: if (pe) {
2321: mbfl_convert_filter_delete(pe->conv1_filter);
2322: mbfl_convert_filter_delete(pe->block_filter);
2323: mbfl_convert_filter_delete(pe->conv2_filter);
2324: mbfl_convert_filter_delete(pe->conv2_filter_backup);
2325: mbfl_convert_filter_delete(pe->encod_filter);
2326: mbfl_convert_filter_delete(pe->encod_filter_backup);
2327: mbfl_memory_device_clear(&pe->outdev);
2328: mbfl_memory_device_clear(&pe->tmpdev);
2329: mbfl_free((void*)pe);
2330: }
2331: }
2332:
2333: int
2334: mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2335: {
2336: return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2337: }
2338:
2339: mbfl_string *
2340: mbfl_mime_header_encode(
2341: mbfl_string *string,
2342: mbfl_string *result,
2343: enum mbfl_no_encoding outcode,
2344: enum mbfl_no_encoding encoding,
2345: const char *linefeed,
2346: int indent)
2347: {
2348: int n;
2349: unsigned char *p;
2350: struct mime_header_encoder_data *pe;
2351:
2352: mbfl_string_init(result);
2353: result->no_language = string->no_language;
2354: result->no_encoding = mbfl_no_encoding_ascii;
2355:
2356: pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
2357: if (pe == NULL) {
2358: return NULL;
2359: }
2360:
2361: if (linefeed != NULL) {
2362: n = 0;
2363: while (*linefeed && n < 8) {
2364: pe->lwsp[n++] = *linefeed++;
2365: }
2366: pe->lwsp[n++] = 0x20;
2367: pe->lwsp[n] = '\0';
2368: pe->lwsplen = n;
2369: }
2370: if (indent > 0 && indent < 74) {
2371: pe->firstindent = indent;
2372: }
2373:
2374: n = string->len;
2375: p = string->val;
2376: while (n > 0) {
2377: (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2378: n--;
2379: }
2380:
2381: result = mime_header_encoder_result(pe, result);
2382: mime_header_encoder_delete(pe);
2383:
2384: return result;
2385: }
2386:
2387:
2388: /*
2389: * MIME header decode
2390: */
2391: struct mime_header_decoder_data {
2392: mbfl_convert_filter *deco_filter;
2393: mbfl_convert_filter *conv1_filter;
2394: mbfl_convert_filter *conv2_filter;
2395: mbfl_memory_device outdev;
2396: mbfl_memory_device tmpdev;
2397: int cspos;
2398: int status;
2399: enum mbfl_no_encoding encoding;
2400: enum mbfl_no_encoding incode;
2401: enum mbfl_no_encoding outcode;
2402: };
2403:
2404: static int
2405: mime_header_decoder_collector(int c, void* data)
2406: {
2407: const mbfl_encoding *encoding;
2408: struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2409:
2410: switch (pd->status) {
2411: case 1:
2412: if (c == 0x3f) { /* ? */
2413: mbfl_memory_device_output(c, &pd->tmpdev);
2414: pd->cspos = pd->tmpdev.pos;
2415: pd->status = 2;
2416: } else {
2417: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2418: mbfl_memory_device_reset(&pd->tmpdev);
2419: if (c == 0x3d) { /* = */
2420: mbfl_memory_device_output(c, &pd->tmpdev);
2421: } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2422: pd->status = 9;
2423: } else {
2424: (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2425: pd->status = 0;
2426: }
2427: }
2428: break;
2429: case 2: /* store charset string */
2430: if (c == 0x3f) { /* ? */
2431: /* identify charset */
2432: mbfl_memory_device_output('\0', &pd->tmpdev);
2433: encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2434: if (encoding != NULL) {
2435: pd->incode = encoding->no_encoding;
2436: pd->status = 3;
2437: }
2438: mbfl_memory_device_unput(&pd->tmpdev);
2439: mbfl_memory_device_output(c, &pd->tmpdev);
2440: } else {
2441: mbfl_memory_device_output(c, &pd->tmpdev);
2442: if (pd->tmpdev.pos > 100) { /* too long charset string */
2443: pd->status = 0;
2444: } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2445: mbfl_memory_device_unput(&pd->tmpdev);
2446: pd->status = 9;
2447: }
2448: if (pd->status != 2) {
2449: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2450: mbfl_memory_device_reset(&pd->tmpdev);
2451: }
2452: }
2453: break;
2454: case 3: /* identify encoding */
2455: mbfl_memory_device_output(c, &pd->tmpdev);
2456: if (c == 0x42 || c == 0x62) { /* 'B' or 'b' */
2457: pd->encoding = mbfl_no_encoding_base64;
2458: pd->status = 4;
2459: } else if (c == 0x51 || c == 0x71) { /* 'Q' or 'q' */
2460: pd->encoding = mbfl_no_encoding_qprint;
2461: pd->status = 4;
2462: } else {
2463: if (c == 0x0d || c == 0x0a) { /* CR or LF */
2464: mbfl_memory_device_unput(&pd->tmpdev);
2465: pd->status = 9;
2466: } else {
2467: pd->status = 0;
2468: }
2469: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2470: mbfl_memory_device_reset(&pd->tmpdev);
2471: }
2472: break;
2473: case 4: /* reset filter */
2474: mbfl_memory_device_output(c, &pd->tmpdev);
2475: if (c == 0x3f) { /* ? */
2476: /* charset convert filter */
2477: mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar);
2478: /* decode filter */
2479: mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit);
2480: pd->status = 5;
2481: } else {
2482: if (c == 0x0d || c == 0x0a) { /* CR or LF */
2483: mbfl_memory_device_unput(&pd->tmpdev);
2484: pd->status = 9;
2485: } else {
2486: pd->status = 0;
2487: }
2488: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2489: }
2490: mbfl_memory_device_reset(&pd->tmpdev);
2491: break;
2492: case 5: /* encoded block */
2493: if (c == 0x3f) { /* ? */
2494: pd->status = 6;
2495: } else {
2496: (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2497: }
2498: break;
2499: case 6: /* check end position */
2500: if (c == 0x3d) { /* = */
2501: /* flush and reset filter */
2502: (*pd->deco_filter->filter_flush)(pd->deco_filter);
2503: (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2504: mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar);
2505: pd->status = 7;
2506: } else {
2507: (*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2508: if (c != 0x3f) { /* ? */
2509: (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2510: pd->status = 5;
2511: }
2512: }
2513: break;
2514: case 7: /* after encoded block */
2515: if (c == 0x0d || c == 0x0a) { /* CR LF */
2516: pd->status = 8;
2517: } else {
2518: mbfl_memory_device_output(c, &pd->tmpdev);
2519: if (c == 0x3d) { /* = */
2520: pd->status = 1;
2521: } else if (c != 0x20 && c != 0x09) { /* not space */
2522: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2523: mbfl_memory_device_reset(&pd->tmpdev);
2524: pd->status = 0;
2525: }
2526: }
2527: break;
2528: case 8: /* folding */
2529: case 9: /* folding */
2530: if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2531: if (c == 0x3d) { /* = */
2532: if (pd->status == 8) {
2533: mbfl_memory_device_output(0x20, &pd->tmpdev); /* SPACE */
2534: } else {
2535: (*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2536: }
2537: mbfl_memory_device_output(c, &pd->tmpdev);
2538: pd->status = 1;
2539: } else {
2540: mbfl_memory_device_output(0x20, &pd->tmpdev);
2541: mbfl_memory_device_output(c, &pd->tmpdev);
2542: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2543: mbfl_memory_device_reset(&pd->tmpdev);
2544: pd->status = 0;
2545: }
2546: }
2547: break;
2548: default: /* non encoded block */
2549: if (c == 0x0d || c == 0x0a) { /* CR LF */
2550: pd->status = 9;
2551: } else if (c == 0x3d) { /* = */
2552: mbfl_memory_device_output(c, &pd->tmpdev);
2553: pd->status = 1;
2554: } else {
2555: (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2556: }
2557: break;
2558: }
2559:
2560: return c;
2561: }
2562:
2563: mbfl_string *
2564: mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2565: {
2566: switch (pd->status) {
2567: case 1:
2568: case 2:
2569: case 3:
2570: case 4:
2571: case 7:
2572: case 8:
2573: case 9:
2574: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2575: break;
2576: case 5:
2577: case 6:
2578: (*pd->deco_filter->filter_flush)(pd->deco_filter);
2579: (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2580: break;
2581: }
2582: (*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2583: mbfl_memory_device_reset(&pd->tmpdev);
2584: pd->status = 0;
2585:
2586: return mbfl_memory_device_result(&pd->outdev, result);
2587: }
2588:
2589: struct mime_header_decoder_data*
2590: mime_header_decoder_new(enum mbfl_no_encoding outcode)
2591: {
2592: struct mime_header_decoder_data *pd;
2593:
2594: pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2595: if (pd == NULL) {
2596: return NULL;
2597: }
2598:
2599: mbfl_memory_device_init(&pd->outdev, 0, 0);
2600: mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2601: pd->cspos = 0;
2602: pd->status = 0;
2603: pd->encoding = mbfl_no_encoding_pass;
2604: pd->incode = mbfl_no_encoding_ascii;
2605: pd->outcode = outcode;
2606: /* charset convert filter */
2607: pd->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2608: pd->conv1_filter = mbfl_convert_filter_new(pd->incode, mbfl_no_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2609: /* decode filter */
2610: pd->deco_filter = mbfl_convert_filter_new(pd->encoding, mbfl_no_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2611:
2612: if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2613: mime_header_decoder_delete(pd);
2614: return NULL;
2615: }
2616:
2617: return pd;
2618: }
2619:
2620: void
2621: mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2622: {
2623: if (pd) {
2624: mbfl_convert_filter_delete(pd->conv2_filter);
2625: mbfl_convert_filter_delete(pd->conv1_filter);
2626: mbfl_convert_filter_delete(pd->deco_filter);
2627: mbfl_memory_device_clear(&pd->outdev);
2628: mbfl_memory_device_clear(&pd->tmpdev);
2629: mbfl_free((void*)pd);
2630: }
2631: }
2632:
/*
 * Public entry point for feeding a single byte to the decoder; forwards to
 * mime_header_decoder_collector() and returns its result.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	int fed;

	fed = mime_header_decoder_collector(c, pd);

	return fed;
}
2638:
2639: mbfl_string *
2640: mbfl_mime_header_decode(
2641: mbfl_string *string,
2642: mbfl_string *result,
2643: enum mbfl_no_encoding outcode)
2644: {
2645: int n;
2646: unsigned char *p;
2647: struct mime_header_decoder_data *pd;
2648:
2649: mbfl_string_init(result);
2650: result->no_language = string->no_language;
2651: result->no_encoding = outcode;
2652:
2653: pd = mime_header_decoder_new(outcode);
2654: if (pd == NULL) {
2655: return NULL;
2656: }
2657:
2658: /* feed data */
2659: n = string->len;
2660: p = string->val;
2661: while (n > 0) {
2662: mime_header_decoder_collector(*p++, pd);
2663: n--;
2664: }
2665:
2666: result = mime_header_decoder_result(pd, result);
2667: mime_header_decoder_delete(pd);
2668:
2669: return result;
2670: }
2671:
2672:
2673:
2674: /*
2675: * convert HTML numeric entity
2676: */
2677: struct collector_htmlnumericentity_data {
2678: mbfl_convert_filter *decoder;
2679: int status;
2680: int cache;
2681: int digit;
2682: int *convmap;
2683: int mapsize;
2684: };
2685:
2686: static int
2687: collector_encode_htmlnumericentity(int c, void *data)
2688: {
2689: struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2690: int f, n, s, r, d, size, *mapelm;
2691:
2692: size = pc->mapsize;
2693: f = 0;
2694: n = 0;
2695: while (n < size) {
2696: mapelm = &(pc->convmap[n*4]);
2697: if (c >= mapelm[0] && c <= mapelm[1]) {
2698: s = (c + mapelm[2]) & mapelm[3];
2699: if (s >= 0) {
2700: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2701: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2702: r = 100000000;
2703: s %= r;
2704: while (r > 0) {
2705: d = s/r;
2706: if (d || f) {
2707: f = 1;
2708: s %= r;
2709: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2710: }
2711: r /= 10;
2712: }
2713: if (!f) {
2714: f = 1;
2715: (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2716: }
2717: (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2718: }
2719: }
2720: if (f) {
2721: break;
2722: }
2723: n++;
2724: }
2725: if (!f) {
2726: (*pc->decoder->filter_function)(c, pc->decoder);
2727: }
2728:
2729: return c;
2730: }
2731:
2732: static int
2733: collector_decode_htmlnumericentity(int c, void *data)
2734: {
2735: struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2736: int f, n, s, r, d, size, *mapelm;
2737:
2738: switch (pc->status) {
2739: case 1:
2740: if (c == 0x23) { /* '#' */
2741: pc->status = 2;
2742: } else {
2743: pc->status = 0;
2744: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2745: (*pc->decoder->filter_function)(c, pc->decoder);
2746: }
2747: break;
2748: case 2:
1.1.1.2 misho 2749: if (c == 0x78) { /* 'x' */
2750: pc->status = 4;
2751: } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
1.1 misho 2752: pc->cache = c - 0x30;
2753: pc->status = 3;
2754: pc->digit = 1;
2755: } else {
2756: pc->status = 0;
2757: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2758: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2759: (*pc->decoder->filter_function)(c, pc->decoder);
2760: }
2761: break;
2762: case 3:
2763: s = 0;
2764: f = 0;
2765: if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2766: if (pc->digit > 9) {
2767: pc->status = 0;
2768: s = pc->cache;
2769: f = 1;
2770: } else {
2771: s = pc->cache*10 + c - 0x30;
2772: pc->cache = s;
2773: pc->digit++;
2774: }
2775: } else {
2776: pc->status = 0;
2777: s = pc->cache;
2778: f = 1;
2779: n = 0;
2780: size = pc->mapsize;
2781: while (n < size) {
2782: mapelm = &(pc->convmap[n*4]);
2783: d = s - mapelm[2];
2784: if (d >= mapelm[0] && d <= mapelm[1]) {
2785: f = 0;
2786: (*pc->decoder->filter_function)(d, pc->decoder);
2787: if (c != 0x3b) { /* ';' */
2788: (*pc->decoder->filter_function)(c, pc->decoder);
2789: }
2790: break;
2791: }
2792: n++;
2793: }
2794: }
2795: if (f) {
2796: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2797: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2798: r = 1;
2799: n = pc->digit;
2800: while (n > 0) {
2801: r *= 10;
2802: n--;
2803: }
2804: s %= r;
2805: r /= 10;
2806: while (r > 0) {
2807: d = s/r;
2808: s %= r;
2809: r /= 10;
2810: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2811: }
2812: (*pc->decoder->filter_function)(c, pc->decoder);
2813: }
2814: break;
1.1.1.2 misho 2815: case 4:
2816: if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2817: pc->cache = c - 0x30;
2818: pc->status = 5;
2819: pc->digit = 1;
2820: } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */
2821: pc->cache = c - 0x41 + 10;
2822: pc->status = 5;
2823: pc->digit = 1;
2824: } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */
2825: pc->cache = c - 0x61 + 10;
2826: pc->status = 5;
2827: pc->digit = 1;
2828: } else {
2829: pc->status = 0;
2830: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2831: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2832: (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2833: (*pc->decoder->filter_function)(c, pc->decoder);
2834: }
2835: break;
2836: case 5:
2837: s = 0;
2838: f = 0;
2839: if ((c >= 0x30 && c <= 0x39) ||
2840: (c >= 0x41 && c <= 0x46) ||
2841: (c >= 0x61 && c <= 0x66)) { /* '0' - '9' or 'a' - 'f' */
2842: if (pc->digit > 9) {
2843: pc->status = 0;
2844: s = pc->cache;
2845: f = 1;
2846: } else {
2847: if (c >= 0x30 && c <= 0x39) {
2848: s = pc->cache*16 + (c - 0x30);
2849: } else if (c >= 0x41 && c <= 0x46) {
2850: s = pc->cache*16 + (c - 0x41 + 10);
2851: } else {
2852: s = pc->cache*16 + (c - 0x61 + 10);
2853: }
2854: pc->cache = s;
2855: pc->digit++;
2856: }
2857: } else {
2858: pc->status = 0;
2859: s = pc->cache;
2860: f = 1;
2861: n = 0;
2862: size = pc->mapsize;
2863: while (n < size) {
2864: mapelm = &(pc->convmap[n*4]);
2865: d = s - mapelm[2];
2866: if (d >= mapelm[0] && d <= mapelm[1]) {
2867: f = 0;
2868: (*pc->decoder->filter_function)(d, pc->decoder);
2869: if (c != 0x3b) { /* ';' */
2870: (*pc->decoder->filter_function)(c, pc->decoder);
2871: }
2872: break;
2873: }
2874: n++;
2875: }
2876: }
2877: if (f) {
2878: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2879: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2880: (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2881: r = 1;
2882: n = pc->digit;
2883: while (n > 0) {
2884: r *= 16;
2885: n--;
2886: }
2887: s %= r;
2888: r /= 16;
2889: while (r > 0) {
2890: d = s/r;
2891: s %= r;
2892: r /= 16;
2893: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2894: }
2895: (*pc->decoder->filter_function)(c, pc->decoder);
2896: }
2897: break;
1.1 misho 2898: default:
2899: if (c == 0x26) { /* '&' */
2900: pc->status = 1;
2901: } else {
2902: (*pc->decoder->filter_function)(c, pc->decoder);
2903: }
2904: break;
2905: }
2906:
2907: return c;
2908: }
2909:
1.1.1.2 misho 2910: static int
2911: collector_encode_hex_htmlnumericentity(int c, void *data)
2912: {
2913: struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2914: int f, n, s, r, d, size, *mapelm;
2915:
2916: size = pc->mapsize;
2917: f = 0;
2918: n = 0;
2919: while (n < size) {
2920: mapelm = &(pc->convmap[n*4]);
2921: if (c >= mapelm[0] && c <= mapelm[1]) {
2922: s = (c + mapelm[2]) & mapelm[3];
2923: if (s >= 0) {
2924: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2925: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2926: (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2927: r = 0x1000000;
2928: s %= r;
2929: while (r > 0) {
2930: d = s/r;
2931: if (d || f) {
2932: f = 1;
2933: s %= r;
2934: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2935: }
2936: r /= 16;
2937: }
2938: if (!f) {
2939: f = 1;
2940: (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2941: }
2942: (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2943: }
2944: }
2945: if (f) {
2946: break;
2947: }
2948: n++;
2949: }
2950: if (!f) {
2951: (*pc->decoder->filter_function)(c, pc->decoder);
2952: }
2953:
2954: return c;
2955: }
2956:
1.1 misho 2957: int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
2958: {
2959: struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
2960: int n, s, r, d;
2961:
2962: if (pc->status) {
2963: switch (pc->status) {
2964: case 1: /* '&' */
2965: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2966: break;
2967: case 2: /* '#' */
2968: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2969: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2970: break;
2971: case 3: /* '0'-'9' */
2972: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2973: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2974:
2975: s = pc->cache;
2976: r = 1;
2977: n = pc->digit;
2978: while (n > 0) {
2979: r *= 10;
2980: n--;
2981: }
2982: s %= r;
2983: r /= 10;
2984: while (r > 0) {
2985: d = s/r;
2986: s %= r;
2987: r /= 10;
2988: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2989: }
2990:
2991: break;
1.1.1.2 misho 2992: case 4: /* 'x' */
2993: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2994: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2995: (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2996: break;
2997: case 5: /* '0'-'9','a'-'f' */
2998: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2999: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
3000: (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
3001:
3002: s = pc->cache;
3003: r = 1;
3004: n = pc->digit;
3005: while (n > 0) {
3006: r *= 16;
3007: n--;
3008: }
3009: s %= r;
3010: r /= 16;
3011: while (r > 0) {
3012: d = s/r;
3013: s %= r;
3014: r /= 16;
3015: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
3016: }
3017: break;
1.1 misho 3018: default:
3019: break;
3020: }
3021: }
3022:
3023: pc->status = 0;
3024: pc->cache = 0;
3025: pc->digit = 0;
3026:
3027: return 0;
3028: }
3029:
1.1.1.2 misho 3030:
1.1 misho 3031: mbfl_string *
3032: mbfl_html_numeric_entity(
3033: mbfl_string *string,
3034: mbfl_string *result,
3035: int *convmap,
3036: int mapsize,
3037: int type)
3038: {
3039: struct collector_htmlnumericentity_data pc;
3040: mbfl_memory_device device;
3041: mbfl_convert_filter *encoder;
3042: int n;
3043: unsigned char *p;
3044:
3045: if (string == NULL || result == NULL) {
3046: return NULL;
3047: }
3048: mbfl_string_init(result);
3049: result->no_language = string->no_language;
3050: result->no_encoding = string->no_encoding;
3051: mbfl_memory_device_init(&device, string->len, 0);
3052:
3053: /* output code filter */
3054: pc.decoder = mbfl_convert_filter_new(
3055: mbfl_no_encoding_wchar,
3056: string->no_encoding,
3057: mbfl_memory_device_output, 0, &device);
3058: /* wchar filter */
1.1.1.2 misho 3059: if (type == 0) { /* decimal output */
1.1 misho 3060: encoder = mbfl_convert_filter_new(
3061: string->no_encoding,
3062: mbfl_no_encoding_wchar,
3063: collector_encode_htmlnumericentity, 0, &pc);
1.1.1.2 misho 3064: } else if (type == 2) { /* hex output */
3065: encoder = mbfl_convert_filter_new(
3066: string->no_encoding,
3067: mbfl_no_encoding_wchar,
3068: collector_encode_hex_htmlnumericentity, 0, &pc);
3069: } else { /* type == 1: decimal/hex input */
1.1 misho 3070: encoder = mbfl_convert_filter_new(
3071: string->no_encoding,
3072: mbfl_no_encoding_wchar,
1.1.1.2 misho 3073: collector_decode_htmlnumericentity,
1.1 misho 3074: (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
3075: }
3076: if (pc.decoder == NULL || encoder == NULL) {
3077: mbfl_convert_filter_delete(encoder);
3078: mbfl_convert_filter_delete(pc.decoder);
3079: return NULL;
3080: }
3081: pc.status = 0;
3082: pc.cache = 0;
3083: pc.digit = 0;
3084: pc.convmap = convmap;
3085: pc.mapsize = mapsize;
3086:
3087: /* feed data */
3088: p = string->val;
3089: n = string->len;
3090: if (p != NULL) {
3091: while (n > 0) {
3092: if ((*encoder->filter_function)(*p++, encoder) < 0) {
3093: break;
3094: }
3095: n--;
3096: }
3097: }
3098: mbfl_convert_filter_flush(encoder);
3099: mbfl_convert_filter_flush(pc.decoder);
3100: result = mbfl_memory_device_result(&device, result);
3101: mbfl_convert_filter_delete(encoder);
3102: mbfl_convert_filter_delete(pc.decoder);
3103:
3104: return result;
3105: }
3106:
3107: /*
3108: * Local variables:
3109: * tab-width: 4
3110: * c-basic-offset: 4
3111: * End:
3112: */