Annotation of embedaddon/php/ext/mbstring/libmbfl/mbfl/mbfilter.c, revision 1.1.1.1
1.1 misho 1: /*
2: * charset=UTF-8
3: * vim600: encoding=utf-8
4: */
5:
6: /*
7: * "streamable kanji code filter and converter"
8: *
9: * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
10: *
11: * This software is released under the GNU Lesser General Public License.
12: * (Version 2.1, February 1999)
13: * Please read the following detail of the licence (in japanese).
14: *
15: * ◆使用許諾条件◆
16: *
17: * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
18: * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
19: * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
20: * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
21: * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
22: * することはできません。
23: *
24: * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
25: * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
26: * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
27: * による許諾を得る必要があります。
28: *
29: * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
30: * ます。「GNU Lesser General Public License」とは、これまでLibrary General
31: * Public Licenseと呼ばれていたものです。
32: * http://www.gnu.org/ --- GNUウェブサイト
33: * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
34: * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
35: *
36: * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
37: * はありません。
38: *
39: * ◆保証内容◆
40: *
41: * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
42: * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
43: * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
44: * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
45: * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
46: * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
47: * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
48: * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
49: * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
50: * 契約・規定に優先します。
51: *
52: * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
53: *
54: * 〒102-0073
55: * 東京都千代田区九段北1-13-5日本地所第一ビル4F
56: * 株式会社ハッピーサイズ
57: * Phone: 03-3512-3655, Fax: 03-3512-3656
58: * Email: sales@happysize.co.jp
59: * Web: http://happysize.com/
60: *
61: * ◆著者◆
62: *
63: * 金本 茂 <sgk@happysize.co.jp>
64: *
65: * ◆履歴◆
66: *
67: * 1998/11/10 sgk implementation in C++
68: * 1999/4/25 sgk Cで書きなおし。
69: * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
70: * 1999/6/?? Unicodeサポート。
71: * 1999/6/22 sgk ライセンスをLGPLに変更。
72: *
73: */
74:
75: /*
76: * Unicode support
77: *
78: * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
79: * All rights reserved.
80: *
81: */
82:
83:
84: #ifdef HAVE_CONFIG_H
85: #include "config.h"
86: #endif
87:
88: #include <stddef.h>
89:
90: #ifdef HAVE_STRING_H
91: #include <string.h>
92: #endif
93:
94: #ifdef HAVE_STRINGS_H
95: #include <strings.h>
96: #endif
97:
98: #ifdef HAVE_STDDEF_H
99: #include <stddef.h>
100: #endif
101:
102: #include "mbfilter.h"
103: #include "mbfl_filter_output.h"
104: #include "mbfilter_pass.h"
105: #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
106:
107: #include "eaw_table.h"
108:
/*
 * Hexadecimal digit table: entry i (0..15) holds the ASCII code of the
 * upper-case hex digit for i, i.e. the characters "0123456789ABCDEF".
 */
static char mbfl_hexchar_table[] = {
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 0 .. 7 */
	0x38, 0x39,					/* 8, 9   */
	0x41, 0x42, 0x43, 0x44, 0x45, 0x46		/* A .. F */
};
113:
114:
115:
/*
 * encoding filter
 *
 * CK(statement): evaluate `statement` once and, if its value is negative,
 * return -1 from the enclosing function.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
120:
121:
122: /*
123: * buffering converter
124: */
125: mbfl_buffer_converter *
126: mbfl_buffer_converter_new(
127: enum mbfl_no_encoding from,
128: enum mbfl_no_encoding to,
129: int buf_initsz)
130: {
131: mbfl_buffer_converter *convd;
132:
133: /* allocate */
134: convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter));
135: if (convd == NULL) {
136: return NULL;
137: }
138:
139: /* initialize */
140: convd->from = mbfl_no2encoding(from);
141: convd->to = mbfl_no2encoding(to);
142: if (convd->from == NULL) {
143: convd->from = &mbfl_encoding_pass;
144: }
145: if (convd->to == NULL) {
146: convd->to = &mbfl_encoding_pass;
147: }
148:
149: /* create convert filter */
150: convd->filter1 = NULL;
151: convd->filter2 = NULL;
152: if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) {
153: convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
154: } else {
155: convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
156: if (convd->filter2 != NULL) {
157: convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding,
158: mbfl_no_encoding_wchar,
159: (int (*)(int, void*))convd->filter2->filter_function,
160: (int (*)(void*))convd->filter2->filter_flush,
161: convd->filter2);
162: if (convd->filter1 == NULL) {
163: mbfl_convert_filter_delete(convd->filter2);
164: }
165: }
166: }
167: if (convd->filter1 == NULL) {
168: return NULL;
169: }
170:
171: mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
172:
173: return convd;
174: }
175:
176: void
177: mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
178: {
179: if (convd != NULL) {
180: if (convd->filter1) {
181: mbfl_convert_filter_delete(convd->filter1);
182: }
183: if (convd->filter2) {
184: mbfl_convert_filter_delete(convd->filter2);
185: }
186: mbfl_memory_device_clear(&convd->device);
187: mbfl_free((void*)convd);
188: }
189: }
190:
191: void
192: mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
193: {
194: mbfl_memory_device_reset(&convd->device);
195: }
196:
197: int
198: mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
199: {
200: if (convd != NULL) {
201: if (convd->filter2 != NULL) {
202: convd->filter2->illegal_mode = mode;
203: } else if (convd->filter1 != NULL) {
204: convd->filter1->illegal_mode = mode;
205: } else {
206: return 0;
207: }
208: }
209:
210: return 1;
211: }
212:
213: int
214: mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
215: {
216: if (convd != NULL) {
217: if (convd->filter2 != NULL) {
218: convd->filter2->illegal_substchar = substchar;
219: } else if (convd->filter1 != NULL) {
220: convd->filter1->illegal_substchar = substchar;
221: } else {
222: return 0;
223: }
224: }
225:
226: return 1;
227: }
228:
229: int
230: mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n)
231: {
232: mbfl_convert_filter *filter;
233: int (*filter_function)(int c, mbfl_convert_filter *filter);
234:
235: if (convd != NULL && p != NULL) {
236: filter = convd->filter1;
237: if (filter != NULL) {
238: filter_function = filter->filter_function;
239: while (n > 0) {
240: if ((*filter_function)(*p++, filter) < 0) {
241: break;
242: }
243: n--;
244: }
245: }
246: }
247:
248: return n;
249: }
250:
251: int
252: mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
253: {
254: int n;
255: unsigned char *p;
256: mbfl_convert_filter *filter;
257: int (*filter_function)(int c, mbfl_convert_filter *filter);
258:
259: if (convd == NULL || string == NULL) {
260: return -1;
261: }
262: mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
263: /* feed data */
264: n = string->len;
265: p = string->val;
266: filter = convd->filter1;
267: if (filter != NULL) {
268: filter_function = filter->filter_function;
269: while (n > 0) {
270: if ((*filter_function)(*p++, filter) < 0) {
271: return -1;
272: }
273: n--;
274: }
275: }
276:
277: return 0;
278: }
279:
280: int
281: mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
282: {
283: if (convd == NULL) {
284: return -1;
285: }
286:
287: if (convd->filter1 != NULL) {
288: mbfl_convert_filter_flush(convd->filter1);
289: }
290: if (convd->filter2 != NULL) {
291: mbfl_convert_filter_flush(convd->filter2);
292: }
293:
294: return 0;
295: }
296:
297: mbfl_string *
298: mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
299: {
300: if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
301: result->no_encoding = convd->to->no_encoding;
302: result->val = convd->device.buffer;
303: result->len = convd->device.pos;
304: } else {
305: result = NULL;
306: }
307:
308: return result;
309: }
310:
311: mbfl_string *
312: mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
313: {
314: if (convd == NULL || result == NULL) {
315: return NULL;
316: }
317: result->no_encoding = convd->to->no_encoding;
318: return mbfl_memory_device_result(&convd->device, result);
319: }
320:
321: mbfl_string *
322: mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
323: mbfl_string *result)
324: {
325: if (convd == NULL || string == NULL || result == NULL) {
326: return NULL;
327: }
328: mbfl_buffer_converter_feed(convd, string);
329: if (convd->filter1 != NULL) {
330: mbfl_convert_filter_flush(convd->filter1);
331: }
332: if (convd->filter2 != NULL) {
333: mbfl_convert_filter_flush(convd->filter2);
334: }
335: result->no_encoding = convd->to->no_encoding;
336: return mbfl_memory_device_result(&convd->device, result);
337: }
338:
339: int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
340: {
341: int num_illegalchars = 0;
342:
343: if (convd == NULL) {
344: return 0;
345: }
346:
347: if (convd->filter1 != NULL) {
348: num_illegalchars += convd->filter1->num_illegalchar;
349: }
350:
351: if (convd->filter2 != NULL) {
352: num_illegalchars += convd->filter2->num_illegalchar;
353: }
354:
355: return (num_illegalchars);
356: }
357:
358: /*
359: * encoding detector
360: */
361: mbfl_encoding_detector *
362: mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
363: {
364: mbfl_encoding_detector *identd;
365:
366: int i, num;
367: mbfl_identify_filter *filter;
368:
369: if (elist == NULL || elistsz <= 0) {
370: return NULL;
371: }
372:
373: /* allocate */
374: identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
375: if (identd == NULL) {
376: return NULL;
377: }
378: identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
379: if (identd->filter_list == NULL) {
380: mbfl_free(identd);
381: return NULL;
382: }
383:
384: /* create filters */
385: i = 0;
386: num = 0;
387: while (i < elistsz) {
388: filter = mbfl_identify_filter_new(elist[i]);
389: if (filter != NULL) {
390: identd->filter_list[num] = filter;
391: num++;
392: }
393: i++;
394: }
395: identd->filter_list_size = num;
396:
397: /* set strict flag */
398: identd->strict = strict;
399:
400: return identd;
401: }
402:
403: void
404: mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
405: {
406: int i;
407:
408: if (identd != NULL) {
409: if (identd->filter_list != NULL) {
410: i = identd->filter_list_size;
411: while (i > 0) {
412: i--;
413: mbfl_identify_filter_delete(identd->filter_list[i]);
414: }
415: mbfl_free((void *)identd->filter_list);
416: }
417: mbfl_free((void *)identd);
418: }
419: }
420:
421: int
422: mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
423: {
424: int i, n, num, bad, res;
425: unsigned char *p;
426: mbfl_identify_filter *filter;
427:
428: res = 0;
429: /* feed data */
430: if (identd != NULL && string != NULL && string->val != NULL) {
431: num = identd->filter_list_size;
432: n = string->len;
433: p = string->val;
434: bad = 0;
435: while (n > 0) {
436: for (i = 0; i < num; i++) {
437: filter = identd->filter_list[i];
438: if (!filter->flag) {
439: (*filter->filter_function)(*p, filter);
440: if (filter->flag) {
441: bad++;
442: }
443: }
444: }
445: if ((num - 1) <= bad) {
446: res = 1;
447: break;
448: }
449: p++;
450: n--;
451: }
452: }
453:
454: return res;
455: }
456:
457: enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
458: {
459: mbfl_identify_filter *filter;
460: enum mbfl_no_encoding encoding;
461: int n;
462:
463: /* judge */
464: encoding = mbfl_no_encoding_invalid;
465: if (identd != NULL) {
466: n = identd->filter_list_size - 1;
467: while (n >= 0) {
468: filter = identd->filter_list[n];
469: if (!filter->flag) {
470: if (!identd->strict || !filter->status) {
471: encoding = filter->encoding->no_encoding;
472: }
473: }
474: n--;
475: }
476:
477: /* fallback judge */
478: if (encoding == mbfl_no_encoding_invalid) {
479: n = identd->filter_list_size - 1;
480: while (n >= 0) {
481: filter = identd->filter_list[n];
482: if (!filter->flag) {
483: encoding = filter->encoding->no_encoding;
484: }
485: n--;
486: }
487: }
488: }
489:
490: return encoding;
491: }
492:
493:
494: /*
495: * encoding converter
496: */
497: mbfl_string *
498: mbfl_convert_encoding(
499: mbfl_string *string,
500: mbfl_string *result,
501: enum mbfl_no_encoding toenc)
502: {
503: int n;
504: unsigned char *p;
505: const mbfl_encoding *encoding;
506: mbfl_memory_device device;
507: mbfl_convert_filter *filter1;
508: mbfl_convert_filter *filter2;
509:
510: /* initialize */
511: encoding = mbfl_no2encoding(toenc);
512: if (encoding == NULL || string == NULL || result == NULL) {
513: return NULL;
514: }
515:
516: filter1 = NULL;
517: filter2 = NULL;
518: if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) {
519: filter1 = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device);
520: } else {
521: filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
522: if (filter2 != NULL) {
523: filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
524: if (filter1 == NULL) {
525: mbfl_convert_filter_delete(filter2);
526: }
527: }
528: }
529: if (filter1 == NULL) {
530: return NULL;
531: }
532:
533: if (filter2 != NULL) {
534: filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
535: filter2->illegal_substchar = 0x3f; /* '?' */
536: }
537:
538: mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
539:
540: /* feed data */
541: n = string->len;
542: p = string->val;
543: if (p != NULL) {
544: while (n > 0) {
545: if ((*filter1->filter_function)(*p++, filter1) < 0) {
546: break;
547: }
548: n--;
549: }
550: }
551:
552: mbfl_convert_filter_flush(filter1);
553: mbfl_convert_filter_delete(filter1);
554: if (filter2 != NULL) {
555: mbfl_convert_filter_flush(filter2);
556: mbfl_convert_filter_delete(filter2);
557: }
558:
559: return mbfl_memory_device_result(&device, result);
560: }
561:
562:
563: /*
564: * identify encoding
565: */
566: const mbfl_encoding *
567: mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
568: {
569: int i, n, num, bad;
570: unsigned char *p;
571: mbfl_identify_filter *flist, *filter;
572: const mbfl_encoding *encoding;
573:
574: /* flist is an array of mbfl_identify_filter instances */
575: flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
576: if (flist == NULL) {
577: return NULL;
578: }
579:
580: num = 0;
581: if (elist != NULL) {
582: for (i = 0; i < elistsz; i++) {
583: if (!mbfl_identify_filter_init(&flist[num], elist[i])) {
584: num++;
585: }
586: }
587: }
588:
589: /* feed data */
590: n = string->len;
591: p = string->val;
592:
593: if (p != NULL) {
594: bad = 0;
595: while (n > 0) {
596: for (i = 0; i < num; i++) {
597: filter = &flist[i];
598: if (!filter->flag) {
599: (*filter->filter_function)(*p, filter);
600: if (filter->flag) {
601: bad++;
602: }
603: }
604: }
605: if ((num - 1) <= bad && !strict) {
606: break;
607: }
608: p++;
609: n--;
610: }
611: }
612:
613: /* judge */
614: encoding = NULL;
615:
616: for (i = 0; i < num; i++) {
617: filter = &flist[i];
618: if (!filter->flag) {
619: if (strict && filter->status) {
620: continue;
621: }
622: encoding = filter->encoding;
623: break;
624: }
625: }
626:
627: /* fall-back judge */
628: if (!encoding) {
629: for (i = 0; i < num; i++) {
630: filter = &flist[i];
631: if (!filter->flag && (!strict || !filter->status)) {
632: encoding = filter->encoding;
633: break;
634: }
635: }
636: }
637:
638: /* cleanup */
639: /* dtors should be called in reverse order */
640: i = num; while (--i >= 0) {
641: mbfl_identify_filter_cleanup(&flist[i]);
642: }
643:
644: mbfl_free((void *)flist);
645:
646: return encoding;
647: }
648:
649: const char*
650: mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
651: {
652: const mbfl_encoding *encoding;
653:
654: encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
655: if (encoding != NULL &&
656: encoding->no_encoding > mbfl_no_encoding_charset_min &&
657: encoding->no_encoding < mbfl_no_encoding_charset_max) {
658: return encoding->name;
659: } else {
660: return NULL;
661: }
662: }
663:
664: enum mbfl_no_encoding
665: mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
666: {
667: const mbfl_encoding *encoding;
668:
669: encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
670: if (encoding != NULL &&
671: encoding->no_encoding > mbfl_no_encoding_charset_min &&
672: encoding->no_encoding < mbfl_no_encoding_charset_max) {
673: return encoding->no_encoding;
674: } else {
675: return mbfl_no_encoding_invalid;
676: }
677: }
678:
679:
680: /*
681: * strlen
682: */
/*
 * Output callback that counts characters: `data` points at an int that is
 * incremented once per call. The character itself is returned unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	int *counter = (int *)data;

	*counter += 1;

	return c;
}
689:
690: int
691: mbfl_strlen(mbfl_string *string)
692: {
693: int len, n, m, k;
694: unsigned char *p;
695: const unsigned char *mbtab;
696: const mbfl_encoding *encoding;
697:
698: encoding = mbfl_no2encoding(string->no_encoding);
699: if (encoding == NULL || string == NULL) {
700: return -1;
701: }
702:
703: len = 0;
704: if (encoding->flag & MBFL_ENCTYPE_SBCS) {
705: len = string->len;
706: } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
707: len = string->len/2;
708: } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
709: len = string->len/4;
710: } else if (encoding->mblen_table != NULL) {
711: mbtab = encoding->mblen_table;
712: n = 0;
713: p = string->val;
714: k = string->len;
715: /* count */
716: if (p != NULL) {
717: while (n < k) {
718: m = mbtab[*p];
719: n += m;
720: p += m;
721: len++;
722: };
723: }
724: } else {
725: /* wchar filter */
726: mbfl_convert_filter *filter = mbfl_convert_filter_new(
727: string->no_encoding,
728: mbfl_no_encoding_wchar,
729: filter_count_output, 0, &len);
730: if (filter == NULL) {
731: return -1;
732: }
733: /* count */
734: n = string->len;
735: p = string->val;
736: if (p != NULL) {
737: while (n > 0) {
738: (*filter->filter_function)(*p++, filter);
739: n--;
740: }
741: }
742: mbfl_convert_filter_delete(filter);
743: }
744:
745: return len;
746: }
747:
748:
749: /*
750: * strpos
751: */
752: struct collector_strpos_data {
753: mbfl_convert_filter *next_filter;
754: mbfl_wchar_device needle;
755: int needle_len;
756: int start;
757: int output;
758: int found_pos;
759: int needle_pos;
760: int matched_pos;
761: };
762:
763: static int
764: collector_strpos(int c, void* data)
765: {
766: int *p, *h, *m, n;
767: struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
768:
769: if (pc->output >= pc->start) {
770: if (c == (int)pc->needle.buffer[pc->needle_pos]) {
771: if (pc->needle_pos == 0) {
772: pc->found_pos = pc->output; /* found position */
773: }
774: pc->needle_pos++; /* needle pointer */
775: if (pc->needle_pos >= pc->needle_len) {
776: pc->matched_pos = pc->found_pos; /* matched position */
777: pc->needle_pos--;
778: goto retry;
779: }
780: } else if (pc->needle_pos != 0) {
781: retry:
782: h = (int *)pc->needle.buffer;
783: h++;
784: for (;;) {
785: pc->found_pos++;
786: p = h;
787: m = (int *)pc->needle.buffer;
788: n = pc->needle_pos - 1;
789: while (n > 0 && *p == *m) {
790: n--;
791: p++;
792: m++;
793: }
794: if (n <= 0) {
795: if (*m != c) {
796: pc->needle_pos = 0;
797: }
798: break;
799: } else {
800: h++;
801: pc->needle_pos--;
802: }
803: }
804: }
805: }
806:
807: pc->output++;
808: return c;
809: }
810:
811: /*
812: * oddlen
813: */
814: int
815: mbfl_oddlen(mbfl_string *string)
816: {
817: int len, n, m, k;
818: unsigned char *p;
819: const unsigned char *mbtab;
820: const mbfl_encoding *encoding;
821:
822:
823: if (string == NULL) {
824: return -1;
825: }
826: encoding = mbfl_no2encoding(string->no_encoding);
827: if (encoding == NULL) {
828: return -1;
829: }
830:
831: len = 0;
832: if (encoding->flag & MBFL_ENCTYPE_SBCS) {
833: return 0;
834: } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
835: return len % 2;
836: } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
837: return len % 4;
838: } else if (encoding->mblen_table != NULL) {
839: mbtab = encoding->mblen_table;
840: n = 0;
841: p = string->val;
842: k = string->len;
843: /* count */
844: if (p != NULL) {
845: while (n < k) {
846: m = mbtab[*p];
847: n += m;
848: p += m;
849: };
850: }
851: return n-k;
852: } else {
853: /* how can i do ? */
854: return 0;
855: }
856: /* NOT REACHED */
857: }
858:
859: int
860: mbfl_strpos(
861: mbfl_string *haystack,
862: mbfl_string *needle,
863: int offset,
864: int reverse)
865: {
866: int result;
867: mbfl_string _haystack_u8, _needle_u8;
868: const mbfl_string *haystack_u8, *needle_u8;
869: const unsigned char *u8_tbl;
870:
871: if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
872: return -8;
873: }
874:
875: {
876: const mbfl_encoding *u8_enc;
877: u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
878: if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
879: return -8;
880: }
881: u8_tbl = u8_enc->mblen_table;
882: }
883:
884: if (haystack->no_encoding != mbfl_no_encoding_utf8) {
885: mbfl_string_init(&_haystack_u8);
886: haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
887: if (haystack_u8 == NULL) {
888: result = -4;
889: goto out;
890: }
891: } else {
892: haystack_u8 = haystack;
893: }
894:
895: if (needle->no_encoding != mbfl_no_encoding_utf8) {
896: mbfl_string_init(&_needle_u8);
897: needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
898: if (needle_u8 == NULL) {
899: result = -4;
900: goto out;
901: }
902: } else {
903: needle_u8 = needle;
904: }
905:
906: if (needle_u8->len < 1) {
907: result = -8;
908: goto out;
909: }
910:
911: result = -1;
912: if (haystack_u8->len < needle_u8->len) {
913: goto out;
914: }
915:
916: if (!reverse) {
917: unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
918: unsigned int needle_u8_len = needle_u8->len;
919: unsigned int i;
920: const unsigned char *p, *q, *e;
921: const unsigned char *haystack_u8_val = haystack_u8->val,
922: *needle_u8_val = needle_u8->val;
923: for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
924: jtbl[i] = needle_u8_len + 1;
925: }
926: for (i = 0; i < needle_u8_len - 1; ++i) {
927: jtbl[needle_u8_val[i]] = needle_u8_len - i;
928: }
929: e = haystack_u8_val + haystack_u8->len;
930: p = haystack_u8_val;
931: while (--offset >= 0) {
932: if (p >= e) {
933: result = -16;
934: goto out;
935: }
936: p += u8_tbl[*p];
937: }
938: p += needle_u8_len;
939: if (p > e) {
940: goto out;
941: }
942: while (p <= e) {
943: const unsigned char *pv = p;
944: q = needle_u8_val + needle_u8_len;
945: for (;;) {
946: if (q == needle_u8_val) {
947: result = 0;
948: while (p > haystack_u8_val) {
949: unsigned char c = *--p;
950: if (c < 0x80) {
951: ++result;
952: } else if ((c & 0xc0) != 0x80) {
953: ++result;
954: }
955: }
956: goto out;
957: }
958: if (*--q != *--p) {
959: break;
960: }
961: }
962: p += jtbl[*p];
963: if (p <= pv) {
964: p = pv + 1;
965: }
966: }
967: } else {
968: unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
969: unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
970: unsigned int i;
971: const unsigned char *p, *e, *q, *qe;
972: const unsigned char *haystack_u8_val = haystack_u8->val,
973: *needle_u8_val = needle_u8->val;
974: for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
975: jtbl[i] = needle_u8_len;
976: }
977: for (i = needle_u8_len - 1; i > 0; --i) {
978: unsigned char c = needle_u8_val[i];
979: jtbl[c] = i;
980: if (c < 0x80) {
981: ++needle_len;
982: } else if ((c & 0xc0) != 0x80) {
983: ++needle_len;
984: }
985: }
986: {
987: unsigned char c = needle_u8_val[0];
988: if (c < 0x80) {
989: ++needle_len;
990: } else if ((c & 0xc0) != 0x80) {
991: ++needle_len;
992: }
993: }
994: e = haystack_u8_val;
995: p = e + haystack_u8->len;
996: qe = needle_u8_val + needle_u8_len;
997: if (offset < 0) {
998: if (-offset > needle_len) {
999: offset += needle_len;
1000: while (offset < 0) {
1001: unsigned char c;
1002: if (p <= e) {
1003: result = -16;
1004: goto out;
1005: }
1006: c = *(--p);
1007: if (c < 0x80) {
1008: ++offset;
1009: } else if ((c & 0xc0) != 0x80) {
1010: ++offset;
1011: }
1012: }
1013: }
1014: } else {
1015: const unsigned char *ee = haystack_u8_val + haystack_u8->len;
1016: while (--offset >= 0) {
1017: if (e >= ee) {
1018: result = -16;
1019: goto out;
1020: }
1021: e += u8_tbl[*e];
1022: }
1023: }
1024: if (p < e + needle_u8_len) {
1025: goto out;
1026: }
1027: p -= needle_u8_len;
1028: while (p >= e) {
1029: const unsigned char *pv = p;
1030: q = needle_u8_val;
1031: for (;;) {
1032: if (q == qe) {
1033: result = 0;
1034: p -= needle_u8_len;
1035: while (p > haystack_u8_val) {
1036: unsigned char c = *--p;
1037: if (c < 0x80) {
1038: ++result;
1039: } else if ((c & 0xc0) != 0x80) {
1040: ++result;
1041: }
1042: }
1043: goto out;
1044: }
1045: if (*q != *p) {
1046: break;
1047: }
1048: ++p, ++q;
1049: }
1050: p -= jtbl[*p];
1051: if (p >= pv) {
1052: p = pv - 1;
1053: }
1054: }
1055: }
1056: out:
1057: if (haystack_u8 == &_haystack_u8) {
1058: mbfl_string_clear(&_haystack_u8);
1059: }
1060: if (needle_u8 == &_needle_u8) {
1061: mbfl_string_clear(&_needle_u8);
1062: }
1063: return result;
1064: }
1065:
1066: /*
1067: * substr_count
1068: */
1069:
1070: int
1071: mbfl_substr_count(
1072: mbfl_string *haystack,
1073: mbfl_string *needle
1074: )
1075: {
1076: int n, result = 0;
1077: unsigned char *p;
1078: mbfl_convert_filter *filter;
1079: struct collector_strpos_data pc;
1080:
1081: if (haystack == NULL || needle == NULL) {
1082: return -8;
1083: }
1084: /* needle is converted into wchar */
1085: mbfl_wchar_device_init(&pc.needle);
1086: filter = mbfl_convert_filter_new(
1087: needle->no_encoding,
1088: mbfl_no_encoding_wchar,
1089: mbfl_wchar_device_output, 0, &pc.needle);
1090: if (filter == NULL) {
1091: return -4;
1092: }
1093: p = needle->val;
1094: n = needle->len;
1095: if (p != NULL) {
1096: while (n > 0) {
1097: if ((*filter->filter_function)(*p++, filter) < 0) {
1098: break;
1099: }
1100: n--;
1101: }
1102: }
1103: mbfl_convert_filter_flush(filter);
1104: mbfl_convert_filter_delete(filter);
1105: pc.needle_len = pc.needle.pos;
1106: if (pc.needle.buffer == NULL) {
1107: return -4;
1108: }
1109: if (pc.needle_len <= 0) {
1110: mbfl_wchar_device_clear(&pc.needle);
1111: return -2;
1112: }
1113: /* initialize filter and collector data */
1114: filter = mbfl_convert_filter_new(
1115: haystack->no_encoding,
1116: mbfl_no_encoding_wchar,
1117: collector_strpos, 0, &pc);
1118: if (filter == NULL) {
1119: mbfl_wchar_device_clear(&pc.needle);
1120: return -4;
1121: }
1122: pc.start = 0;
1123: pc.output = 0;
1124: pc.needle_pos = 0;
1125: pc.found_pos = 0;
1126: pc.matched_pos = -1;
1127:
1128: /* feed data */
1129: p = haystack->val;
1130: n = haystack->len;
1131: if (p != NULL) {
1132: while (n > 0) {
1133: if ((*filter->filter_function)(*p++, filter) < 0) {
1134: pc.matched_pos = -4;
1135: break;
1136: }
1137: if (pc.matched_pos >= 0) {
1138: ++result;
1139: pc.matched_pos = -1;
1140: pc.needle_pos = 0;
1141: }
1142: n--;
1143: }
1144: }
1145: mbfl_convert_filter_flush(filter);
1146: mbfl_convert_filter_delete(filter);
1147: mbfl_wchar_device_clear(&pc.needle);
1148:
1149: return result;
1150: }
1151:
1152: /*
1153: * substr
1154: */
1155: struct collector_substr_data {
1156: mbfl_convert_filter *next_filter;
1157: int start;
1158: int stop;
1159: int output;
1160: };
1161:
1162: static int
1163: collector_substr(int c, void* data)
1164: {
1165: struct collector_substr_data *pc = (struct collector_substr_data*)data;
1166:
1167: if (pc->output >= pc->stop) {
1168: return -1;
1169: }
1170:
1171: if (pc->output >= pc->start) {
1172: (*pc->next_filter->filter_function)(c, pc->next_filter);
1173: }
1174:
1175: pc->output++;
1176:
1177: return c;
1178: }
1179:
1180: mbfl_string *
1181: mbfl_substr(
1182: mbfl_string *string,
1183: mbfl_string *result,
1184: int from,
1185: int length)
1186: {
1187: const mbfl_encoding *encoding;
1188: int n, m, k, len, start, end;
1189: unsigned char *p, *w;
1190: const unsigned char *mbtab;
1191:
1192: encoding = mbfl_no2encoding(string->no_encoding);
1193: if (encoding == NULL || string == NULL || result == NULL) {
1194: return NULL;
1195: }
1196: mbfl_string_init(result);
1197: result->no_language = string->no_language;
1198: result->no_encoding = string->no_encoding;
1199:
1200: if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1201: encoding->mblen_table != NULL) {
1202: len = string->len;
1203: start = from;
1204: end = from + length;
1205: if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1206: start *= 2;
1207: end = start + length*2;
1208: } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1209: start *= 4;
1210: end = start + length*4;
1211: } else if (encoding->mblen_table != NULL) {
1212: mbtab = encoding->mblen_table;
1213: start = 0;
1214: end = 0;
1215: n = 0;
1216: k = 0;
1217: p = string->val;
1218: if (p != NULL) {
1219: /* search start position */
1220: while (k <= from) {
1221: start = n;
1222: if (n >= len) {
1223: break;
1224: }
1225: m = mbtab[*p];
1226: n += m;
1227: p += m;
1228: k++;
1229: }
1230: /* detect end position */
1231: k = 0;
1232: end = start;
1233: while (k < length) {
1234: end = n;
1235: if (n >= len) {
1236: break;
1237: }
1238: m = mbtab[*p];
1239: n += m;
1240: p += m;
1241: k++;
1242: }
1243: }
1244: }
1245:
1246: if (start > len) {
1247: start = len;
1248: }
1249: if (start < 0) {
1250: start = 0;
1251: }
1252: if (end > len) {
1253: end = len;
1254: }
1255: if (end < 0) {
1256: end = 0;
1257: }
1258: if (start > end) {
1259: start = end;
1260: }
1261:
1262: /* allocate memory and copy */
1263: n = end - start;
1264: result->len = 0;
1265: result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
1266: if (w != NULL) {
1267: p = string->val;
1268: if (p != NULL) {
1269: p += start;
1270: result->len = n;
1271: while (n > 0) {
1272: *w++ = *p++;
1273: n--;
1274: }
1275: }
1276: *w++ = '\0';
1277: *w++ = '\0';
1278: *w++ = '\0';
1279: *w = '\0';
1280: } else {
1281: result = NULL;
1282: }
1283: } else {
1284: mbfl_memory_device device;
1285: struct collector_substr_data pc;
1286: mbfl_convert_filter *decoder;
1287: mbfl_convert_filter *encoder;
1288:
1289: mbfl_memory_device_init(&device, length + 1, 0);
1290: mbfl_string_init(result);
1291: result->no_language = string->no_language;
1292: result->no_encoding = string->no_encoding;
1293: /* output code filter */
1294: decoder = mbfl_convert_filter_new(
1295: mbfl_no_encoding_wchar,
1296: string->no_encoding,
1297: mbfl_memory_device_output, 0, &device);
1298: /* wchar filter */
1299: encoder = mbfl_convert_filter_new(
1300: string->no_encoding,
1301: mbfl_no_encoding_wchar,
1302: collector_substr, 0, &pc);
1303: if (decoder == NULL || encoder == NULL) {
1304: mbfl_convert_filter_delete(encoder);
1305: mbfl_convert_filter_delete(decoder);
1306: return NULL;
1307: }
1308: pc.next_filter = decoder;
1309: pc.start = from;
1310: pc.stop = from + length;
1311: pc.output = 0;
1312:
1313: /* feed data */
1314: p = string->val;
1315: n = string->len;
1316: if (p != NULL) {
1317: while (n > 0) {
1318: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1319: break;
1320: }
1321: n--;
1322: }
1323: }
1324:
1325: mbfl_convert_filter_flush(encoder);
1326: mbfl_convert_filter_flush(decoder);
1327: result = mbfl_memory_device_result(&device, result);
1328: mbfl_convert_filter_delete(encoder);
1329: mbfl_convert_filter_delete(decoder);
1330: }
1331:
1332: return result;
1333: }
1334:
1335: /*
1336: * strcut
1337: */
1338: mbfl_string *
1339: mbfl_strcut(
1340: mbfl_string *string,
1341: mbfl_string *result,
1342: int from,
1343: int length)
1344: {
1345: const mbfl_encoding *encoding;
1346: mbfl_memory_device device;
1347:
1348: /* validate the parameters */
1349: if (string == NULL || string->val == NULL || result == NULL) {
1350: return NULL;
1351: }
1352:
1353: if (from < 0 || length < 0) {
1354: return NULL;
1355: }
1356:
1357: if (from >= string->len) {
1358: from = string->len;
1359: }
1360:
1361: encoding = mbfl_no2encoding(string->no_encoding);
1362: if (encoding == NULL) {
1363: return NULL;
1364: }
1365:
1366: mbfl_string_init(result);
1367: result->no_language = string->no_language;
1368: result->no_encoding = string->no_encoding;
1369:
1370: if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1371: | MBFL_ENCTYPE_WCS2BE
1372: | MBFL_ENCTYPE_WCS2LE
1373: | MBFL_ENCTYPE_WCS4BE
1374: | MBFL_ENCTYPE_WCS4LE))
1375: || encoding->mblen_table != NULL) {
1376: const unsigned char *start = NULL;
1377: const unsigned char *end = NULL;
1378: unsigned char *w;
1379: unsigned int sz;
1380:
1381: if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1382: from &= -2;
1383:
1384: if (from + length >= string->len) {
1385: length = string->len - from;
1386: }
1387:
1388: start = string->val + from;
1389: end = start + (length & -2);
1390: } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1391: from &= -4;
1392:
1393: if (from + length >= string->len) {
1394: length = string->len - from;
1395: }
1396:
1397: start = string->val + from;
1398: end = start + (length & -4);
1399: } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1400: if (from + length >= string->len) {
1401: length = string->len - from;
1402: }
1403:
1404: start = string->val + from;
1405: end = start + length;
1406: } else if (encoding->mblen_table != NULL) {
1407: const unsigned char *mbtab = encoding->mblen_table;
1408: const unsigned char *p, *q;
1409: int m;
1410:
1411: /* search start position */
1412: for (m = 0, p = string->val, q = p + from;
1413: p < q; p += (m = mbtab[*p]));
1414:
1415: if (p > q) {
1416: p -= m;
1417: }
1418:
1419: start = p;
1420:
1421: /* search end position */
1422: if ((start - string->val) + length >= (int)string->len) {
1423: end = string->val + string->len;
1424: } else {
1425: for (q = p + length; p < q; p += (m = mbtab[*p]));
1426:
1427: if (p > q) {
1428: p -= m;
1429: }
1430: end = p;
1431: }
1432: } else {
1433: /* never reached */
1434: return NULL;
1435: }
1436:
1437: /* allocate memory and copy string */
1438: sz = end - start;
1439: if ((w = (unsigned char*)mbfl_calloc(sz + 8,
1440: sizeof(unsigned char))) == NULL) {
1441: return NULL;
1442: }
1443:
1444: memcpy(w, start, sz);
1445: w[sz] = '\0';
1446: w[sz + 1] = '\0';
1447: w[sz + 2] = '\0';
1448: w[sz + 3] = '\0';
1449:
1450: result->val = w;
1451: result->len = sz;
1452: } else {
1453: mbfl_convert_filter *encoder = NULL;
1454: mbfl_convert_filter *decoder = NULL;
1455: const unsigned char *p, *q, *r;
1456: struct {
1457: mbfl_convert_filter encoder;
1458: mbfl_convert_filter decoder;
1459: const unsigned char *p;
1460: int pos;
1461: } bk, _bk;
1462:
1463: /* output code filter */
1464: if (!(decoder = mbfl_convert_filter_new(
1465: mbfl_no_encoding_wchar,
1466: string->no_encoding,
1467: mbfl_memory_device_output, 0, &device))) {
1468: return NULL;
1469: }
1470:
1471: /* wchar filter */
1472: if (!(encoder = mbfl_convert_filter_new(
1473: string->no_encoding,
1474: mbfl_no_encoding_wchar,
1475: mbfl_filter_output_null,
1476: NULL, NULL))) {
1477: mbfl_convert_filter_delete(decoder);
1478: return NULL;
1479: }
1480:
1481: mbfl_memory_device_init(&device, length + 8, 0);
1482:
1483: p = string->val;
1484:
1485: /* search start position */
1486: for (q = string->val + from; p < q; p++) {
1487: (*encoder->filter_function)(*p, encoder);
1488: }
1489:
1490: /* switch the drain direction */
1491: encoder->output_function = (int(*)(int,void *))decoder->filter_function;
1492: encoder->flush_function = (int(*)(void *))decoder->filter_flush;
1493: encoder->data = decoder;
1494:
1495: q = string->val + string->len;
1496:
1497: /* save the encoder, decoder state and the pointer */
1498: mbfl_convert_filter_copy(decoder, &_bk.decoder);
1499: mbfl_convert_filter_copy(encoder, &_bk.encoder);
1500: _bk.p = p;
1501: _bk.pos = device.pos;
1502:
1503: if (length > q - p) {
1504: length = q - p;
1505: }
1506:
1507: if (length >= 20) {
1508: /* output a little shorter than "length" */
1509: /* XXX: the constant "20" was determined purely on the heuristics. */
1510: for (r = p + length - 20; p < r; p++) {
1511: (*encoder->filter_function)(*p, encoder);
1512: }
1513:
1514: /* if the offset of the resulting string exceeds the length,
1515: * then restore the state */
1516: if (device.pos > length) {
1517: p = _bk.p;
1518: device.pos = _bk.pos;
1519: decoder->filter_dtor(decoder);
1520: encoder->filter_dtor(encoder);
1521: mbfl_convert_filter_copy(&_bk.decoder, decoder);
1522: mbfl_convert_filter_copy(&_bk.encoder, encoder);
1523: bk = _bk;
1524: } else {
1525: /* save the encoder, decoder state and the pointer */
1526: mbfl_convert_filter_copy(decoder, &bk.decoder);
1527: mbfl_convert_filter_copy(encoder, &bk.encoder);
1528: bk.p = p;
1529: bk.pos = device.pos;
1530:
1531: /* flush the stream */
1532: (*encoder->filter_flush)(encoder);
1533:
1534: /* if the offset of the resulting string exceeds the length,
1535: * then restore the state */
1536: if (device.pos > length) {
1537: bk.decoder.filter_dtor(&bk.decoder);
1538: bk.encoder.filter_dtor(&bk.encoder);
1539:
1540: p = _bk.p;
1541: device.pos = _bk.pos;
1542: decoder->filter_dtor(decoder);
1543: encoder->filter_dtor(encoder);
1544: mbfl_convert_filter_copy(&_bk.decoder, decoder);
1545: mbfl_convert_filter_copy(&_bk.encoder, encoder);
1546: bk = _bk;
1547: } else {
1548: _bk.decoder.filter_dtor(&_bk.decoder);
1549: _bk.encoder.filter_dtor(&_bk.encoder);
1550:
1551: p = bk.p;
1552: device.pos = bk.pos;
1553: decoder->filter_dtor(decoder);
1554: encoder->filter_dtor(encoder);
1555: mbfl_convert_filter_copy(&bk.decoder, decoder);
1556: mbfl_convert_filter_copy(&bk.encoder, encoder);
1557: }
1558: }
1559: } else {
1560: bk = _bk;
1561: }
1562:
1563: /* detect end position */
1564: while (p < q) {
1565: (*encoder->filter_function)(*p, encoder);
1566:
1567: if (device.pos > length) {
1568: /* restore filter */
1569: p = bk.p;
1570: device.pos = bk.pos;
1571: decoder->filter_dtor(decoder);
1572: encoder->filter_dtor(encoder);
1573: mbfl_convert_filter_copy(&bk.decoder, decoder);
1574: mbfl_convert_filter_copy(&bk.encoder, encoder);
1575: break;
1576: }
1577:
1578: p++;
1579:
1580: /* backup current state */
1581: mbfl_convert_filter_copy(decoder, &_bk.decoder);
1582: mbfl_convert_filter_copy(encoder, &_bk.encoder);
1583: _bk.pos = device.pos;
1584: _bk.p = p;
1585:
1586: (*encoder->filter_flush)(encoder);
1587:
1588: if (device.pos > length) {
1589: _bk.decoder.filter_dtor(&_bk.decoder);
1590: _bk.encoder.filter_dtor(&_bk.encoder);
1591:
1592: /* restore filter */
1593: p = bk.p;
1594: device.pos = bk.pos;
1595: decoder->filter_dtor(decoder);
1596: encoder->filter_dtor(encoder);
1597: mbfl_convert_filter_copy(&bk.decoder, decoder);
1598: mbfl_convert_filter_copy(&bk.encoder, encoder);
1599: break;
1600: }
1601:
1602: bk.decoder.filter_dtor(&bk.decoder);
1603: bk.encoder.filter_dtor(&bk.encoder);
1604:
1605: p = _bk.p;
1606: device.pos = _bk.pos;
1607: decoder->filter_dtor(decoder);
1608: encoder->filter_dtor(encoder);
1609: mbfl_convert_filter_copy(&_bk.decoder, decoder);
1610: mbfl_convert_filter_copy(&_bk.encoder, encoder);
1611:
1612: bk = _bk;
1613: }
1614:
1615: (*encoder->filter_flush)(encoder);
1616:
1617: bk.decoder.filter_dtor(&bk.decoder);
1618: bk.encoder.filter_dtor(&bk.encoder);
1619:
1620: result = mbfl_memory_device_result(&device, result);
1621:
1622: mbfl_convert_filter_delete(encoder);
1623: mbfl_convert_filter_delete(decoder);
1624: }
1625:
1626: return result;
1627: }
1628:
1629:
1630: /*
1631: * strwidth
1632: */
1633: static int is_fullwidth(int c)
1634: {
1635: int i;
1636:
1637: if (c < mbfl_eaw_table[0].begin) {
1638: return 0;
1639: }
1640:
1641: for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1642: if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1643: return 1;
1644: }
1645: }
1646:
1647: return 0;
1648: }
1649:
/*
 * Filter output callback: add the display width of `c` (2 when
 * is_fullwidth() says so, otherwise 1) to the int counter that `data`
 * points to.  Returns `c` unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	int *total = (int *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1656:
1657: int
1658: mbfl_strwidth(mbfl_string *string)
1659: {
1660: int len, n;
1661: unsigned char *p;
1662: mbfl_convert_filter *filter;
1663:
1664: len = 0;
1665: if (string->len > 0 && string->val != NULL) {
1666: /* wchar filter */
1667: filter = mbfl_convert_filter_new(
1668: string->no_encoding,
1669: mbfl_no_encoding_wchar,
1670: filter_count_width, 0, &len);
1671: if (filter == NULL) {
1672: mbfl_convert_filter_delete(filter);
1673: return -1;
1674: }
1675:
1676: /* feed data */
1677: p = string->val;
1678: n = string->len;
1679: while (n > 0) {
1680: (*filter->filter_function)(*p++, filter);
1681: n--;
1682: }
1683:
1684: mbfl_convert_filter_flush(filter);
1685: mbfl_convert_filter_delete(filter);
1686: }
1687:
1688: return len;
1689: }
1690:
1691:
1692: /*
1693: * strimwidth
1694: */
1695: struct collector_strimwidth_data {
1696: mbfl_convert_filter *decoder;
1697: mbfl_convert_filter *decoder_backup;
1698: mbfl_memory_device device;
1699: int from;
1700: int width;
1701: int outwidth;
1702: int outchar;
1703: int status;
1704: int endpos;
1705: };
1706:
1707: static int
1708: collector_strimwidth(int c, void* data)
1709: {
1710: struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1711:
1712: switch (pc->status) {
1713: case 10:
1714: (*pc->decoder->filter_function)(c, pc->decoder);
1715: break;
1716: default:
1717: if (pc->outchar >= pc->from) {
1718: pc->outwidth += (is_fullwidth(c) ? 2: 1);
1719:
1720: if (pc->outwidth > pc->width) {
1721: if (pc->status == 0) {
1722: pc->endpos = pc->device.pos;
1723: mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1724: }
1725: pc->status++;
1726: (*pc->decoder->filter_function)(c, pc->decoder);
1727: c = -1;
1728: } else {
1729: (*pc->decoder->filter_function)(c, pc->decoder);
1730: }
1731: }
1732: pc->outchar++;
1733: break;
1734: }
1735:
1736: return c;
1737: }
1738:
1739: mbfl_string *
1740: mbfl_strimwidth(
1741: mbfl_string *string,
1742: mbfl_string *marker,
1743: mbfl_string *result,
1744: int from,
1745: int width)
1746: {
1747: struct collector_strimwidth_data pc;
1748: mbfl_convert_filter *encoder;
1749: int n, mkwidth;
1750: unsigned char *p;
1751:
1752: if (string == NULL || result == NULL) {
1753: return NULL;
1754: }
1755: mbfl_string_init(result);
1756: result->no_language = string->no_language;
1757: result->no_encoding = string->no_encoding;
1758: mbfl_memory_device_init(&pc.device, width, 0);
1759:
1760: /* output code filter */
1761: pc.decoder = mbfl_convert_filter_new(
1762: mbfl_no_encoding_wchar,
1763: string->no_encoding,
1764: mbfl_memory_device_output, 0, &pc.device);
1765: pc.decoder_backup = mbfl_convert_filter_new(
1766: mbfl_no_encoding_wchar,
1767: string->no_encoding,
1768: mbfl_memory_device_output, 0, &pc.device);
1769: /* wchar filter */
1770: encoder = mbfl_convert_filter_new(
1771: string->no_encoding,
1772: mbfl_no_encoding_wchar,
1773: collector_strimwidth, 0, &pc);
1774: if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1775: mbfl_convert_filter_delete(encoder);
1776: mbfl_convert_filter_delete(pc.decoder);
1777: mbfl_convert_filter_delete(pc.decoder_backup);
1778: return NULL;
1779: }
1780: mkwidth = 0;
1781: if (marker) {
1782: mkwidth = mbfl_strwidth(marker);
1783: }
1784: pc.from = from;
1785: pc.width = width - mkwidth;
1786: pc.outwidth = 0;
1787: pc.outchar = 0;
1788: pc.status = 0;
1789: pc.endpos = 0;
1790:
1791: /* feed data */
1792: p = string->val;
1793: n = string->len;
1794: if (p != NULL) {
1795: while (n > 0) {
1796: n--;
1797: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1798: break;
1799: }
1800: }
1801: mbfl_convert_filter_flush(encoder);
1802: if (pc.status != 0 && mkwidth > 0) {
1803: pc.width += mkwidth;
1804: while (n > 0) {
1805: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1806: break;
1807: }
1808: n--;
1809: }
1810: mbfl_convert_filter_flush(encoder);
1811: if (pc.status != 1) {
1812: pc.status = 10;
1813: pc.device.pos = pc.endpos;
1814: mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1815: mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar);
1816: p = marker->val;
1817: n = marker->len;
1818: while (n > 0) {
1819: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1820: break;
1821: }
1822: n--;
1823: }
1824: mbfl_convert_filter_flush(encoder);
1825: }
1826: } else if (pc.status != 0) {
1827: pc.device.pos = pc.endpos;
1828: mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1829: }
1830: mbfl_convert_filter_flush(pc.decoder);
1831: }
1832: result = mbfl_memory_device_result(&pc.device, result);
1833: mbfl_convert_filter_delete(encoder);
1834: mbfl_convert_filter_delete(pc.decoder);
1835: mbfl_convert_filter_delete(pc.decoder_backup);
1836:
1837: return result;
1838: }
1839:
1840: mbfl_string *
1841: mbfl_ja_jp_hantozen(
1842: mbfl_string *string,
1843: mbfl_string *result,
1844: int mode)
1845: {
1846: int n;
1847: unsigned char *p;
1848: const mbfl_encoding *encoding;
1849: mbfl_memory_device device;
1850: mbfl_convert_filter *decoder = NULL;
1851: mbfl_convert_filter *encoder = NULL;
1852: mbfl_convert_filter *tl_filter = NULL;
1853: mbfl_convert_filter *next_filter = NULL;
1854: mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1855:
1856: /* validate parameters */
1857: if (string == NULL || result == NULL) {
1858: return NULL;
1859: }
1860:
1861: encoding = mbfl_no2encoding(string->no_encoding);
1862: if (encoding == NULL) {
1863: return NULL;
1864: }
1865:
1866: mbfl_memory_device_init(&device, string->len, 0);
1867: mbfl_string_init(result);
1868:
1869: result->no_language = string->no_language;
1870: result->no_encoding = string->no_encoding;
1871:
1872: decoder = mbfl_convert_filter_new(
1873: mbfl_no_encoding_wchar,
1874: string->no_encoding,
1875: mbfl_memory_device_output, 0, &device);
1876: if (decoder == NULL) {
1877: goto out;
1878: }
1879: next_filter = decoder;
1880:
1881: param =
1882: (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
1883: if (param == NULL) {
1884: goto out;
1885: }
1886:
1887: param->mode = mode;
1888:
1889: tl_filter = mbfl_convert_filter_new2(
1890: &vtbl_tl_jisx0201_jisx0208,
1891: (int(*)(int, void*))next_filter->filter_function,
1892: (int(*)(void*))next_filter->filter_flush,
1893: next_filter);
1894: if (tl_filter == NULL) {
1895: mbfl_free(param);
1896: goto out;
1897: }
1898:
1899: tl_filter->opaque = param;
1900: next_filter = tl_filter;
1901:
1902: encoder = mbfl_convert_filter_new(
1903: string->no_encoding,
1904: mbfl_no_encoding_wchar,
1905: (int(*)(int, void*))next_filter->filter_function,
1906: (int(*)(void*))next_filter->filter_flush,
1907: next_filter);
1908: if (encoder == NULL) {
1909: goto out;
1910: }
1911:
1912: /* feed data */
1913: p = string->val;
1914: n = string->len;
1915: if (p != NULL) {
1916: while (n > 0) {
1917: if ((*encoder->filter_function)(*p++, encoder) < 0) {
1918: break;
1919: }
1920: n--;
1921: }
1922: }
1923:
1924: mbfl_convert_filter_flush(encoder);
1925: result = mbfl_memory_device_result(&device, result);
1926: out:
1927: if (tl_filter != NULL) {
1928: if (tl_filter->opaque != NULL) {
1929: mbfl_free(tl_filter->opaque);
1930: }
1931: mbfl_convert_filter_delete(tl_filter);
1932: }
1933:
1934: if (decoder != NULL) {
1935: mbfl_convert_filter_delete(decoder);
1936: }
1937:
1938: if (encoder != NULL) {
1939: mbfl_convert_filter_delete(encoder);
1940: }
1941:
1942: return result;
1943: }
1944:
1945:
1946: /*
1947: * MIME header encode
1948: */
1949: struct mime_header_encoder_data {
1950: mbfl_convert_filter *conv1_filter;
1951: mbfl_convert_filter *block_filter;
1952: mbfl_convert_filter *conv2_filter;
1953: mbfl_convert_filter *conv2_filter_backup;
1954: mbfl_convert_filter *encod_filter;
1955: mbfl_convert_filter *encod_filter_backup;
1956: mbfl_memory_device outdev;
1957: mbfl_memory_device tmpdev;
1958: int status1;
1959: int status2;
1960: int prevpos;
1961: int linehead;
1962: int firstindent;
1963: int encnamelen;
1964: int lwsplen;
1965: char encname[128];
1966: char lwsp[16];
1967: };
1968:
1969: static int
1970: mime_header_encoder_block_collector(int c, void *data)
1971: {
1972: int n;
1973: struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1974:
1975: switch (pe->status2) {
1976: case 1: /* encoded word */
1977: pe->prevpos = pe->outdev.pos;
1978: mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
1979: mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
1980: (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1981: (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1982: (*pe->encod_filter->filter_flush)(pe->encod_filter);
1983: n = pe->outdev.pos - pe->linehead + pe->firstindent;
1984: pe->outdev.pos = pe->prevpos;
1985: mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
1986: mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
1987: if (n >= 74) {
1988: (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1989: (*pe->encod_filter->filter_flush)(pe->encod_filter);
1990: mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
1991: mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1992: pe->linehead = pe->outdev.pos;
1993: pe->firstindent = 0;
1994: mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1995: c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1996: } else {
1997: c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1998: }
1999: break;
2000:
2001: default:
2002: mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2003: c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2004: pe->status2 = 1;
2005: break;
2006: }
2007:
2008: return c;
2009: }
2010:
2011: static int
2012: mime_header_encoder_collector(int c, void *data)
2013: {
2014: static int qp_table[256] = {
2015: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2016: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2017: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
2018: 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
2019: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
2020: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
2021: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
2022: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
2023: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
2024: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
2025: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
2026: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
2027: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
2028: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
2029: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
2030: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0 */
2031: };
2032:
2033: int n;
2034: struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2035:
2036: switch (pe->status1) {
2037: case 11: /* encoded word */
2038: (*pe->block_filter->filter_function)(c, pe->block_filter);
2039: break;
2040:
2041: default: /* ASCII */
2042: if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
2043: mbfl_memory_device_output(c, &pe->tmpdev);
2044: pe->status1 = 1;
2045: } else if (pe->status1 == 0 && c == 0x20) { /* repeat SPACE */
2046: mbfl_memory_device_output(c, &pe->tmpdev);
2047: } else {
2048: if (pe->tmpdev.pos < 74 && c == 0x20) {
2049: n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
2050: if (n > 74) {
2051: mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
2052: pe->linehead = pe->outdev.pos;
2053: pe->firstindent = 0;
2054: } else if (pe->outdev.pos > 0) {
2055: mbfl_memory_device_output(0x20, &pe->outdev);
2056: }
2057: mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2058: mbfl_memory_device_reset(&pe->tmpdev);
2059: pe->status1 = 0;
2060: } else {
2061: n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
2062: if (n > 60) {
2063: mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
2064: pe->linehead = pe->outdev.pos;
2065: pe->firstindent = 0;
2066: } else if (pe->outdev.pos > 0) {
2067: mbfl_memory_device_output(0x20, &pe->outdev);
2068: }
2069: mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
2070: mbfl_memory_device_reset(&pe->tmpdev);
2071: (*pe->block_filter->filter_function)(c, pe->block_filter);
2072: pe->status1 = 11;
2073: }
2074: }
2075: break;
2076: }
2077:
2078: return c;
2079: }
2080:
2081: mbfl_string *
2082: mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2083: {
2084: if (pe->status1 >= 10) {
2085: (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2086: (*pe->encod_filter->filter_flush)(pe->encod_filter);
2087: mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2088: } else if (pe->tmpdev.pos > 0) {
2089: if (pe->outdev.pos > 0) {
2090: if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2091: mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2092: } else {
2093: mbfl_memory_device_output(0x20, &pe->outdev);
2094: }
2095: }
2096: mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2097: }
2098: mbfl_memory_device_reset(&pe->tmpdev);
2099: pe->prevpos = 0;
2100: pe->linehead = 0;
2101: pe->status1 = 0;
2102: pe->status2 = 0;
2103:
2104: return mbfl_memory_device_result(&pe->outdev, result);
2105: }
2106:
2107: struct mime_header_encoder_data*
2108: mime_header_encoder_new(
2109: enum mbfl_no_encoding incode,
2110: enum mbfl_no_encoding outcode,
2111: enum mbfl_no_encoding transenc)
2112: {
2113: int n;
2114: const char *s;
2115: const mbfl_encoding *outencoding;
2116: struct mime_header_encoder_data *pe;
2117:
2118: /* get output encoding and check MIME charset name */
2119: outencoding = mbfl_no2encoding(outcode);
2120: if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
2121: return NULL;
2122: }
2123:
2124: pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2125: if (pe == NULL) {
2126: return NULL;
2127: }
2128:
2129: mbfl_memory_device_init(&pe->outdev, 0, 0);
2130: mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2131: pe->prevpos = 0;
2132: pe->linehead = 0;
2133: pe->firstindent = 0;
2134: pe->status1 = 0;
2135: pe->status2 = 0;
2136:
2137: /* make the encoding description string exp. "=?ISO-2022-JP?B?" */
2138: n = 0;
2139: pe->encname[n++] = 0x3d;
2140: pe->encname[n++] = 0x3f;
2141: s = outencoding->mime_name;
2142: while (*s) {
2143: pe->encname[n++] = *s++;
2144: }
2145: pe->encname[n++] = 0x3f;
2146: if (transenc == mbfl_no_encoding_qprint) {
2147: pe->encname[n++] = 0x51;
2148: } else {
2149: pe->encname[n++] = 0x42;
2150: transenc = mbfl_no_encoding_base64;
2151: }
2152: pe->encname[n++] = 0x3f;
2153: pe->encname[n] = '\0';
2154: pe->encnamelen = n;
2155:
2156: n = 0;
2157: pe->lwsp[n++] = 0x0d;
2158: pe->lwsp[n++] = 0x0a;
2159: pe->lwsp[n++] = 0x20;
2160: pe->lwsp[n] = '\0';
2161: pe->lwsplen = n;
2162:
2163: /* transfer encode filter */
2164: pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2165: pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2166:
2167: /* Output code filter */
2168: pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2169: pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2170:
2171: /* encoded block filter */
2172: pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2173:
2174: /* Input code filter */
2175: pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);
2176:
2177: if (pe->encod_filter == NULL ||
2178: pe->encod_filter_backup == NULL ||
2179: pe->conv2_filter == NULL ||
2180: pe->conv2_filter_backup == NULL ||
2181: pe->conv1_filter == NULL) {
2182: mime_header_encoder_delete(pe);
2183: return NULL;
2184: }
2185:
2186: if (transenc == mbfl_no_encoding_qprint) {
2187: pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2188: pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2189: } else {
2190: pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2191: pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2192: }
2193:
2194: return pe;
2195: }
2196:
2197: void
2198: mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2199: {
2200: if (pe) {
2201: mbfl_convert_filter_delete(pe->conv1_filter);
2202: mbfl_convert_filter_delete(pe->block_filter);
2203: mbfl_convert_filter_delete(pe->conv2_filter);
2204: mbfl_convert_filter_delete(pe->conv2_filter_backup);
2205: mbfl_convert_filter_delete(pe->encod_filter);
2206: mbfl_convert_filter_delete(pe->encod_filter_backup);
2207: mbfl_memory_device_clear(&pe->outdev);
2208: mbfl_memory_device_clear(&pe->tmpdev);
2209: mbfl_free((void*)pe);
2210: }
2211: }
2212:
2213: int
2214: mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2215: {
2216: return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2217: }
2218:
2219: mbfl_string *
2220: mbfl_mime_header_encode(
2221: mbfl_string *string,
2222: mbfl_string *result,
2223: enum mbfl_no_encoding outcode,
2224: enum mbfl_no_encoding encoding,
2225: const char *linefeed,
2226: int indent)
2227: {
2228: int n;
2229: unsigned char *p;
2230: struct mime_header_encoder_data *pe;
2231:
2232: mbfl_string_init(result);
2233: result->no_language = string->no_language;
2234: result->no_encoding = mbfl_no_encoding_ascii;
2235:
2236: pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
2237: if (pe == NULL) {
2238: return NULL;
2239: }
2240:
2241: if (linefeed != NULL) {
2242: n = 0;
2243: while (*linefeed && n < 8) {
2244: pe->lwsp[n++] = *linefeed++;
2245: }
2246: pe->lwsp[n++] = 0x20;
2247: pe->lwsp[n] = '\0';
2248: pe->lwsplen = n;
2249: }
2250: if (indent > 0 && indent < 74) {
2251: pe->firstindent = indent;
2252: }
2253:
2254: n = string->len;
2255: p = string->val;
2256: while (n > 0) {
2257: (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2258: n--;
2259: }
2260:
2261: result = mime_header_encoder_result(pe, result);
2262: mime_header_encoder_delete(pe);
2263:
2264: return result;
2265: }
2266:
2267:
2268: /*
2269: * MIME header decode
2270: */
2271: struct mime_header_decoder_data {
2272: mbfl_convert_filter *deco_filter;
2273: mbfl_convert_filter *conv1_filter;
2274: mbfl_convert_filter *conv2_filter;
2275: mbfl_memory_device outdev;
2276: mbfl_memory_device tmpdev;
2277: int cspos;
2278: int status;
2279: enum mbfl_no_encoding encoding;
2280: enum mbfl_no_encoding incode;
2281: enum mbfl_no_encoding outcode;
2282: };
2283:
2284: static int
2285: mime_header_decoder_collector(int c, void* data)
2286: {
2287: const mbfl_encoding *encoding;
2288: struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2289:
2290: switch (pd->status) {
2291: case 1:
2292: if (c == 0x3f) { /* ? */
2293: mbfl_memory_device_output(c, &pd->tmpdev);
2294: pd->cspos = pd->tmpdev.pos;
2295: pd->status = 2;
2296: } else {
2297: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2298: mbfl_memory_device_reset(&pd->tmpdev);
2299: if (c == 0x3d) { /* = */
2300: mbfl_memory_device_output(c, &pd->tmpdev);
2301: } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2302: pd->status = 9;
2303: } else {
2304: (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2305: pd->status = 0;
2306: }
2307: }
2308: break;
2309: case 2: /* store charset string */
2310: if (c == 0x3f) { /* ? */
2311: /* identify charset */
2312: mbfl_memory_device_output('\0', &pd->tmpdev);
2313: encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2314: if (encoding != NULL) {
2315: pd->incode = encoding->no_encoding;
2316: pd->status = 3;
2317: }
2318: mbfl_memory_device_unput(&pd->tmpdev);
2319: mbfl_memory_device_output(c, &pd->tmpdev);
2320: } else {
2321: mbfl_memory_device_output(c, &pd->tmpdev);
2322: if (pd->tmpdev.pos > 100) { /* too long charset string */
2323: pd->status = 0;
2324: } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2325: mbfl_memory_device_unput(&pd->tmpdev);
2326: pd->status = 9;
2327: }
2328: if (pd->status != 2) {
2329: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2330: mbfl_memory_device_reset(&pd->tmpdev);
2331: }
2332: }
2333: break;
2334: case 3: /* identify encoding */
2335: mbfl_memory_device_output(c, &pd->tmpdev);
2336: if (c == 0x42 || c == 0x62) { /* 'B' or 'b' */
2337: pd->encoding = mbfl_no_encoding_base64;
2338: pd->status = 4;
2339: } else if (c == 0x51 || c == 0x71) { /* 'Q' or 'q' */
2340: pd->encoding = mbfl_no_encoding_qprint;
2341: pd->status = 4;
2342: } else {
2343: if (c == 0x0d || c == 0x0a) { /* CR or LF */
2344: mbfl_memory_device_unput(&pd->tmpdev);
2345: pd->status = 9;
2346: } else {
2347: pd->status = 0;
2348: }
2349: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2350: mbfl_memory_device_reset(&pd->tmpdev);
2351: }
2352: break;
2353: case 4: /* reset filter */
2354: mbfl_memory_device_output(c, &pd->tmpdev);
2355: if (c == 0x3f) { /* ? */
2356: /* charset convert filter */
2357: mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar);
2358: /* decode filter */
2359: mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit);
2360: pd->status = 5;
2361: } else {
2362: if (c == 0x0d || c == 0x0a) { /* CR or LF */
2363: mbfl_memory_device_unput(&pd->tmpdev);
2364: pd->status = 9;
2365: } else {
2366: pd->status = 0;
2367: }
2368: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2369: }
2370: mbfl_memory_device_reset(&pd->tmpdev);
2371: break;
2372: case 5: /* encoded block */
2373: if (c == 0x3f) { /* ? */
2374: pd->status = 6;
2375: } else {
2376: (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2377: }
2378: break;
2379: case 6: /* check end position */
2380: if (c == 0x3d) { /* = */
2381: /* flush and reset filter */
2382: (*pd->deco_filter->filter_flush)(pd->deco_filter);
2383: (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2384: mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar);
2385: pd->status = 7;
2386: } else {
2387: (*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2388: if (c != 0x3f) { /* ? */
2389: (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2390: pd->status = 5;
2391: }
2392: }
2393: break;
2394: case 7: /* after encoded block */
2395: if (c == 0x0d || c == 0x0a) { /* CR LF */
2396: pd->status = 8;
2397: } else {
2398: mbfl_memory_device_output(c, &pd->tmpdev);
2399: if (c == 0x3d) { /* = */
2400: pd->status = 1;
2401: } else if (c != 0x20 && c != 0x09) { /* not space */
2402: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2403: mbfl_memory_device_reset(&pd->tmpdev);
2404: pd->status = 0;
2405: }
2406: }
2407: break;
2408: case 8: /* folding */
2409: case 9: /* folding */
2410: if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2411: if (c == 0x3d) { /* = */
2412: if (pd->status == 8) {
2413: mbfl_memory_device_output(0x20, &pd->tmpdev); /* SPACE */
2414: } else {
2415: (*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2416: }
2417: mbfl_memory_device_output(c, &pd->tmpdev);
2418: pd->status = 1;
2419: } else {
2420: mbfl_memory_device_output(0x20, &pd->tmpdev);
2421: mbfl_memory_device_output(c, &pd->tmpdev);
2422: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2423: mbfl_memory_device_reset(&pd->tmpdev);
2424: pd->status = 0;
2425: }
2426: }
2427: break;
2428: default: /* non encoded block */
2429: if (c == 0x0d || c == 0x0a) { /* CR LF */
2430: pd->status = 9;
2431: } else if (c == 0x3d) { /* = */
2432: mbfl_memory_device_output(c, &pd->tmpdev);
2433: pd->status = 1;
2434: } else {
2435: (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2436: }
2437: break;
2438: }
2439:
2440: return c;
2441: }
2442:
2443: mbfl_string *
2444: mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2445: {
2446: switch (pd->status) {
2447: case 1:
2448: case 2:
2449: case 3:
2450: case 4:
2451: case 7:
2452: case 8:
2453: case 9:
2454: mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2455: break;
2456: case 5:
2457: case 6:
2458: (*pd->deco_filter->filter_flush)(pd->deco_filter);
2459: (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2460: break;
2461: }
2462: (*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2463: mbfl_memory_device_reset(&pd->tmpdev);
2464: pd->status = 0;
2465:
2466: return mbfl_memory_device_result(&pd->outdev, result);
2467: }
2468:
2469: struct mime_header_decoder_data*
2470: mime_header_decoder_new(enum mbfl_no_encoding outcode)
2471: {
2472: struct mime_header_decoder_data *pd;
2473:
2474: pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2475: if (pd == NULL) {
2476: return NULL;
2477: }
2478:
2479: mbfl_memory_device_init(&pd->outdev, 0, 0);
2480: mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2481: pd->cspos = 0;
2482: pd->status = 0;
2483: pd->encoding = mbfl_no_encoding_pass;
2484: pd->incode = mbfl_no_encoding_ascii;
2485: pd->outcode = outcode;
2486: /* charset convert filter */
2487: pd->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2488: pd->conv1_filter = mbfl_convert_filter_new(pd->incode, mbfl_no_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2489: /* decode filter */
2490: pd->deco_filter = mbfl_convert_filter_new(pd->encoding, mbfl_no_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2491:
2492: if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2493: mime_header_decoder_delete(pd);
2494: return NULL;
2495: }
2496:
2497: return pd;
2498: }
2499:
2500: void
2501: mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2502: {
2503: if (pd) {
2504: mbfl_convert_filter_delete(pd->conv2_filter);
2505: mbfl_convert_filter_delete(pd->conv1_filter);
2506: mbfl_convert_filter_delete(pd->deco_filter);
2507: mbfl_memory_device_clear(&pd->outdev);
2508: mbfl_memory_device_clear(&pd->tmpdev);
2509: mbfl_free((void*)pd);
2510: }
2511: }
2512:
/*
 * Feed one input byte `c` to the decoder `pd`.
 * Thin public wrapper around mime_header_decoder_collector(); its return
 * value is passed back unchanged.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	int ret;

	ret = mime_header_decoder_collector(c, pd);
	return ret;
}
2518:
2519: mbfl_string *
2520: mbfl_mime_header_decode(
2521: mbfl_string *string,
2522: mbfl_string *result,
2523: enum mbfl_no_encoding outcode)
2524: {
2525: int n;
2526: unsigned char *p;
2527: struct mime_header_decoder_data *pd;
2528:
2529: mbfl_string_init(result);
2530: result->no_language = string->no_language;
2531: result->no_encoding = outcode;
2532:
2533: pd = mime_header_decoder_new(outcode);
2534: if (pd == NULL) {
2535: return NULL;
2536: }
2537:
2538: /* feed data */
2539: n = string->len;
2540: p = string->val;
2541: while (n > 0) {
2542: mime_header_decoder_collector(*p++, pd);
2543: n--;
2544: }
2545:
2546: result = mime_header_decoder_result(pd, result);
2547: mime_header_decoder_delete(pd);
2548:
2549: return result;
2550: }
2551:
2552:
2553:
2554: /*
2555: * convert HTML numeric entity
2556: */
2557: struct collector_htmlnumericentity_data {
2558: mbfl_convert_filter *decoder;
2559: int status;
2560: int cache;
2561: int digit;
2562: int *convmap;
2563: int mapsize;
2564: };
2565:
2566: static int
2567: collector_encode_htmlnumericentity(int c, void *data)
2568: {
2569: struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2570: int f, n, s, r, d, size, *mapelm;
2571:
2572: size = pc->mapsize;
2573: f = 0;
2574: n = 0;
2575: while (n < size) {
2576: mapelm = &(pc->convmap[n*4]);
2577: if (c >= mapelm[0] && c <= mapelm[1]) {
2578: s = (c + mapelm[2]) & mapelm[3];
2579: if (s >= 0) {
2580: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2581: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2582: r = 100000000;
2583: s %= r;
2584: while (r > 0) {
2585: d = s/r;
2586: if (d || f) {
2587: f = 1;
2588: s %= r;
2589: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2590: }
2591: r /= 10;
2592: }
2593: if (!f) {
2594: f = 1;
2595: (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2596: }
2597: (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2598: }
2599: }
2600: if (f) {
2601: break;
2602: }
2603: n++;
2604: }
2605: if (!f) {
2606: (*pc->decoder->filter_function)(c, pc->decoder);
2607: }
2608:
2609: return c;
2610: }
2611:
2612: static int
2613: collector_decode_htmlnumericentity(int c, void *data)
2614: {
2615: struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2616: int f, n, s, r, d, size, *mapelm;
2617:
2618: switch (pc->status) {
2619: case 1:
2620: if (c == 0x23) { /* '#' */
2621: pc->status = 2;
2622: } else {
2623: pc->status = 0;
2624: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2625: (*pc->decoder->filter_function)(c, pc->decoder);
2626: }
2627: break;
2628: case 2:
2629: if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2630: pc->cache = c - 0x30;
2631: pc->status = 3;
2632: pc->digit = 1;
2633: } else {
2634: pc->status = 0;
2635: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2636: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2637: (*pc->decoder->filter_function)(c, pc->decoder);
2638: }
2639: break;
2640: case 3:
2641: s = 0;
2642: f = 0;
2643: if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2644: if (pc->digit > 9) {
2645: pc->status = 0;
2646: s = pc->cache;
2647: f = 1;
2648: } else {
2649: s = pc->cache*10 + c - 0x30;
2650: pc->cache = s;
2651: pc->digit++;
2652: }
2653: } else {
2654: pc->status = 0;
2655: s = pc->cache;
2656: f = 1;
2657: n = 0;
2658: size = pc->mapsize;
2659: while (n < size) {
2660: mapelm = &(pc->convmap[n*4]);
2661: d = s - mapelm[2];
2662: if (d >= mapelm[0] && d <= mapelm[1]) {
2663: f = 0;
2664: (*pc->decoder->filter_function)(d, pc->decoder);
2665: if (c != 0x3b) { /* ';' */
2666: (*pc->decoder->filter_function)(c, pc->decoder);
2667: }
2668: break;
2669: }
2670: n++;
2671: }
2672: }
2673: if (f) {
2674: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2675: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2676: r = 1;
2677: n = pc->digit;
2678: while (n > 0) {
2679: r *= 10;
2680: n--;
2681: }
2682: s %= r;
2683: r /= 10;
2684: while (r > 0) {
2685: d = s/r;
2686: s %= r;
2687: r /= 10;
2688: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2689: }
2690: (*pc->decoder->filter_function)(c, pc->decoder);
2691: }
2692: break;
2693: default:
2694: if (c == 0x26) { /* '&' */
2695: pc->status = 1;
2696: } else {
2697: (*pc->decoder->filter_function)(c, pc->decoder);
2698: }
2699: break;
2700: }
2701:
2702: return c;
2703: }
2704:
2705: int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
2706: {
2707: struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
2708: int n, s, r, d;
2709:
2710: if (pc->status) {
2711: switch (pc->status) {
2712: case 1: /* '&' */
2713: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2714: break;
2715: case 2: /* '#' */
2716: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2717: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2718: break;
2719: case 3: /* '0'-'9' */
2720: (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2721: (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2722:
2723: s = pc->cache;
2724: r = 1;
2725: n = pc->digit;
2726: while (n > 0) {
2727: r *= 10;
2728: n--;
2729: }
2730: s %= r;
2731: r /= 10;
2732: while (r > 0) {
2733: d = s/r;
2734: s %= r;
2735: r /= 10;
2736: (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2737: }
2738:
2739: break;
2740: default:
2741: break;
2742: }
2743: }
2744:
2745: pc->status = 0;
2746: pc->cache = 0;
2747: pc->digit = 0;
2748:
2749: return 0;
2750: }
2751:
2752: mbfl_string *
2753: mbfl_html_numeric_entity(
2754: mbfl_string *string,
2755: mbfl_string *result,
2756: int *convmap,
2757: int mapsize,
2758: int type)
2759: {
2760: struct collector_htmlnumericentity_data pc;
2761: mbfl_memory_device device;
2762: mbfl_convert_filter *encoder;
2763: int n;
2764: unsigned char *p;
2765:
2766: if (string == NULL || result == NULL) {
2767: return NULL;
2768: }
2769: mbfl_string_init(result);
2770: result->no_language = string->no_language;
2771: result->no_encoding = string->no_encoding;
2772: mbfl_memory_device_init(&device, string->len, 0);
2773:
2774: /* output code filter */
2775: pc.decoder = mbfl_convert_filter_new(
2776: mbfl_no_encoding_wchar,
2777: string->no_encoding,
2778: mbfl_memory_device_output, 0, &device);
2779: /* wchar filter */
2780: if (type == 0) {
2781: encoder = mbfl_convert_filter_new(
2782: string->no_encoding,
2783: mbfl_no_encoding_wchar,
2784: collector_encode_htmlnumericentity, 0, &pc);
2785: } else {
2786: encoder = mbfl_convert_filter_new(
2787: string->no_encoding,
2788: mbfl_no_encoding_wchar,
2789: collector_decode_htmlnumericentity,
2790: (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
2791: }
2792: if (pc.decoder == NULL || encoder == NULL) {
2793: mbfl_convert_filter_delete(encoder);
2794: mbfl_convert_filter_delete(pc.decoder);
2795: return NULL;
2796: }
2797: pc.status = 0;
2798: pc.cache = 0;
2799: pc.digit = 0;
2800: pc.convmap = convmap;
2801: pc.mapsize = mapsize;
2802:
2803: /* feed data */
2804: p = string->val;
2805: n = string->len;
2806: if (p != NULL) {
2807: while (n > 0) {
2808: if ((*encoder->filter_function)(*p++, encoder) < 0) {
2809: break;
2810: }
2811: n--;
2812: }
2813: }
2814: mbfl_convert_filter_flush(encoder);
2815: mbfl_convert_filter_flush(pc.decoder);
2816: result = mbfl_memory_device_result(&device, result);
2817: mbfl_convert_filter_delete(encoder);
2818: mbfl_convert_filter_delete(pc.decoder);
2819:
2820: return result;
2821: }
2822:
2823: /*
2824: * Local variables:
2825: * tab-width: 4
2826: * c-basic-offset: 4
2827: * End:
2828: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>