Annotation of embedaddon/php/ext/gd/libgd/gdkanji.c, revision 1.1.1.1
1.1 misho 1:
2: /* gdkanji.c (Kanji code converter) */
3: /* written by Masahito Yamaga (ma@yama-ga.com) */
4:
5: #include <stdio.h>
6: #include <stdlib.h>
7: #include <string.h>
8: #include "gd.h"
9: #include "gdhelpers.h"
10:
11: #include <stdarg.h>
12: #if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
13: #include <iconv.h>
14: #ifdef HAVE_ERRNO_H
15: #include <errno.h>
16: #endif
17: #endif
18:
19: #if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
20: #define HAVE_ICONV 1
21: #endif
22:
23: #define LIBNAME "any2eucjp()"
24:
25: #if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
26: #ifndef SJISPRE
27: #define SJISPRE 1
28: #endif
29: #endif
30:
31: #ifdef TRUE
32: #undef TRUE
33: #endif
34: #ifdef FALSE
35: #undef FALSE
36: #endif
37:
38: #define TRUE 1
39: #define FALSE 0
40:
/* Encoding identifiers produced by DetectKanjiCode(). */
#define NEW       1   /* New JIS: ESC $ B seen */
#define OLD       2   /* Old JIS: ESC $ @ seen */
#define ESCI      3   /* ESC ( I seen (half-width kana escape) */
#define NEC       4   /* NEC Kanji: ESC K seen */
#define EUC       5   /* EUC-JP */
#define SJIS      6   /* Shift_JIS */
#define EUCORSJIS 7   /* 8-bit bytes seen, but EUC vs. SJIS is ambiguous */
#define ASCII     8   /* no kanji bytes found */

/* Source encoding names handed to do_convert(). */
#define NEWJISSTR "JIS7"
#define OLDJISSTR "jis"
#define EUCSTR    "eucJP"
#define SJISSTR   "SJIS"

/* Control bytes. */
#define ESC 0x1b   /* 27: introduces a JIS escape sequence */
#define SS2 0x8e   /* 142: precedes a half-width kana byte in EUC */
57:
/*
 * Write a printf-style diagnostic line to stdout, prefixed with LIBNAME.
 * The body is compiled only when DEBUG is defined; otherwise the call
 * does nothing.
 */
static void
debug (const char *format,...)
{
#ifdef DEBUG
  va_list ap;

  printf ("%s: ", LIBNAME);
  va_start (ap, format);
  vprintf (format, ap);
  va_end (ap);
  printf ("\n");
#else
  (void) format;
#endif
}
71:
72: static void
73: error (const char *format,...)
74: {
75: va_list args;
76: char *tmp;
77: TSRMLS_FETCH();
78:
79: va_start(args, format);
80: vspprintf(&tmp, 0, format, args);
81: va_end(args);
82: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", LIBNAME, tmp);
83: efree(tmp);
84: }
85:
86: /* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
87:
88: static int
89: DetectKanjiCode (unsigned char *str)
90: {
91: static int whatcode = ASCII;
92: int oldcode = ASCII;
93: int c, i;
94: char *lang = NULL;
95:
96: c = '\1';
97: i = 0;
98:
99: if (whatcode != EUCORSJIS && whatcode != ASCII)
100: {
101: oldcode = whatcode;
102: whatcode = ASCII;
103: }
104:
105: while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
106: {
107: if ((c = str[i++]) != '\0')
108: {
109: if (c == ESC)
110: {
111: c = str[i++];
112: if (c == '$')
113: {
114: c = str[i++];
115: if (c == 'B')
116: whatcode = NEW;
117: else if (c == '@')
118: whatcode = OLD;
119: }
120: else if (c == '(')
121: {
122: c = str[i++];
123: if (c == 'I')
124: whatcode = ESCI;
125: }
126: else if (c == 'K')
127: whatcode = NEC;
128: }
129: else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
130: whatcode = SJIS;
131: else if (c == SS2)
132: {
133: c = str[i++];
134: if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
135: whatcode = SJIS;
136: else if (c >= 161 && c <= 223)
137: whatcode = EUCORSJIS;
138: }
139: else if (c >= 161 && c <= 223)
140: {
141: c = str[i++];
142: if (c >= 240 && c <= 254)
143: whatcode = EUC;
144: else if (c >= 161 && c <= 223)
145: whatcode = EUCORSJIS;
146: else if (c >= 224 && c <= 239)
147: {
148: whatcode = EUCORSJIS;
149: while (c >= 64 && c != '\0' && whatcode == EUCORSJIS)
150: {
151: if (c >= 129)
152: {
153: if (c <= 141 || (c >= 143 && c <= 159))
154: whatcode = SJIS;
155: else if (c >= 253 && c <= 254)
156: whatcode = EUC;
157: }
158: c = str[i++];
159: }
160: }
161: else if (c <= 159)
162: whatcode = SJIS;
163: }
164: else if (c >= 240 && c <= 254)
165: whatcode = EUC;
166: else if (c >= 224 && c <= 239)
167: {
168: c = str[i++];
169: if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
170: whatcode = SJIS;
171: else if (c >= 253 && c <= 254)
172: whatcode = EUC;
173: else if (c >= 161 && c <= 252)
174: whatcode = EUCORSJIS;
175: }
176: }
177: }
178:
179: #ifdef DEBUG
180: if (whatcode == ASCII)
181: debug ("Kanji code not included.");
182: else if (whatcode == EUCORSJIS)
183: debug ("Kanji code not detected.");
184: else
185: debug ("Kanji code detected at %d byte.", i);
186: #endif
187:
188: if (whatcode == EUCORSJIS && oldcode != ASCII)
189: whatcode = oldcode;
190:
191: if (whatcode == EUCORSJIS)
192: {
193: if (getenv ("LC_ALL"))
194: lang = getenv ("LC_ALL");
195: else if (getenv ("LC_CTYPE"))
196: lang = getenv ("LC_CTYPE");
197: else if (getenv ("LANG"))
198: lang = getenv ("LANG");
199:
200: if (lang)
201: {
202: if (strcmp (lang, "ja_JP.SJIS") == 0 ||
203: #ifdef hpux
204: strcmp (lang, "japanese") == 0 ||
205: #endif
206: strcmp (lang, "ja_JP.mscode") == 0 ||
207: strcmp (lang, "ja_JP.PCK") == 0)
208: whatcode = SJIS;
209: else if (strncmp (lang, "ja", 2) == 0)
210: #ifdef SJISPRE
211: whatcode = SJIS;
212: #else
213: whatcode = EUC;
214: #endif
215: }
216: }
217:
218: if (whatcode == EUCORSJIS)
219: #ifdef SJISPRE
220: whatcode = SJIS;
221: #else
222: whatcode = EUC;
223: #endif
224:
225: return whatcode;
226: }
227:
/* SJIStoJIS() is sjis2jis() by Ken Lunde. */

/*
 * Convert a Shift_JIS double-byte pair (*p1 = lead byte, *p2 = trail byte)
 * into the corresponding 7-bit JIS pair, in place.  The offsets are chosen
 * from the low 8 bits of each input value.
 */
static void
SJIStoJIS (int *p1, int *p2)
{
  const unsigned char lead = (unsigned char) *p1;
  const unsigned char trail = (unsigned char) *p2;
  int first_of_pair;
  int row_offset;
  int cell_offset;

  /* Each SJIS lead byte covers two JIS rows; trail < 159 is the first. */
  first_of_pair = (trail < 159) ? 1 : 0;

  if (lead < 160)
    row_offset = 112;
  else
    row_offset = 176;

  if (first_of_pair)
    cell_offset = (trail > 127) ? 32 : 31;
  else
    cell_offset = 126;

  *p1 = ((lead - row_offset) << 1) - first_of_pair;
  *p2 = *p2 - cell_offset;
}
242:
/* han2zen() was derived from han2zen() written by Ken Lunde. */

/* Half-width kana that can take a voiced (dakuten) / semi-voiced (handakuten) mark. */
#define IS_DAKU(c) (((c) >= 182 && (c) <= 196) || ((c) >= 202 && (c) <= 206) || ((c) == 179))
#define IS_HANDAKU(c) ((c) >= 202 && (c) <= 206)

/*
 * Convert a half-width kana byte to its full-width Shift_JIS pair.
 *
 *   p1  in:  half-width kana byte (161..223)
 *       out: SJIS lead byte of the full-width character
 *   p2  in:  the following half-width byte when it is a dakuten (222) or
 *            handakuten (223) mark, otherwise any other value
 *       out: SJIS trail byte of the full-width character
 *
 * A mark is folded into the result only when the kana can carry it.
 * If *p1 lies outside 161..223 there is no table entry for it, and both
 * values are left untouched.
 */
static void
han2zen (int *p1, int *p2)
{
  /* Full-width SJIS pair for each half-width byte, indexed by (byte - 161). */
  static const unsigned char mtable[][2] =
  {
    {129, 66}, {129, 117}, {129, 118}, {129, 65}, {129, 69}, {131, 146},
    {131, 64}, {131, 66}, {131, 68}, {131, 70}, {131, 72}, {131, 131},
    {131, 133}, {131, 135}, {131, 98}, {129, 91}, {131, 65}, {131, 67},
    {131, 69}, {131, 71}, {131, 73}, {131, 74}, {131, 76}, {131, 78},
    {131, 80}, {131, 82}, {131, 84}, {131, 86}, {131, 88}, {131, 90},
    {131, 92}, {131, 94}, {131, 96}, {131, 99}, {131, 101}, {131, 103},
    {131, 105}, {131, 106}, {131, 107}, {131, 108}, {131, 109}, {131, 110},
    {131, 113}, {131, 116}, {131, 119}, {131, 122}, {131, 125}, {131, 126},
    {131, 128}, {131, 129}, {131, 130}, {131, 132}, {131, 134}, {131, 136},
    {131, 137}, {131, 138}, {131, 139}, {131, 140}, {131, 141}, {131, 143},
    {131, 147}, {129, 74}, {129, 75}
  };
  const int table_len = (int) (sizeof mtable / sizeof mtable[0]);
  const int c = *p1;
  const int idx = c - 161;
  int daku = 0;
  int handaku = 0;

  /* Refuse bytes with no table entry instead of reading out of bounds. */
  if (idx < 0 || idx >= table_len)
    return;

  if (*p2 == 222 && IS_DAKU (c))
    daku = 1;                   /* Daku-ten */
  else if (*p2 == 223 && IS_HANDAKU (c))
    handaku = 1;                /* Han-daku-ten */

  *p1 = mtable[idx][0];
  *p2 = mtable[idx][1];

  if (daku)
    {
      if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
        (*p2)++;                /* voiced form directly follows the plain one */
      else if (*p1 == 131 && *p2 == 69)
        *p2 = 148;              /* U + dakuten -> VU (0x8394) */
    }
  else if (handaku && *p2 >= 110 && *p2 <= 122)
    (*p2) += 2;                 /* semi-voiced form is two cells after the plain one */
}
339:
340: /* Recast strcpy to handle unsigned chars used below. */
341: #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
342:
343: static void
344: do_convert (unsigned char *to, unsigned char *from, const char *code)
345: {
346: #ifdef HAVE_ICONV
347: iconv_t cd;
348: size_t from_len, to_len;
349:
350: if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
351: {
352: error ("iconv_open() error");
353: #ifdef HAVE_ERRNO_H
354: if (errno == EINVAL)
355: error ("invalid code specification: \"%s\" or \"%s\"",
356: EUCSTR, code);
357: #endif
358: strcpy ((char *) to, (const char *) from);
359: return;
360: }
361:
362: from_len = strlen ((const char *) from) + 1;
363: to_len = BUFSIZ;
364:
365: if ((int) iconv(cd, (char **) &from, &from_len, (char **) &to, &to_len) == -1)
366: {
367: #ifdef HAVE_ERRNO_H
368: if (errno == EINVAL)
369: error ("invalid end of input string");
370: else if (errno == EILSEQ)
371: error ("invalid code in input string");
372: else if (errno == E2BIG)
373: error ("output buffer overflow at do_convert()");
374: else
375: #endif
376: error ("something happen");
377: strcpy ((char *) to, (const char *) from);
378: return;
379: }
380:
381: if (iconv_close (cd) != 0)
382: {
383: error ("iconv_close() error");
384: }
385: #else
386: int p1, p2, i, j;
387: int jisx0208 = FALSE;
388: int hankaku = FALSE;
389:
390: j = 0;
391: if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
392: {
393: for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
394: {
395: if (from[i] == ESC)
396: {
397: i++;
398: if (from[i] == '$')
399: {
400: jisx0208 = TRUE;
401: hankaku = FALSE;
402: i++;
403: }
404: else if (from[i] == '(')
405: {
406: jisx0208 = FALSE;
407: i++;
408: if (from[i] == 'I') /* Hankaku Kana */
409: hankaku = TRUE;
410: else
411: hankaku = FALSE;
412: }
413: }
414: else
415: {
416: if (jisx0208)
417: to[j++] = from[i] + 128;
418: else if (hankaku)
419: {
420: to[j++] = SS2;
421: to[j++] = from[i] + 128;
422: }
423: else
424: to[j++] = from[i];
425: }
426: }
427: }
428: else if (strcmp (code, SJISSTR) == 0)
429: {
430: for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
431: {
432: p1 = from[i];
433: if (p1 < 127)
434: to[j++] = p1;
435: else if ((p1 >= 161) && (p1 <= 223))
436: { /* Hankaku Kana */
437: to[j++] = SS2;
438: to[j++] = p1;
439: }
440: else
441: {
442: p2 = from[++i];
443: SJIStoJIS (&p1, &p2);
444: to[j++] = p1 + 128;
445: to[j++] = p2 + 128;
446: }
447: }
448: }
449: else
450: {
451: error ("invalid code specification: \"%s\"", code);
452: return;
453: }
454:
455: if (j >= BUFSIZ)
456: {
457: error ("output buffer overflow at do_convert()");
458: ustrcpy (to, from);
459: }
460: else
461: to[j] = '\0';
462: #endif /* HAVE_ICONV */
463: }
464:
465: static int
466: do_check_and_conv (unsigned char *to, unsigned char *from)
467: {
468: static unsigned char tmp[BUFSIZ];
469: int p1, p2, i, j;
470: int kanji = TRUE;
471:
472: switch (DetectKanjiCode (from))
473: {
474: case NEW:
475: debug ("Kanji code is New JIS.");
476: do_convert (tmp, from, NEWJISSTR);
477: break;
478: case OLD:
479: debug ("Kanji code is Old JIS.");
480: do_convert (tmp, from, OLDJISSTR);
481: break;
482: case ESCI:
483: debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
484: do_convert (tmp, from, NEWJISSTR);
485: break;
486: case NEC:
487: debug ("Kanji code is NEC Kanji.");
488: error ("cannot convert NEC Kanji.");
489: ustrcpy (tmp, from);
490: kanji = FALSE;
491: break;
492: case EUC:
493: debug ("Kanji code is EUC.");
494: ustrcpy (tmp, from);
495: break;
496: case SJIS:
497: debug ("Kanji code is SJIS.");
498: do_convert (tmp, from, SJISSTR);
499: break;
500: case EUCORSJIS:
501: debug ("Kanji code is EUC or SJIS.");
502: ustrcpy (tmp, from);
503: kanji = FALSE;
504: break;
505: case ASCII:
506: debug ("This is ASCII string.");
507: ustrcpy (tmp, from);
508: kanji = FALSE;
509: break;
510: default:
511: debug ("This string includes unknown code.");
512: ustrcpy (tmp, from);
513: kanji = FALSE;
514: break;
515: }
516:
517: /* Hankaku Kana ---> Zenkaku Kana */
518: if (kanji)
519: {
520: j = 0;
521: for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
522: {
523: if (tmp[i] == SS2)
524: {
525: p1 = tmp[++i];
526: if (tmp[i + 1] == SS2)
527: {
528: p2 = tmp[i + 2];
529: if (p2 == 222 || p2 == 223)
530: i += 2;
531: else
532: p2 = 0;
533: }
534: else
535: p2 = 0;
536: han2zen (&p1, &p2);
537: SJIStoJIS (&p1, &p2);
538: to[j++] = p1 + 128;
539: to[j++] = p2 + 128;
540: }
541: else
542: to[j++] = tmp[i];
543: }
544:
545: if (j >= BUFSIZ)
546: {
547: error ("output buffer overflow at Hankaku --> Zenkaku");
548: ustrcpy (to, tmp);
549: }
550: else
551: to[j] = '\0';
552: }
553: else
554: ustrcpy (to, tmp);
555:
556: return kanji;
557: }
558:
/*
 * Convert `src` (JIS, Shift_JIS, EUC-JP or ASCII, auto-detected) to EUC-JP
 * with full-width kana and store the result in `dest`.
 *
 *   dest      output buffer of at least dest_max bytes
 *   src       NUL-terminated input, must be shorter than BUFSIZ
 *   dest_max  size of dest in bytes, at most BUFSIZ
 *
 * Returns the result of do_check_and_conv() (TRUE if the text was treated
 * as Japanese, FALSE otherwise), or -1 on error.  When the converted text
 * does not fit, dest receives as much of the unconverted src as fits
 * (possibly cutting a multi-byte character) and -1 is returned; dest is
 * never written beyond dest_max bytes.
 *
 * Uses a static scratch buffer, so the function is not reentrant.
 */
int
any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
{
  static unsigned char tmp_dest[BUFSIZ];
  size_t copy_len;
  int ret;

  if (strlen ((const char *) src) >= BUFSIZ)
    {
      error ("input string too large");
      return -1;
    }
  if (dest_max > BUFSIZ)
    {
      error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
      return -1;
    }
  ret = do_check_and_conv (tmp_dest, src);
  if (strlen ((const char *) tmp_dest) >= dest_max)
    {
      error ("output buffer overflow");
      /* src may itself be longer than dest, so bound the fallback copy. */
      if (dest_max > 0)
        {
          copy_len = strlen ((const char *) src);
          if (copy_len > (size_t) dest_max - 1)
            copy_len = (size_t) dest_max - 1;
          memmove (dest, src, copy_len);
          dest[copy_len] = '\0';
        }
      return -1;
    }
  ustrcpy (dest, tmp_dest);
  return ret;
}
585:
#if 0
/*
 * Return the byte length of `s` after conversion by any2eucjp(),
 * or 0 when the scratch buffer cannot be allocated or conversion fails
 * before producing output.
 */
unsigned int
strwidth (unsigned char *s)
{
  unsigned char *t;
  unsigned int i;

  t = (unsigned char *) gdMalloc (BUFSIZ);
  if (t == NULL)
    return 0;
  t[0] = '\0';                  /* any2eucjp() may fail before writing t */
  any2eucjp (t, s, BUFSIZ);
  i = (unsigned int) strlen ((const char *) t);
  gdFree (t);
  return i;
}

#ifdef DEBUG
/* Test driver: read one line from stdin, print its sizes and conversion. */
int
main ()
{
  unsigned char input[BUFSIZ];
  unsigned char *output;
  unsigned char *str;
  int c, i = 0;

  /* Leave room for the terminator and stop on end-of-file as well. */
  while (i < BUFSIZ - 1 && (c = fgetc (stdin)) != EOF && c != '\n')
    input[i++] = c;
  input[i] = '\0';

  printf ("input : %lu bytes\n", (unsigned long) strlen ((const char *) input));
  printf ("output: %u bytes\n", strwidth (input));

  output = (unsigned char *) gdMalloc (BUFSIZ);
  if (output == NULL)
    return 1;
  output[0] = '\0';             /* any2eucjp() may fail before writing output */
  any2eucjp (output, input, BUFSIZ);
  str = output;
  while (*str != '\0')
    putchar (*(str++));
  putchar ('\n');
  gdFree (output);

  return 0;
}
#endif
#endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>