Annotation of embedaddon/libiconv/lib/loop_unicode.h, revision 1.1.1.1

1.1       misho       1: /*
                      2:  * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
                      3:  * This file is part of the GNU LIBICONV Library.
                      4:  *
                      5:  * The GNU LIBICONV Library is free software; you can redistribute it
                      6:  * and/or modify it under the terms of the GNU Library General Public
                      7:  * License as published by the Free Software Foundation; either version 2
                      8:  * of the License, or (at your option) any later version.
                      9:  *
                     10:  * The GNU LIBICONV Library is distributed in the hope that it will be
                     11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
                     12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     13:  * Library General Public License for more details.
                     14:  *
                     15:  * You should have received a copy of the GNU Library General Public
                     16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
                     17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
                     18:  * Fifth Floor, Boston, MA 02110-1301, USA.
                     19:  */
                     20: 
                     21: /* This file defines the conversion loop via Unicode as a pivot encoding. */
                     22: 
                     23: /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
                     24: static int unicode_transliterate (conv_t cd, ucs4_t wc,
                     25:                                   unsigned char* outptr, size_t outleft)
                     26: {
                     27:   if (cd->oflags & HAVE_HANGUL_JAMO) {
                     28:     /* Decompose Hangul into Jamo. Use double-width Jamo (contained
                     29:        in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
                     30:        (contained in Unicode only). */
                     31:     ucs4_t buf[3];
                     32:     int ret = johab_hangul_decompose(cd,buf,wc);
                     33:     if (ret != RET_ILUNI) {
                     34:       /* we know 1 <= ret <= 3 */
                     35:       state_t backup_state = cd->ostate;
                     36:       unsigned char* backup_outptr = outptr;
                     37:       size_t backup_outleft = outleft;
                     38:       int i, sub_outcount;
                     39:       for (i = 0; i < ret; i++) {
                     40:         if (outleft == 0) {
                     41:           sub_outcount = RET_TOOSMALL;
                     42:           goto johab_hangul_failed;
                     43:         }
                     44:         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                     45:         if (sub_outcount <= RET_ILUNI)
                     46:           goto johab_hangul_failed;
                     47:         if (!(sub_outcount <= outleft)) abort();
                     48:         outptr += sub_outcount; outleft -= sub_outcount;
                     49:       }
                     50:       return outptr-backup_outptr;
                     51:     johab_hangul_failed:
                     52:       cd->ostate = backup_state;
                     53:       outptr = backup_outptr;
                     54:       outleft = backup_outleft;
                     55:       if (sub_outcount != RET_ILUNI)
                     56:         return RET_TOOSMALL;
                     57:     }
                     58:   }
                     59:   {
                     60:     /* Try to use a variant, but postfix it with
                     61:        U+303E IDEOGRAPHIC VARIATION INDICATOR
                     62:        (cf. Ken Lunde's "CJKV information processing", p. 188). */
                     63:     int indx = -1;
                     64:     if (wc == 0x3006)
                     65:       indx = 0;
                     66:     else if (wc == 0x30f6)
                     67:       indx = 1;
                     68:     else if (wc >= 0x4e00 && wc < 0xa000)
                     69:       indx = cjk_variants_indx[wc-0x4e00];
                     70:     if (indx >= 0) {
                     71:       for (;; indx++) {
                     72:         ucs4_t buf[2];
                     73:         unsigned short variant = cjk_variants[indx];
                     74:         unsigned short last = variant & 0x8000;
                     75:         variant &= 0x7fff;
                     76:         variant += 0x3000;
                     77:         buf[0] = variant; buf[1] = 0x303e;
                     78:         {
                     79:           state_t backup_state = cd->ostate;
                     80:           unsigned char* backup_outptr = outptr;
                     81:           size_t backup_outleft = outleft;
                     82:           int i, sub_outcount;
                     83:           for (i = 0; i < 2; i++) {
                     84:             if (outleft == 0) {
                     85:               sub_outcount = RET_TOOSMALL;
                     86:               goto variant_failed;
                     87:             }
                     88:             sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                     89:             if (sub_outcount <= RET_ILUNI)
                     90:               goto variant_failed;
                     91:             if (!(sub_outcount <= outleft)) abort();
                     92:             outptr += sub_outcount; outleft -= sub_outcount;
                     93:           }
                     94:           return outptr-backup_outptr;
                     95:         variant_failed:
                     96:           cd->ostate = backup_state;
                     97:           outptr = backup_outptr;
                     98:           outleft = backup_outleft;
                     99:           if (sub_outcount != RET_ILUNI)
                    100:             return RET_TOOSMALL;
                    101:         }
                    102:         if (last)
                    103:           break;
                    104:       }
                    105:     }
                    106:   }
                    107:   if (wc >= 0x2018 && wc <= 0x201a) {
                    108:     /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
                    109:     ucs4_t substitute =
                    110:       (cd->oflags & HAVE_QUOTATION_MARKS
                    111:        ? (wc == 0x201a ? 0x2018 : wc)
                    112:        : (cd->oflags & HAVE_ACCENTS
                    113:           ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
                    114:           : 0x0027 /* use apostrophe */
                    115:       )  );
                    116:     int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
                    117:     if (outcount != RET_ILUNI)
                    118:       return outcount;
                    119:   }
                    120:   {
                    121:     /* Use the transliteration table. */
                    122:     int indx = translit_index(wc);
                    123:     if (indx >= 0) {
                    124:       const unsigned int * cp = &translit_data[indx];
                    125:       unsigned int num = *cp++;
                    126:       state_t backup_state = cd->ostate;
                    127:       unsigned char* backup_outptr = outptr;
                    128:       size_t backup_outleft = outleft;
                    129:       unsigned int i;
                    130:       int sub_outcount;
                    131:       for (i = 0; i < num; i++) {
                    132:         if (outleft == 0) {
                    133:           sub_outcount = RET_TOOSMALL;
                    134:           goto translit_failed;
                    135:         }
                    136:         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
                    137:         if (sub_outcount == RET_ILUNI)
                    138:           /* Recursive transliteration. */
                    139:           sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
                    140:         if (sub_outcount <= RET_ILUNI)
                    141:           goto translit_failed;
                    142:         if (!(sub_outcount <= outleft)) abort();
                    143:         outptr += sub_outcount; outleft -= sub_outcount;
                    144:       }
                    145:       return outptr-backup_outptr;
                    146:     translit_failed:
                    147:       cd->ostate = backup_state;
                    148:       outptr = backup_outptr;
                    149:       outleft = backup_outleft;
                    150:       if (sub_outcount != RET_ILUNI)
                    151:         return RET_TOOSMALL;
                    152:     }
                    153:   }
                    154:   return RET_ILUNI;
                    155: }
                    156: 
                    157: #ifndef LIBICONV_PLUG
                    158: 
                    /* Output cursor handed, through the opaque callback argument, to
                       uc_to_mb_write_replacement while a uc_to_mb fallback handler runs. */
                    struct uc_to_mb_fallback_locals {
                      unsigned char* l_outbuf;   /* next free byte of the output buffer */
                      size_t l_outbytesleft;     /* bytes still available at l_outbuf */
                      int l_errno;               /* 0, or the error recorded by a previous call */
                    };

                    /* Callback given to a uc_to_mb fallback handler: append the buflen bytes
                       at buf to the output buffer described by callback_arg, which points to a
                       struct uc_to_mb_fallback_locals.
                       If the bytes do not fit, nothing is copied and l_errno is set to E2BIG.
                       Once l_errno is nonzero, every further call is a no-op. */
                    static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                                            void* callback_arg)
                    {
                      struct uc_to_mb_fallback_locals * plocals =
                        (struct uc_to_mb_fallback_locals *) callback_arg;
                      /* Do nothing if already encountered an error in a previous call. */
                      if (plocals->l_errno == 0) {
                        /* Attempt to copy the passed buffer to the output buffer. */
                        if (plocals->l_outbytesleft < buflen)
                          plocals->l_errno = E2BIG;
                        else if (buflen > 0) {
                          /* An empty replacement is skipped entirely, so that memcpy is
                             never handed a null buf. */
                          memcpy(plocals->l_outbuf, buf, buflen);
                          plocals->l_outbuf += buflen;
                          plocals->l_outbytesleft -= buflen;
                        }
                      }
                    }
                    182: 
                    183: struct mb_to_uc_fallback_locals {
                    184:   conv_t l_cd;
                    185:   unsigned char* l_outbuf;
                    186:   size_t l_outbytesleft;
                    187:   int l_errno;
                    188: };
                    189: 
                    190: static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
                    191:                                         void* callback_arg)
                    192: {
                    193:   struct mb_to_uc_fallback_locals * plocals =
                    194:     (struct mb_to_uc_fallback_locals *) callback_arg;
                    195:   /* Do nothing if already encountered an error in a previous call. */
                    196:   if (plocals->l_errno == 0) {
                    197:     /* Attempt to convert the passed buffer to the target encoding. */
                    198:     conv_t cd = plocals->l_cd;
                    199:     unsigned char* outptr = plocals->l_outbuf;
                    200:     size_t outleft = plocals->l_outbytesleft;
                    201:     for (; buflen > 0; buf++, buflen--) {
                    202:       ucs4_t wc = *buf;
                    203:       int outcount;
                    204:       if (outleft == 0) {
                    205:         plocals->l_errno = E2BIG;
                    206:         break;
                    207:       }
                    208:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
                    209:       if (outcount != RET_ILUNI)
                    210:         goto outcount_ok;
                    211:       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
                    212:       if ((wc >> 7) == (0xe0000 >> 7))
                    213:         goto outcount_zero;
                    214:       /* Try transliteration. */
                    215:       if (cd->transliterate) {
                    216:         outcount = unicode_transliterate(cd,wc,outptr,outleft);
                    217:         if (outcount != RET_ILUNI)
                    218:           goto outcount_ok;
                    219:       }
                    220:       if (cd->discard_ilseq) {
                    221:         outcount = 0;
                    222:         goto outcount_ok;
                    223:       }
                    224:       #ifndef LIBICONV_PLUG
                    225:       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
                    226:         struct uc_to_mb_fallback_locals locals;
                    227:         locals.l_outbuf = outptr;
                    228:         locals.l_outbytesleft = outleft;
                    229:         locals.l_errno = 0;
                    230:         cd->fallbacks.uc_to_mb_fallback(wc,
                    231:                                         uc_to_mb_write_replacement,
                    232:                                         &locals,
                    233:                                         cd->fallbacks.data);
                    234:         if (locals.l_errno != 0) {
                    235:           plocals->l_errno = locals.l_errno;
                    236:           break;
                    237:         }
                    238:         outptr = locals.l_outbuf;
                    239:         outleft = locals.l_outbytesleft;
                    240:         outcount = 0;
                    241:         goto outcount_ok;
                    242:       }
                    243:       #endif
                    244:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
                    245:       if (outcount != RET_ILUNI)
                    246:         goto outcount_ok;
                    247:       plocals->l_errno = EILSEQ;
                    248:       break;
                    249:     outcount_ok:
                    250:       if (outcount < 0) {
                    251:         plocals->l_errno = E2BIG;
                    252:         break;
                    253:       }
                    254:       #ifndef LIBICONV_PLUG
                    255:       if (cd->hooks.uc_hook)
                    256:         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
                    257:       #endif
                    258:       if (!(outcount <= outleft)) abort();
                    259:       outptr += outcount; outleft -= outcount;
                    260:     outcount_zero: ;
                    261:     }
                    262:     plocals->l_outbuf = outptr;
                    263:     plocals->l_outbytesleft = outleft;
                    264:   }
                    265: }
                    266: 
                    267: #endif /* !LIBICONV_PLUG */
                    268: 
                    269: static size_t unicode_loop_convert (iconv_t icd,
                    270:                                     const char* * inbuf, size_t *inbytesleft,
                    271:                                     char* * outbuf, size_t *outbytesleft)
                    272: {
                    273:   conv_t cd = (conv_t) icd;
                    274:   size_t result = 0;
                    275:   const unsigned char* inptr = (const unsigned char*) *inbuf;
                    276:   size_t inleft = *inbytesleft;
                    277:   unsigned char* outptr = (unsigned char*) *outbuf;
                    278:   size_t outleft = *outbytesleft;
                    279:   while (inleft > 0) {
                    280:     state_t last_istate = cd->istate;
                    281:     ucs4_t wc;
                    282:     int incount;
                    283:     int outcount;
                    284:     incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
                    285:     if (incount < 0) {
                    286:       if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
                    287:         /* Case 1: invalid input, possibly after a shift sequence */
                    288:         incount = DECODE_SHIFT_ILSEQ(incount);
                    289:         if (cd->discard_ilseq) {
                    290:           switch (cd->iindex) {
                    291:             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
                    292:             case ei_utf32: case ei_utf32be: case ei_utf32le:
                    293:             case ei_ucs4internal: case ei_ucs4swapped:
                    294:               incount += 4; break;
                    295:             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
                    296:             case ei_utf16: case ei_utf16be: case ei_utf16le:
                    297:             case ei_ucs2internal: case ei_ucs2swapped:
                    298:               incount += 2; break;
                    299:             default:
                    300:               incount += 1; break;
                    301:           }
                    302:           goto outcount_zero;
                    303:         }
                    304:         #ifndef LIBICONV_PLUG
                    305:         else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
                    306:           unsigned int incount2;
                    307:           struct mb_to_uc_fallback_locals locals;
                    308:           switch (cd->iindex) {
                    309:             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
                    310:             case ei_utf32: case ei_utf32be: case ei_utf32le:
                    311:             case ei_ucs4internal: case ei_ucs4swapped:
                    312:               incount2 = 4; break;
                    313:             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
                    314:             case ei_utf16: case ei_utf16be: case ei_utf16le:
                    315:             case ei_ucs2internal: case ei_ucs2swapped:
                    316:               incount2 = 2; break;
                    317:             default:
                    318:               incount2 = 1; break;
                    319:           }
                    320:           locals.l_cd = cd;
                    321:           locals.l_outbuf = outptr;
                    322:           locals.l_outbytesleft = outleft;
                    323:           locals.l_errno = 0;
                    324:           cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
                    325:                                           mb_to_uc_write_replacement,
                    326:                                           &locals,
                    327:                                           cd->fallbacks.data);
                    328:           if (locals.l_errno != 0) {
                    329:             inptr += incount; inleft -= incount;
                    330:             errno = locals.l_errno;
                    331:             result = -1;
                    332:             break;
                    333:           }
                    334:           incount += incount2;
                    335:           outptr = locals.l_outbuf;
                    336:           outleft = locals.l_outbytesleft;
                    337:           result += 1;
                    338:           goto outcount_zero;
                    339:         }
                    340:         #endif
                    341:         inptr += incount; inleft -= incount;
                    342:         errno = EILSEQ;
                    343:         result = -1;
                    344:         break;
                    345:       }
                    346:       if (incount == RET_TOOFEW(0)) {
                    347:         /* Case 2: not enough bytes available to detect anything */
                    348:         errno = EINVAL;
                    349:         result = -1;
                    350:         break;
                    351:       }
                    352:       /* Case 3: k bytes read, but only a shift sequence */
                    353:       incount = DECODE_TOOFEW(incount);
                    354:     } else {
                    355:       /* Case 4: k bytes read, making up a wide character */
                    356:       if (outleft == 0) {
                    357:         cd->istate = last_istate;
                    358:         errno = E2BIG;
                    359:         result = -1;
                    360:         break;
                    361:       }
                    362:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
                    363:       if (outcount != RET_ILUNI)
                    364:         goto outcount_ok;
                    365:       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
                    366:       if ((wc >> 7) == (0xe0000 >> 7))
                    367:         goto outcount_zero;
                    368:       /* Try transliteration. */
                    369:       result++;
                    370:       if (cd->transliterate) {
                    371:         outcount = unicode_transliterate(cd,wc,outptr,outleft);
                    372:         if (outcount != RET_ILUNI)
                    373:           goto outcount_ok;
                    374:       }
                    375:       if (cd->discard_ilseq) {
                    376:         outcount = 0;
                    377:         goto outcount_ok;
                    378:       }
                    379:       #ifndef LIBICONV_PLUG
                    380:       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
                    381:         struct uc_to_mb_fallback_locals locals;
                    382:         locals.l_outbuf = outptr;
                    383:         locals.l_outbytesleft = outleft;
                    384:         locals.l_errno = 0;
                    385:         cd->fallbacks.uc_to_mb_fallback(wc,
                    386:                                         uc_to_mb_write_replacement,
                    387:                                         &locals,
                    388:                                         cd->fallbacks.data);
                    389:         if (locals.l_errno != 0) {
                    390:           cd->istate = last_istate;
                    391:           errno = locals.l_errno;
                    392:           return -1;
                    393:         }
                    394:         outptr = locals.l_outbuf;
                    395:         outleft = locals.l_outbytesleft;
                    396:         outcount = 0;
                    397:         goto outcount_ok;
                    398:       }
                    399:       #endif
                    400:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
                    401:       if (outcount != RET_ILUNI)
                    402:         goto outcount_ok;
                    403:       cd->istate = last_istate;
                    404:       errno = EILSEQ;
                    405:       result = -1;
                    406:       break;
                    407:     outcount_ok:
                    408:       if (outcount < 0) {
                    409:         cd->istate = last_istate;
                    410:         errno = E2BIG;
                    411:         result = -1;
                    412:         break;
                    413:       }
                    414:       #ifndef LIBICONV_PLUG
                    415:       if (cd->hooks.uc_hook)
                    416:         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
                    417:       #endif
                    418:       if (!(outcount <= outleft)) abort();
                    419:       outptr += outcount; outleft -= outcount;
                    420:     }
                    421:   outcount_zero:
                    422:     if (!(incount <= inleft)) abort();
                    423:     inptr += incount; inleft -= incount;
                    424:   }
                    425:   *inbuf = (const char*) inptr;
                    426:   *inbytesleft = inleft;
                    427:   *outbuf = (char*) outptr;
                    428:   *outbytesleft = outleft;
                    429:   return result;
                    430: }
                    431: 
                    432: static size_t unicode_loop_reset (iconv_t icd,
                    433:                                   char* * outbuf, size_t *outbytesleft)
                    434: {
                    435:   conv_t cd = (conv_t) icd;
                    436:   if (outbuf == NULL || *outbuf == NULL) {
                    437:     /* Reset the states. */
                    438:     memset(&cd->istate,'\0',sizeof(state_t));
                    439:     memset(&cd->ostate,'\0',sizeof(state_t));
                    440:     return 0;
                    441:   } else {
                    442:     size_t result = 0;
                    443:     if (cd->ifuncs.xxx_flushwc) {
                    444:       state_t last_istate = cd->istate;
                    445:       ucs4_t wc;
                    446:       if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
                    447:         unsigned char* outptr = (unsigned char*) *outbuf;
                    448:         size_t outleft = *outbytesleft;
                    449:         int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
                    450:         if (outcount != RET_ILUNI)
                    451:           goto outcount_ok;
                    452:         /* Handle Unicode tag characters (range U+E0000..U+E007F). */
                    453:         if ((wc >> 7) == (0xe0000 >> 7))
                    454:           goto outcount_zero;
                    455:         /* Try transliteration. */
                    456:         result++;
                    457:         if (cd->transliterate) {
                    458:           outcount = unicode_transliterate(cd,wc,outptr,outleft);
                    459:           if (outcount != RET_ILUNI)
                    460:             goto outcount_ok;
                    461:         }
                    462:         if (cd->discard_ilseq) {
                    463:           outcount = 0;
                    464:           goto outcount_ok;
                    465:         }
                    466:         #ifndef LIBICONV_PLUG
                    467:         else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
                    468:           struct uc_to_mb_fallback_locals locals;
                    469:           locals.l_outbuf = outptr;
                    470:           locals.l_outbytesleft = outleft;
                    471:           locals.l_errno = 0;
                    472:           cd->fallbacks.uc_to_mb_fallback(wc,
                    473:                                           uc_to_mb_write_replacement,
                    474:                                           &locals,
                    475:                                           cd->fallbacks.data);
                    476:           if (locals.l_errno != 0) {
                    477:             cd->istate = last_istate;
                    478:             errno = locals.l_errno;
                    479:             return -1;
                    480:           }
                    481:           outptr = locals.l_outbuf;
                    482:           outleft = locals.l_outbytesleft;
                    483:           outcount = 0;
                    484:           goto outcount_ok;
                    485:         }
                    486:         #endif
                    487:         outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
                    488:         if (outcount != RET_ILUNI)
                    489:           goto outcount_ok;
                    490:         cd->istate = last_istate;
                    491:         errno = EILSEQ;
                    492:         return -1;
                    493:       outcount_ok:
                    494:         if (outcount < 0) {
                    495:           cd->istate = last_istate;
                    496:           errno = E2BIG;
                    497:           return -1;
                    498:         }
                    499:         #ifndef LIBICONV_PLUG
                    500:         if (cd->hooks.uc_hook)
                    501:           (*cd->hooks.uc_hook)(wc, cd->hooks.data);
                    502:         #endif
                    503:         if (!(outcount <= outleft)) abort();
                    504:         outptr += outcount;
                    505:         outleft -= outcount;
                    506:       outcount_zero:
                    507:         *outbuf = (char*) outptr;
                    508:         *outbytesleft = outleft;
                    509:       }
                    510:     }
                    511:     if (cd->ofuncs.xxx_reset) {
                    512:       unsigned char* outptr = (unsigned char*) *outbuf;
                    513:       size_t outleft = *outbytesleft;
                    514:       int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
                    515:       if (outcount < 0) {
                    516:         errno = E2BIG;
                    517:         return -1;
                    518:       }
                    519:       if (!(outcount <= outleft)) abort();
                    520:       *outbuf = (char*) (outptr + outcount);
                    521:       *outbytesleft = outleft - outcount;
                    522:     }
                    523:     memset(&cd->istate,'\0',sizeof(state_t));
                    524:     memset(&cd->ostate,'\0',sizeof(state_t));
                    525:     return result;
                    526:   }
                    527: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>