Annotation of embedaddon/libiconv/lib/loop_unicode.h, revision 1.1.1.2

1.1       misho       1: /*
                      2:  * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
                      3:  * This file is part of the GNU LIBICONV Library.
                      4:  *
                      5:  * The GNU LIBICONV Library is free software; you can redistribute it
                      6:  * and/or modify it under the terms of the GNU Library General Public
                      7:  * License as published by the Free Software Foundation; either version 2
                      8:  * of the License, or (at your option) any later version.
                      9:  *
                     10:  * The GNU LIBICONV Library is distributed in the hope that it will be
                     11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
                     12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     13:  * Library General Public License for more details.
                     14:  *
                     15:  * You should have received a copy of the GNU Library General Public
                     16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.2 ! misho      17:  * If not, see <https://www.gnu.org/licenses/>.
1.1       misho      18:  */
                     19: 
                     20: /* This file defines the conversion loop via Unicode as a pivot encoding. */
                     21: 
                     22: /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
                         /* The following strategies are tried in order; the first one that
                            produces output returns the number of bytes written:
                              1. Hangul -> Jamo decomposition, only when HAVE_HANGUL_JAMO is set
                                 in cd->oflags.
                              2. A CJK variant character followed by U+303E.
                              3. A substitute for the quotation marks U+2018..U+201A.
                              4. The generic transliteration table, applied recursively.
                            Every multi-character attempt saves cd->ostate, outptr and outleft
                            first and restores them on failure.  A failure code other than
                            RET_ILUNI ends the whole function with RET_TOOSMALL; RET_ILUNI lets
                            the next strategy run.  If no strategy succeeds, RET_ILUNI is
                            returned. */
                     23: static int unicode_transliterate (conv_t cd, ucs4_t wc,
                     24:                                   unsigned char* outptr, size_t outleft)
                     25: {
                     26:   if (cd->oflags & HAVE_HANGUL_JAMO) {
                     27:     /* Decompose Hangul into Jamo. Use double-width Jamo (contained
                     28:        in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
                     29:        (contained in Unicode only). */
                     30:     ucs4_t buf[3];
                     31:     int ret = johab_hangul_decompose(cd,buf,wc);
                     32:     if (ret != RET_ILUNI) {
                     33:       /* we know 1 <= ret <= 3 */
                     34:       state_t backup_state = cd->ostate;
                     35:       unsigned char* backup_outptr = outptr;
                     36:       size_t backup_outleft = outleft;
                     37:       int i, sub_outcount;
                               /* Emit the ret decomposed characters one after the other. */
                     38:       for (i = 0; i < ret; i++) {
                     39:         if (outleft == 0) {
                     40:           sub_outcount = RET_TOOSMALL;
                     41:           goto johab_hangul_failed;
                     42:         }
                     43:         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                     44:         if (sub_outcount <= RET_ILUNI)
                     45:           goto johab_hangul_failed;
                     46:         if (!(sub_outcount <= outleft)) abort();
                     47:         outptr += sub_outcount; outleft -= sub_outcount;
                     48:       }
                     49:       return outptr-backup_outptr;
                     50:     johab_hangul_failed:
                               /* Undo the partial output of this attempt. */
                     51:       cd->ostate = backup_state;
                     52:       outptr = backup_outptr;
                     53:       outleft = backup_outleft;
                     54:       if (sub_outcount != RET_ILUNI)
                     55:         return RET_TOOSMALL;
                     56:     }
                     57:   }
                     58:   {
                     59:     /* Try to use a variant, but postfix it with
                     60:        U+303E IDEOGRAPHIC VARIATION INDICATOR
                     61:        (cf. Ken Lunde's "CJKV information processing", p. 188). */
                             /* indx is the start position in cjk_variants, or negative when wc
                                has no variants. */
                     62:     int indx = -1;
                     63:     if (wc == 0x3006)
                     64:       indx = 0;
                     65:     else if (wc == 0x30f6)
                     66:       indx = 1;
                     67:     else if (wc >= 0x4e00 && wc < 0xa000)
                     68:       indx = cjk_variants_indx[wc-0x4e00];
                     69:     if (indx >= 0) {
                               /* Walk the consecutive cjk_variants entries for wc.  In each
                                  entry, bit 0x8000 marks the last variant of the run and the
                                  low 15 bits plus 0x3000 give the variant's code point. */
                     70:       for (;; indx++) {
                     71:         ucs4_t buf[2];
                     72:         unsigned short variant = cjk_variants[indx];
                     73:         unsigned short last = variant & 0x8000;
                     74:         variant &= 0x7fff;
                     75:         variant += 0x3000;
                     76:         buf[0] = variant; buf[1] = 0x303e;
                     77:         {
                     78:           state_t backup_state = cd->ostate;
                     79:           unsigned char* backup_outptr = outptr;
                     80:           size_t backup_outleft = outleft;
                     81:           int i, sub_outcount;
                     82:           for (i = 0; i < 2; i++) {
                     83:             if (outleft == 0) {
                     84:               sub_outcount = RET_TOOSMALL;
                     85:               goto variant_failed;
                     86:             }
                     87:             sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                     88:             if (sub_outcount <= RET_ILUNI)
                     89:               goto variant_failed;
                     90:             if (!(sub_outcount <= outleft)) abort();
                     91:             outptr += sub_outcount; outleft -= sub_outcount;
                     92:           }
                     93:           return outptr-backup_outptr;
                     94:         variant_failed:
                                   /* Undo the partial output; on RET_ILUNI try the next variant. */
                     95:           cd->ostate = backup_state;
                     96:           outptr = backup_outptr;
                     97:           outleft = backup_outleft;
                     98:           if (sub_outcount != RET_ILUNI)
                     99:             return RET_TOOSMALL;
                    100:         }
                    101:         if (last)
                    102:           break;
                    103:       }
                    104:     }
                    105:   }
                    106:   if (wc >= 0x2018 && wc <= 0x201a) {
                    107:     /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
                             /* The substitute depends on cd->oflags: with HAVE_QUOTATION_MARKS,
                                0x201a becomes 0x2018 and the others are kept; otherwise with
                                HAVE_ACCENTS, 0x2019 becomes 0x00b4 and the others 0x0060;
                                otherwise 0x0027 is used. */
                    108:     ucs4_t substitute =
                    109:       (cd->oflags & HAVE_QUOTATION_MARKS
                    110:        ? (wc == 0x201a ? 0x2018 : wc)
                    111:        : (cd->oflags & HAVE_ACCENTS
                    112:           ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
                    113:           : 0x0027 /* use apostrophe */
                    114:       )  );
                    115:     int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
                             /* Any result other than RET_ILUNI, including other error codes,
                                is returned to the caller unchanged. */
                    116:     if (outcount != RET_ILUNI)
                    117:       return outcount;
                    118:   }
                    119:   {
                    120:     /* Use the transliteration table. */
                    121:     int indx = translit_index(wc);
                    122:     if (indx >= 0) {
                               /* translit_data[indx] holds the number of replacement characters;
                                  the replacement characters themselves follow it. */
                    123:       const unsigned int * cp = &translit_data[indx];
                    124:       unsigned int num = *cp++;
                    125:       state_t backup_state = cd->ostate;
                    126:       unsigned char* backup_outptr = outptr;
                    127:       size_t backup_outleft = outleft;
                    128:       unsigned int i;
                    129:       int sub_outcount;
                    130:       for (i = 0; i < num; i++) {
                    131:         if (outleft == 0) {
                    132:           sub_outcount = RET_TOOSMALL;
                    133:           goto translit_failed;
                    134:         }
                    135:         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
                    136:         if (sub_outcount == RET_ILUNI)
                    137:           /* Recursive transliteration. */
                    138:           sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
                    139:         if (sub_outcount <= RET_ILUNI)
                    140:           goto translit_failed;
                    141:         if (!(sub_outcount <= outleft)) abort();
                    142:         outptr += sub_outcount; outleft -= sub_outcount;
                    143:       }
                    144:       return outptr-backup_outptr;
                    145:     translit_failed:
                               /* Undo the partial output of this attempt. */
                    146:       cd->ostate = backup_state;
                    147:       outptr = backup_outptr;
                    148:       outleft = backup_outleft;
                    149:       if (sub_outcount != RET_ILUNI)
                    150:         return RET_TOOSMALL;
                    151:     }
                    152:   }
                           /* No strategy could represent wc in the output encoding. */
                    153:   return RET_ILUNI;
                    154: }
                    155: 
                    156: #ifndef LIBICONV_PLUG
                    157: 
                    158: struct uc_to_mb_fallback_locals {
                           /* State handed as callback_arg to uc_to_mb_write_replacement. */
                           /* Position in the output buffer where the next bytes are copied. */
                    159:   unsigned char* l_outbuf;
                           /* Number of bytes still available at l_outbuf. */
                    160:   size_t l_outbytesleft;
                           /* 0 while no error occurred; set to E2BIG when a replacement does
                              not fit. */
                    161:   int l_errno;
                    162: };
                    163: 
                    164: static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                    165:                                         void* callback_arg)
                    166: {
                           /* Write callback passed to cd->fallbacks.uc_to_mb_fallback.
                              Copies the buflen bytes at buf verbatim to the output position
                              recorded in callback_arg (a struct uc_to_mb_fallback_locals *)
                              and advances that position.  If the bytes do not fit, nothing is
                              copied and l_errno is set to E2BIG. */
                    167:   struct uc_to_mb_fallback_locals * plocals =
                    168:     (struct uc_to_mb_fallback_locals *) callback_arg;
                    169:   /* Do nothing if already encountered an error in a previous call. */
                    170:   if (plocals->l_errno == 0) {
                    171:     /* Attempt to copy the passed buffer to the output buffer. */
                    172:     if (plocals->l_outbytesleft < buflen)
                    173:       plocals->l_errno = E2BIG;
                    174:     else {
                    175:       memcpy(plocals->l_outbuf, buf, buflen);
                    176:       plocals->l_outbuf += buflen;
                    177:       plocals->l_outbytesleft -= buflen;
                    178:     }
                    179:   }
                    180: }
                    181: 
                    182: struct mb_to_uc_fallback_locals {
                           /* State handed as callback_arg to mb_to_uc_write_replacement. */
                           /* Conversion descriptor whose output functions, flags, fallbacks and
                              hooks are used to encode the replacement characters. */
                    183:   conv_t l_cd;
                           /* Position in the output buffer where the next bytes are written. */
                    184:   unsigned char* l_outbuf;
                           /* Number of bytes still available at l_outbuf. */
                    185:   size_t l_outbytesleft;
                           /* 0 while no error occurred; otherwise E2BIG, EILSEQ, or the error
                              reported by a nested uc_to_mb fallback. */
                    186:   int l_errno;
                    187: };
                    188: 
                    189: static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
                    190:                                         void* callback_arg)
                    191: {
                           /* Write callback passed to cd->fallbacks.mb_to_uc_fallback.
                              Encodes the buflen Unicode characters at buf into the output
                              buffer recorded in callback_arg (a struct
                              mb_to_uc_fallback_locals *).  For each character it tries, in
                              order: the output encoding's xxx_wctomb, silently dropping
                              Unicode tag characters, transliteration (if cd->transliterate),
                              discarding (if cd->discard_ilseq), the uc_to_mb fallback, and
                              finally U+FFFD.  On failure l_errno is set and the loop stops;
                              the output position reached so far is stored back in either
                              case. */
                    192:   struct mb_to_uc_fallback_locals * plocals =
                    193:     (struct mb_to_uc_fallback_locals *) callback_arg;
                    194:   /* Do nothing if already encountered an error in a previous call. */
                    195:   if (plocals->l_errno == 0) {
                    196:     /* Attempt to convert the passed buffer to the target encoding. */
                    197:     conv_t cd = plocals->l_cd;
                    198:     unsigned char* outptr = plocals->l_outbuf;
                    199:     size_t outleft = plocals->l_outbytesleft;
                    200:     for (; buflen > 0; buf++, buflen--) {
                    201:       ucs4_t wc = *buf;
                    202:       int outcount;
                    203:       if (outleft == 0) {
                    204:         plocals->l_errno = E2BIG;
                    205:         break;
                    206:       }
                    207:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
                    208:       if (outcount != RET_ILUNI)
                    209:         goto outcount_ok;
                    210:       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
                    211:       if ((wc >> 7) == (0xe0000 >> 7))
                    212:         goto outcount_zero;
                    213:       /* Try transliteration. */
                    214:       if (cd->transliterate) {
                    215:         outcount = unicode_transliterate(cd,wc,outptr,outleft);
                    216:         if (outcount != RET_ILUNI)
                    217:           goto outcount_ok;
                    218:       }
                    219:       if (cd->discard_ilseq) {
                    220:         outcount = 0;
                    221:         goto outcount_ok;
                    222:       }
                               /* NOTE(review): this function is already compiled only when
                                  LIBICONV_PLUG is undefined, so the nested guards in this
                                  function are redundant. */
                    223:       #ifndef LIBICONV_PLUG
                    224:       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
                    225:         struct uc_to_mb_fallback_locals locals;
                    226:         locals.l_outbuf = outptr;
                    227:         locals.l_outbytesleft = outleft;
                    228:         locals.l_errno = 0;
                    229:         cd->fallbacks.uc_to_mb_fallback(wc,
                    230:                                         uc_to_mb_write_replacement,
                    231:                                         &locals,
                    232:                                         cd->fallbacks.data);
                    233:         if (locals.l_errno != 0) {
                    234:           plocals->l_errno = locals.l_errno;
                    235:           break;
                    236:         }
                                 /* The fallback already advanced the output position, so
                                    outcount is 0 for the common code below. */
                    237:         outptr = locals.l_outbuf;
                    238:         outleft = locals.l_outbytesleft;
                    239:         outcount = 0;
                    240:         goto outcount_ok;
                    241:       }
                    242:       #endif
                               /* Last resort: the replacement character U+FFFD. */
                    243:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
                    244:       if (outcount != RET_ILUNI)
                    245:         goto outcount_ok;
                    246:       plocals->l_errno = EILSEQ;
                    247:       break;
                    248:     outcount_ok:
                               /* Any negative code that reaches this point is reported as
                                  E2BIG. */
                    249:       if (outcount < 0) {
                    250:         plocals->l_errno = E2BIG;
                    251:         break;
                    252:       }
                    253:       #ifndef LIBICONV_PLUG
                    254:       if (cd->hooks.uc_hook)
                    255:         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
                    256:       #endif
                    257:       if (!(outcount <= outleft)) abort();
                    258:       outptr += outcount; outleft -= outcount;
                    259:     outcount_zero: ;
                    260:     }
                             /* Reached both after normal completion and after a break. */
                    261:     plocals->l_outbuf = outptr;
                    262:     plocals->l_outbytesleft = outleft;
                    263:   }
                    264: }
                    265: 
                    266: #endif /* !LIBICONV_PLUG */
                    267: 
                    268: static size_t unicode_loop_convert (iconv_t icd,
                    269:                                     const char* * inbuf, size_t *inbytesleft,
                    270:                                     char* * outbuf, size_t *outbytesleft)
                    271: {
                    272:   conv_t cd = (conv_t) icd;
                    273:   size_t result = 0;
                    274:   const unsigned char* inptr = (const unsigned char*) *inbuf;
                    275:   size_t inleft = *inbytesleft;
                    276:   unsigned char* outptr = (unsigned char*) *outbuf;
                    277:   size_t outleft = *outbytesleft;
                    278:   while (inleft > 0) {
                    279:     state_t last_istate = cd->istate;
                    280:     ucs4_t wc;
                    281:     int incount;
                    282:     int outcount;
                    283:     incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
                    284:     if (incount < 0) {
                    285:       if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
                    286:         /* Case 1: invalid input, possibly after a shift sequence */
                    287:         incount = DECODE_SHIFT_ILSEQ(incount);
                    288:         if (cd->discard_ilseq) {
                    289:           switch (cd->iindex) {
                    290:             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
                    291:             case ei_utf32: case ei_utf32be: case ei_utf32le:
                    292:             case ei_ucs4internal: case ei_ucs4swapped:
                    293:               incount += 4; break;
                    294:             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
                    295:             case ei_utf16: case ei_utf16be: case ei_utf16le:
                    296:             case ei_ucs2internal: case ei_ucs2swapped:
                    297:               incount += 2; break;
                    298:             default:
                    299:               incount += 1; break;
                    300:           }
                    301:           goto outcount_zero;
                    302:         }
                    303:         #ifndef LIBICONV_PLUG
                    304:         else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
                    305:           unsigned int incount2;
                    306:           struct mb_to_uc_fallback_locals locals;
                    307:           switch (cd->iindex) {
                    308:             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
                    309:             case ei_utf32: case ei_utf32be: case ei_utf32le:
                    310:             case ei_ucs4internal: case ei_ucs4swapped:
                    311:               incount2 = 4; break;
                    312:             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
                    313:             case ei_utf16: case ei_utf16be: case ei_utf16le:
                    314:             case ei_ucs2internal: case ei_ucs2swapped:
                    315:               incount2 = 2; break;
                    316:             default:
                    317:               incount2 = 1; break;
                    318:           }
                    319:           locals.l_cd = cd;
                    320:           locals.l_outbuf = outptr;
                    321:           locals.l_outbytesleft = outleft;
                    322:           locals.l_errno = 0;
                    323:           cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
                    324:                                           mb_to_uc_write_replacement,
                    325:                                           &locals,
                    326:                                           cd->fallbacks.data);
                    327:           if (locals.l_errno != 0) {
                    328:             inptr += incount; inleft -= incount;
                    329:             errno = locals.l_errno;
                    330:             result = -1;
                    331:             break;
                    332:           }
                    333:           incount += incount2;
                    334:           outptr = locals.l_outbuf;
                    335:           outleft = locals.l_outbytesleft;
                    336:           result += 1;
                    337:           goto outcount_zero;
                    338:         }
                    339:         #endif
                    340:         inptr += incount; inleft -= incount;
                    341:         errno = EILSEQ;
                    342:         result = -1;
                    343:         break;
                    344:       }
                    345:       if (incount == RET_TOOFEW(0)) {
                    346:         /* Case 2: not enough bytes available to detect anything */
                    347:         errno = EINVAL;
                    348:         result = -1;
                    349:         break;
                    350:       }
                    351:       /* Case 3: k bytes read, but only a shift sequence */
                    352:       incount = DECODE_TOOFEW(incount);
                    353:     } else {
                    354:       /* Case 4: k bytes read, making up a wide character */
                    355:       if (outleft == 0) {
                    356:         cd->istate = last_istate;
                    357:         errno = E2BIG;
                    358:         result = -1;
                    359:         break;
                    360:       }
                    361:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
                    362:       if (outcount != RET_ILUNI)
                    363:         goto outcount_ok;
                    364:       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
                    365:       if ((wc >> 7) == (0xe0000 >> 7))
                    366:         goto outcount_zero;
                    367:       /* Try transliteration. */
                    368:       result++;
                    369:       if (cd->transliterate) {
                    370:         outcount = unicode_transliterate(cd,wc,outptr,outleft);
                    371:         if (outcount != RET_ILUNI)
                    372:           goto outcount_ok;
                    373:       }
                    374:       if (cd->discard_ilseq) {
                    375:         outcount = 0;
                    376:         goto outcount_ok;
                    377:       }
                    378:       #ifndef LIBICONV_PLUG
                    379:       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
                    380:         struct uc_to_mb_fallback_locals locals;
                    381:         locals.l_outbuf = outptr;
                    382:         locals.l_outbytesleft = outleft;
                    383:         locals.l_errno = 0;
                    384:         cd->fallbacks.uc_to_mb_fallback(wc,
                    385:                                         uc_to_mb_write_replacement,
                    386:                                         &locals,
                    387:                                         cd->fallbacks.data);
                    388:         if (locals.l_errno != 0) {
                    389:           cd->istate = last_istate;
                    390:           errno = locals.l_errno;
                    391:           return -1;
                    392:         }
                    393:         outptr = locals.l_outbuf;
                    394:         outleft = locals.l_outbytesleft;
                    395:         outcount = 0;
                    396:         goto outcount_ok;
                    397:       }
                    398:       #endif
                    399:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
                    400:       if (outcount != RET_ILUNI)
                    401:         goto outcount_ok;
                    402:       cd->istate = last_istate;
                    403:       errno = EILSEQ;
                    404:       result = -1;
                    405:       break;
                    406:     outcount_ok:
                    407:       if (outcount < 0) {
                    408:         cd->istate = last_istate;
                    409:         errno = E2BIG;
                    410:         result = -1;
                    411:         break;
                    412:       }
                    413:       #ifndef LIBICONV_PLUG
                    414:       if (cd->hooks.uc_hook)
                    415:         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
                    416:       #endif
                    417:       if (!(outcount <= outleft)) abort();
                    418:       outptr += outcount; outleft -= outcount;
                    419:     }
                    420:   outcount_zero:
                    421:     if (!(incount <= inleft)) abort();
                    422:     inptr += incount; inleft -= incount;
                    423:   }
                    424:   *inbuf = (const char*) inptr;
                    425:   *inbytesleft = inleft;
                    426:   *outbuf = (char*) outptr;
                    427:   *outbytesleft = outleft;
                    428:   return result;
                    429: }
                    430: 
                    431: static size_t unicode_loop_reset (iconv_t icd,
                    432:                                   char* * outbuf, size_t *outbytesleft)
                    433: {
                           /* Reset entry point of the conversion descriptor.
                              If outbuf or *outbuf is NULL, only the input and output states are
                              cleared and 0 is returned.  Otherwise a character still pending
                              in the input side (obtained through xxx_flushwc, if present) is
                              encoded first, then xxx_reset (if present) writes to the output,
                              and finally both states are cleared.
                              Returns result, which is 1 if the pending character could not be
                              encoded directly (tag characters excepted) and 0 otherwise.  On
                              error returns (size_t)(-1) with errno set to EILSEQ, E2BIG, or
                              the error reported by the fallback. */
                    434:   conv_t cd = (conv_t) icd;
                    435:   if (outbuf == NULL || *outbuf == NULL) {
                    436:     /* Reset the states. */
                    437:     memset(&cd->istate,'\0',sizeof(state_t));
                    438:     memset(&cd->ostate,'\0',sizeof(state_t));
                    439:     return 0;
                    440:   } else {
                    441:     size_t result = 0;
                    442:     if (cd->ifuncs.xxx_flushwc) {
                               /* Saved so that the input state can be restored when the
                                  flushed character cannot be written. */
                    443:       state_t last_istate = cd->istate;
                    444:       ucs4_t wc;
                    445:       if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
                    446:         unsigned char* outptr = (unsigned char*) *outbuf;
                    447:         size_t outleft = *outbytesleft;
                    448:         int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
                    449:         if (outcount != RET_ILUNI)
                    450:           goto outcount_ok;
                    451:         /* Handle Unicode tag characters (range U+E0000..U+E007F). */
                    452:         if ((wc >> 7) == (0xe0000 >> 7))
                    453:           goto outcount_zero;
                    454:         /* Try transliteration. */
                                 /* result is incremented before any substitute is attempted. */
                    455:         result++;
                    456:         if (cd->transliterate) {
                    457:           outcount = unicode_transliterate(cd,wc,outptr,outleft);
                    458:           if (outcount != RET_ILUNI)
                    459:             goto outcount_ok;
                    460:         }
                    461:         if (cd->discard_ilseq) {
                    462:           outcount = 0;
                    463:           goto outcount_ok;
                    464:         }
                    465:         #ifndef LIBICONV_PLUG
                    466:         else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
                    467:           struct uc_to_mb_fallback_locals locals;
                    468:           locals.l_outbuf = outptr;
                    469:           locals.l_outbytesleft = outleft;
                    470:           locals.l_errno = 0;
                    471:           cd->fallbacks.uc_to_mb_fallback(wc,
                    472:                                           uc_to_mb_write_replacement,
                    473:                                           &locals,
                    474:                                           cd->fallbacks.data);
                    475:           if (locals.l_errno != 0) {
                                     /* *outbuf and *outbytesleft have not been modified yet,
                                        so returning directly loses no progress. */
                    476:             cd->istate = last_istate;
                    477:             errno = locals.l_errno;
                    478:             return -1;
                    479:           }
                                   /* The fallback already advanced the output position, so
                                      outcount is 0 for the common code below. */
                    480:           outptr = locals.l_outbuf;
                    481:           outleft = locals.l_outbytesleft;
                    482:           outcount = 0;
                    483:           goto outcount_ok;
                    484:         }
                    485:         #endif
                                 /* Last resort: the replacement character U+FFFD. */
                    486:         outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
                    487:         if (outcount != RET_ILUNI)
                    488:           goto outcount_ok;
                    489:         cd->istate = last_istate;
                    490:         errno = EILSEQ;
                    491:         return -1;
                    492:       outcount_ok:
                                 /* Any negative code that reaches this point is reported as
                                    E2BIG. */
                    493:         if (outcount < 0) {
                    494:           cd->istate = last_istate;
                    495:           errno = E2BIG;
                    496:           return -1;
                    497:         }
                    498:         #ifndef LIBICONV_PLUG
                    499:         if (cd->hooks.uc_hook)
                    500:           (*cd->hooks.uc_hook)(wc, cd->hooks.data);
                    501:         #endif
                    502:         if (!(outcount <= outleft)) abort();
                    503:         outptr += outcount;
                    504:         outleft -= outcount;
                    505:       outcount_zero:
                                 /* Publish the output produced for the flushed character. */
                    506:         *outbuf = (char*) outptr;
                    507:         *outbytesleft = outleft;
                    508:       }
                    509:     }
                    510:     if (cd->ofuncs.xxx_reset) {
                               /* Let the output encoding write whatever it needs at the end of
                                  the conversion.  On failure, the output of the flush step
                                  above stays published. */
                    511:       unsigned char* outptr = (unsigned char*) *outbuf;
                    512:       size_t outleft = *outbytesleft;
                    513:       int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
                    514:       if (outcount < 0) {
                    515:         errno = E2BIG;
                    516:         return -1;
                    517:       }
                    518:       if (!(outcount <= outleft)) abort();
                    519:       *outbuf = (char*) (outptr + outcount);
                    520:       *outbytesleft = outleft - outcount;
                    521:     }
                    522:     memset(&cd->istate,'\0',sizeof(state_t));
                    523:     memset(&cd->ostate,'\0',sizeof(state_t));
                    524:     return result;
                    525:   }
                    526: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>