File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / loop_unicode.h
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /*
    2:  * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, see <https://www.gnu.org/licenses/>.
   18:  */
   19: 
   20: /* This file defines the conversion loop via Unicode as a pivot encoding. */
   21: 
   22: /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
   23: static int unicode_transliterate (conv_t cd, ucs4_t wc,
   24:                                   unsigned char* outptr, size_t outleft)
   25: {
   26:   if (cd->oflags & HAVE_HANGUL_JAMO) {
   27:     /* Decompose Hangul into Jamo. Use double-width Jamo (contained
   28:        in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
   29:        (contained in Unicode only). */
   30:     ucs4_t buf[3];
   31:     int ret = johab_hangul_decompose(cd,buf,wc);
   32:     if (ret != RET_ILUNI) {
   33:       /* we know 1 <= ret <= 3 */
   34:       state_t backup_state = cd->ostate;
   35:       unsigned char* backup_outptr = outptr;
   36:       size_t backup_outleft = outleft;
   37:       int i, sub_outcount;
   38:       for (i = 0; i < ret; i++) {
   39:         if (outleft == 0) {
   40:           sub_outcount = RET_TOOSMALL;
   41:           goto johab_hangul_failed;
   42:         }
   43:         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
   44:         if (sub_outcount <= RET_ILUNI)
   45:           goto johab_hangul_failed;
   46:         if (!(sub_outcount <= outleft)) abort();
   47:         outptr += sub_outcount; outleft -= sub_outcount;
   48:       }
   49:       return outptr-backup_outptr;
   50:     johab_hangul_failed:
   51:       cd->ostate = backup_state;
   52:       outptr = backup_outptr;
   53:       outleft = backup_outleft;
   54:       if (sub_outcount != RET_ILUNI)
   55:         return RET_TOOSMALL;
   56:     }
   57:   }
   58:   {
   59:     /* Try to use a variant, but postfix it with
   60:        U+303E IDEOGRAPHIC VARIATION INDICATOR
   61:        (cf. Ken Lunde's "CJKV information processing", p. 188). */
   62:     int indx = -1;
   63:     if (wc == 0x3006)
   64:       indx = 0;
   65:     else if (wc == 0x30f6)
   66:       indx = 1;
   67:     else if (wc >= 0x4e00 && wc < 0xa000)
   68:       indx = cjk_variants_indx[wc-0x4e00];
   69:     if (indx >= 0) {
   70:       for (;; indx++) {
   71:         ucs4_t buf[2];
   72:         unsigned short variant = cjk_variants[indx];
   73:         unsigned short last = variant & 0x8000;
   74:         variant &= 0x7fff;
   75:         variant += 0x3000;
   76:         buf[0] = variant; buf[1] = 0x303e;
   77:         {
   78:           state_t backup_state = cd->ostate;
   79:           unsigned char* backup_outptr = outptr;
   80:           size_t backup_outleft = outleft;
   81:           int i, sub_outcount;
   82:           for (i = 0; i < 2; i++) {
   83:             if (outleft == 0) {
   84:               sub_outcount = RET_TOOSMALL;
   85:               goto variant_failed;
   86:             }
   87:             sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
   88:             if (sub_outcount <= RET_ILUNI)
   89:               goto variant_failed;
   90:             if (!(sub_outcount <= outleft)) abort();
   91:             outptr += sub_outcount; outleft -= sub_outcount;
   92:           }
   93:           return outptr-backup_outptr;
   94:         variant_failed:
   95:           cd->ostate = backup_state;
   96:           outptr = backup_outptr;
   97:           outleft = backup_outleft;
   98:           if (sub_outcount != RET_ILUNI)
   99:             return RET_TOOSMALL;
  100:         }
  101:         if (last)
  102:           break;
  103:       }
  104:     }
  105:   }
  106:   if (wc >= 0x2018 && wc <= 0x201a) {
  107:     /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
  108:     ucs4_t substitute =
  109:       (cd->oflags & HAVE_QUOTATION_MARKS
  110:        ? (wc == 0x201a ? 0x2018 : wc)
  111:        : (cd->oflags & HAVE_ACCENTS
  112:           ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
  113:           : 0x0027 /* use apostrophe */
  114:       )  );
  115:     int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
  116:     if (outcount != RET_ILUNI)
  117:       return outcount;
  118:   }
  119:   {
  120:     /* Use the transliteration table. */
  121:     int indx = translit_index(wc);
  122:     if (indx >= 0) {
  123:       const unsigned int * cp = &translit_data[indx];
  124:       unsigned int num = *cp++;
  125:       state_t backup_state = cd->ostate;
  126:       unsigned char* backup_outptr = outptr;
  127:       size_t backup_outleft = outleft;
  128:       unsigned int i;
  129:       int sub_outcount;
  130:       for (i = 0; i < num; i++) {
  131:         if (outleft == 0) {
  132:           sub_outcount = RET_TOOSMALL;
  133:           goto translit_failed;
  134:         }
  135:         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
  136:         if (sub_outcount == RET_ILUNI)
  137:           /* Recursive transliteration. */
  138:           sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
  139:         if (sub_outcount <= RET_ILUNI)
  140:           goto translit_failed;
  141:         if (!(sub_outcount <= outleft)) abort();
  142:         outptr += sub_outcount; outleft -= sub_outcount;
  143:       }
  144:       return outptr-backup_outptr;
  145:     translit_failed:
  146:       cd->ostate = backup_state;
  147:       outptr = backup_outptr;
  148:       outleft = backup_outleft;
  149:       if (sub_outcount != RET_ILUNI)
  150:         return RET_TOOSMALL;
  151:     }
  152:   }
  153:   return RET_ILUNI;
  154: }
  155: 
  156: #ifndef LIBICONV_PLUG
  157: 
  /* Output cursor shared between a uc_to_mb fallback invocation and the
     write callback below. */
  struct uc_to_mb_fallback_locals {
    unsigned char* l_outbuf;   /* next free byte of the output buffer */
    size_t l_outbytesleft;     /* bytes still available at l_outbuf */
    int l_errno;               /* 0, or the errno value of the first failure */
  };

  /* Write callback handed to the uc_to_mb fallback handler: appends the
     buflen bytes at buf to the output described by callback_arg (a
     struct uc_to_mb_fallback_locals *).  Once an error has been recorded,
     every further call is ignored.  If the bytes do not fit, nothing is
     copied and l_errno becomes E2BIG. */
  static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                          void* callback_arg)
  {
    struct uc_to_mb_fallback_locals * const state = callback_arg;

    /* A previous call already failed: stay silent. */
    if (state->l_errno != 0)
      return;

    /* Refuse partial writes. */
    if (buflen > state->l_outbytesleft) {
      state->l_errno = E2BIG;
      return;
    }

    memcpy(state->l_outbuf, buf, buflen);
    state->l_outbuf += buflen;
    state->l_outbytesleft -= buflen;
  }
  181: 
  182: struct mb_to_uc_fallback_locals {
  183:   conv_t l_cd;
  184:   unsigned char* l_outbuf;
  185:   size_t l_outbytesleft;
  186:   int l_errno;
  187: };
  188: 
  189: static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
  190:                                         void* callback_arg)
  191: {
  192:   struct mb_to_uc_fallback_locals * plocals =
  193:     (struct mb_to_uc_fallback_locals *) callback_arg;
  194:   /* Do nothing if already encountered an error in a previous call. */
  195:   if (plocals->l_errno == 0) {
  196:     /* Attempt to convert the passed buffer to the target encoding. */
  197:     conv_t cd = plocals->l_cd;
  198:     unsigned char* outptr = plocals->l_outbuf;
  199:     size_t outleft = plocals->l_outbytesleft;
  200:     for (; buflen > 0; buf++, buflen--) {
  201:       ucs4_t wc = *buf;
  202:       int outcount;
  203:       if (outleft == 0) {
  204:         plocals->l_errno = E2BIG;
  205:         break;
  206:       }
  207:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
  208:       if (outcount != RET_ILUNI)
  209:         goto outcount_ok;
  210:       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
  211:       if ((wc >> 7) == (0xe0000 >> 7))
  212:         goto outcount_zero;
  213:       /* Try transliteration. */
  214:       if (cd->transliterate) {
  215:         outcount = unicode_transliterate(cd,wc,outptr,outleft);
  216:         if (outcount != RET_ILUNI)
  217:           goto outcount_ok;
  218:       }
  219:       if (cd->discard_ilseq) {
  220:         outcount = 0;
  221:         goto outcount_ok;
  222:       }
  223:       #ifndef LIBICONV_PLUG
  224:       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
  225:         struct uc_to_mb_fallback_locals locals;
  226:         locals.l_outbuf = outptr;
  227:         locals.l_outbytesleft = outleft;
  228:         locals.l_errno = 0;
  229:         cd->fallbacks.uc_to_mb_fallback(wc,
  230:                                         uc_to_mb_write_replacement,
  231:                                         &locals,
  232:                                         cd->fallbacks.data);
  233:         if (locals.l_errno != 0) {
  234:           plocals->l_errno = locals.l_errno;
  235:           break;
  236:         }
  237:         outptr = locals.l_outbuf;
  238:         outleft = locals.l_outbytesleft;
  239:         outcount = 0;
  240:         goto outcount_ok;
  241:       }
  242:       #endif
  243:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
  244:       if (outcount != RET_ILUNI)
  245:         goto outcount_ok;
  246:       plocals->l_errno = EILSEQ;
  247:       break;
  248:     outcount_ok:
  249:       if (outcount < 0) {
  250:         plocals->l_errno = E2BIG;
  251:         break;
  252:       }
  253:       #ifndef LIBICONV_PLUG
  254:       if (cd->hooks.uc_hook)
  255:         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
  256:       #endif
  257:       if (!(outcount <= outleft)) abort();
  258:       outptr += outcount; outleft -= outcount;
  259:     outcount_zero: ;
  260:     }
  261:     plocals->l_outbuf = outptr;
  262:     plocals->l_outbytesleft = outleft;
  263:   }
  264: }
  265: 
  266: #endif /* !LIBICONV_PLUG */
  267: 
  268: static size_t unicode_loop_convert (iconv_t icd,
  269:                                     const char* * inbuf, size_t *inbytesleft,
  270:                                     char* * outbuf, size_t *outbytesleft)
  271: {
  272:   conv_t cd = (conv_t) icd;
  273:   size_t result = 0;
  274:   const unsigned char* inptr = (const unsigned char*) *inbuf;
  275:   size_t inleft = *inbytesleft;
  276:   unsigned char* outptr = (unsigned char*) *outbuf;
  277:   size_t outleft = *outbytesleft;
  278:   while (inleft > 0) {
  279:     state_t last_istate = cd->istate;
  280:     ucs4_t wc;
  281:     int incount;
  282:     int outcount;
  283:     incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
  284:     if (incount < 0) {
  285:       if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
  286:         /* Case 1: invalid input, possibly after a shift sequence */
  287:         incount = DECODE_SHIFT_ILSEQ(incount);
  288:         if (cd->discard_ilseq) {
  289:           switch (cd->iindex) {
  290:             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
  291:             case ei_utf32: case ei_utf32be: case ei_utf32le:
  292:             case ei_ucs4internal: case ei_ucs4swapped:
  293:               incount += 4; break;
  294:             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
  295:             case ei_utf16: case ei_utf16be: case ei_utf16le:
  296:             case ei_ucs2internal: case ei_ucs2swapped:
  297:               incount += 2; break;
  298:             default:
  299:               incount += 1; break;
  300:           }
  301:           goto outcount_zero;
  302:         }
  303:         #ifndef LIBICONV_PLUG
  304:         else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
  305:           unsigned int incount2;
  306:           struct mb_to_uc_fallback_locals locals;
  307:           switch (cd->iindex) {
  308:             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
  309:             case ei_utf32: case ei_utf32be: case ei_utf32le:
  310:             case ei_ucs4internal: case ei_ucs4swapped:
  311:               incount2 = 4; break;
  312:             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
  313:             case ei_utf16: case ei_utf16be: case ei_utf16le:
  314:             case ei_ucs2internal: case ei_ucs2swapped:
  315:               incount2 = 2; break;
  316:             default:
  317:               incount2 = 1; break;
  318:           }
  319:           locals.l_cd = cd;
  320:           locals.l_outbuf = outptr;
  321:           locals.l_outbytesleft = outleft;
  322:           locals.l_errno = 0;
  323:           cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
  324:                                           mb_to_uc_write_replacement,
  325:                                           &locals,
  326:                                           cd->fallbacks.data);
  327:           if (locals.l_errno != 0) {
  328:             inptr += incount; inleft -= incount;
  329:             errno = locals.l_errno;
  330:             result = -1;
  331:             break;
  332:           }
  333:           incount += incount2;
  334:           outptr = locals.l_outbuf;
  335:           outleft = locals.l_outbytesleft;
  336:           result += 1;
  337:           goto outcount_zero;
  338:         }
  339:         #endif
  340:         inptr += incount; inleft -= incount;
  341:         errno = EILSEQ;
  342:         result = -1;
  343:         break;
  344:       }
  345:       if (incount == RET_TOOFEW(0)) {
  346:         /* Case 2: not enough bytes available to detect anything */
  347:         errno = EINVAL;
  348:         result = -1;
  349:         break;
  350:       }
  351:       /* Case 3: k bytes read, but only a shift sequence */
  352:       incount = DECODE_TOOFEW(incount);
  353:     } else {
  354:       /* Case 4: k bytes read, making up a wide character */
  355:       if (outleft == 0) {
  356:         cd->istate = last_istate;
  357:         errno = E2BIG;
  358:         result = -1;
  359:         break;
  360:       }
  361:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
  362:       if (outcount != RET_ILUNI)
  363:         goto outcount_ok;
  364:       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
  365:       if ((wc >> 7) == (0xe0000 >> 7))
  366:         goto outcount_zero;
  367:       /* Try transliteration. */
  368:       result++;
  369:       if (cd->transliterate) {
  370:         outcount = unicode_transliterate(cd,wc,outptr,outleft);
  371:         if (outcount != RET_ILUNI)
  372:           goto outcount_ok;
  373:       }
  374:       if (cd->discard_ilseq) {
  375:         outcount = 0;
  376:         goto outcount_ok;
  377:       }
  378:       #ifndef LIBICONV_PLUG
  379:       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
  380:         struct uc_to_mb_fallback_locals locals;
  381:         locals.l_outbuf = outptr;
  382:         locals.l_outbytesleft = outleft;
  383:         locals.l_errno = 0;
  384:         cd->fallbacks.uc_to_mb_fallback(wc,
  385:                                         uc_to_mb_write_replacement,
  386:                                         &locals,
  387:                                         cd->fallbacks.data);
  388:         if (locals.l_errno != 0) {
  389:           cd->istate = last_istate;
  390:           errno = locals.l_errno;
  391:           return -1;
  392:         }
  393:         outptr = locals.l_outbuf;
  394:         outleft = locals.l_outbytesleft;
  395:         outcount = 0;
  396:         goto outcount_ok;
  397:       }
  398:       #endif
  399:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
  400:       if (outcount != RET_ILUNI)
  401:         goto outcount_ok;
  402:       cd->istate = last_istate;
  403:       errno = EILSEQ;
  404:       result = -1;
  405:       break;
  406:     outcount_ok:
  407:       if (outcount < 0) {
  408:         cd->istate = last_istate;
  409:         errno = E2BIG;
  410:         result = -1;
  411:         break;
  412:       }
  413:       #ifndef LIBICONV_PLUG
  414:       if (cd->hooks.uc_hook)
  415:         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
  416:       #endif
  417:       if (!(outcount <= outleft)) abort();
  418:       outptr += outcount; outleft -= outcount;
  419:     }
  420:   outcount_zero:
  421:     if (!(incount <= inleft)) abort();
  422:     inptr += incount; inleft -= incount;
  423:   }
  424:   *inbuf = (const char*) inptr;
  425:   *inbytesleft = inleft;
  426:   *outbuf = (char*) outptr;
  427:   *outbytesleft = outleft;
  428:   return result;
  429: }
  430: 
  431: static size_t unicode_loop_reset (iconv_t icd,
  432:                                   char* * outbuf, size_t *outbytesleft)
  433: {
  434:   conv_t cd = (conv_t) icd;
  435:   if (outbuf == NULL || *outbuf == NULL) {
  436:     /* Reset the states. */
  437:     memset(&cd->istate,'\0',sizeof(state_t));
  438:     memset(&cd->ostate,'\0',sizeof(state_t));
  439:     return 0;
  440:   } else {
  441:     size_t result = 0;
  442:     if (cd->ifuncs.xxx_flushwc) {
  443:       state_t last_istate = cd->istate;
  444:       ucs4_t wc;
  445:       if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
  446:         unsigned char* outptr = (unsigned char*) *outbuf;
  447:         size_t outleft = *outbytesleft;
  448:         int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
  449:         if (outcount != RET_ILUNI)
  450:           goto outcount_ok;
  451:         /* Handle Unicode tag characters (range U+E0000..U+E007F). */
  452:         if ((wc >> 7) == (0xe0000 >> 7))
  453:           goto outcount_zero;
  454:         /* Try transliteration. */
  455:         result++;
  456:         if (cd->transliterate) {
  457:           outcount = unicode_transliterate(cd,wc,outptr,outleft);
  458:           if (outcount != RET_ILUNI)
  459:             goto outcount_ok;
  460:         }
  461:         if (cd->discard_ilseq) {
  462:           outcount = 0;
  463:           goto outcount_ok;
  464:         }
  465:         #ifndef LIBICONV_PLUG
  466:         else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
  467:           struct uc_to_mb_fallback_locals locals;
  468:           locals.l_outbuf = outptr;
  469:           locals.l_outbytesleft = outleft;
  470:           locals.l_errno = 0;
  471:           cd->fallbacks.uc_to_mb_fallback(wc,
  472:                                           uc_to_mb_write_replacement,
  473:                                           &locals,
  474:                                           cd->fallbacks.data);
  475:           if (locals.l_errno != 0) {
  476:             cd->istate = last_istate;
  477:             errno = locals.l_errno;
  478:             return -1;
  479:           }
  480:           outptr = locals.l_outbuf;
  481:           outleft = locals.l_outbytesleft;
  482:           outcount = 0;
  483:           goto outcount_ok;
  484:         }
  485:         #endif
  486:         outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
  487:         if (outcount != RET_ILUNI)
  488:           goto outcount_ok;
  489:         cd->istate = last_istate;
  490:         errno = EILSEQ;
  491:         return -1;
  492:       outcount_ok:
  493:         if (outcount < 0) {
  494:           cd->istate = last_istate;
  495:           errno = E2BIG;
  496:           return -1;
  497:         }
  498:         #ifndef LIBICONV_PLUG
  499:         if (cd->hooks.uc_hook)
  500:           (*cd->hooks.uc_hook)(wc, cd->hooks.data);
  501:         #endif
  502:         if (!(outcount <= outleft)) abort();
  503:         outptr += outcount;
  504:         outleft -= outcount;
  505:       outcount_zero:
  506:         *outbuf = (char*) outptr;
  507:         *outbytesleft = outleft;
  508:       }
  509:     }
  510:     if (cd->ofuncs.xxx_reset) {
  511:       unsigned char* outptr = (unsigned char*) *outbuf;
  512:       size_t outleft = *outbytesleft;
  513:       int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
  514:       if (outcount < 0) {
  515:         errno = E2BIG;
  516:         return -1;
  517:       }
  518:       if (!(outcount <= outleft)) abort();
  519:       *outbuf = (char*) (outptr + outcount);
  520:       *outbytesleft = outleft - outcount;
  521:     }
  522:     memset(&cd->istate,'\0',sizeof(state_t));
  523:     memset(&cd->ostate,'\0',sizeof(state_t));
  524:     return result;
  525:   }
  526: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>