File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / loop_unicode.h
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 5 months ago) by misho
CVS tags: MAIN, HEAD
Initial revision

    1: /*
    2:  * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   18:  * Fifth Floor, Boston, MA 02110-1301, USA.
   19:  */
   20: 
   21: /* This file defines the conversion loop via Unicode as a pivot encoding. */
   22: 
   23: /* Attempt to transliterate wc. Return code as in xxx_wctomb.
          The following strategies are tried in order:
            1. Hangul-to-Jamo decomposition (only if cd->oflags has
               HAVE_HANGUL_JAMO),
            2. a CJK variant followed by U+303E,
            3. a substitute for the quotation marks U+2018..U+201A,
            4. the transliteration table (applied recursively).
          Returns the number of bytes written at outptr on success.
          Returns RET_TOOSMALL when a multi-character replacement failed for
          any reason other than RET_ILUNI, and RET_ILUNI when no strategy
          produced output. Whenever a multi-character attempt fails,
          cd->ostate is restored to the value it had before that attempt. */
   24: static int unicode_transliterate (conv_t cd, ucs4_t wc,
   25:                                   unsigned char* outptr, size_t outleft)
   26: {
   27:   if (cd->oflags & HAVE_HANGUL_JAMO) {
   28:     /* Decompose Hangul into Jamo. Use double-width Jamo (contained
   29:        in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
   30:        (contained in Unicode only). */
   31:     ucs4_t buf[3];
   32:     int ret = johab_hangul_decompose(cd,buf,wc);
   33:     if (ret != RET_ILUNI) {
   34:       /* we know 1 <= ret <= 3 */
   35:       state_t backup_state = cd->ostate;
   36:       unsigned char* backup_outptr = outptr;
   37:       size_t backup_outleft = outleft;
   38:       int i, sub_outcount;
   39:       for (i = 0; i < ret; i++) {
   40:         if (outleft == 0) {
   41:           sub_outcount = RET_TOOSMALL;
   42:           goto johab_hangul_failed;
   43:         }
   44:         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
               /* NOTE(review): this treats RET_ILUNI and every smaller value as
                  failure; presumably the other error codes (e.g. RET_TOOSMALL)
                  are below RET_ILUNI - the constants are defined elsewhere. */
   45:         if (sub_outcount <= RET_ILUNI)
   46:           goto johab_hangul_failed;
               /* Sanity check: the converter must not report more bytes than
                  there was room for. */
   47:         if (!(sub_outcount <= outleft)) abort();
   48:         outptr += sub_outcount; outleft -= sub_outcount;
   49:       }
   50:       return outptr-backup_outptr;
   51:     johab_hangul_failed:
             /* Roll back so that the next strategy starts from the original
                output position and output state. */
   52:       cd->ostate = backup_state;
   53:       outptr = backup_outptr;
   54:       outleft = backup_outleft;
             /* Only RET_ILUNI lets the remaining strategies run. */
   55:       if (sub_outcount != RET_ILUNI)
   56:         return RET_TOOSMALL;
   57:     }
   58:   }
   59:   {
   60:     /* Try to use a variant, but postfix it with
   61:        U+303E IDEOGRAPHIC VARIATION INDICATOR
   62:        (cf. Ken Lunde's "CJKV information processing", p. 188). */
   63:     int indx = -1;
   64:     if (wc == 0x3006)
   65:       indx = 0;
   66:     else if (wc == 0x30f6)
   67:       indx = 1;
   68:     else if (wc >= 0x4e00 && wc < 0xa000)
   69:       indx = cjk_variants_indx[wc-0x4e00];
           /* A negative index means there is no variant list to try. */
   70:     if (indx >= 0) {
   71:       for (;; indx++) {
   72:         ucs4_t buf[2];
   73:         unsigned short variant = cjk_variants[indx];
               /* Bit 15 marks the last entry of this character's variant list;
                  the low 15 bits are an offset that is added to 0x3000. */
   74:         unsigned short last = variant & 0x8000;
   75:         variant &= 0x7fff;
   76:         variant += 0x3000;
   77:         buf[0] = variant; buf[1] = 0x303e;
   78:         {
   79:           state_t backup_state = cd->ostate;
   80:           unsigned char* backup_outptr = outptr;
   81:           size_t backup_outleft = outleft;
   82:           int i, sub_outcount;
   83:           for (i = 0; i < 2; i++) {
   84:             if (outleft == 0) {
   85:               sub_outcount = RET_TOOSMALL;
   86:               goto variant_failed;
   87:             }
   88:             sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
   89:             if (sub_outcount <= RET_ILUNI)
   90:               goto variant_failed;
   91:             if (!(sub_outcount <= outleft)) abort();
   92:             outptr += sub_outcount; outleft -= sub_outcount;
   93:           }
   94:           return outptr-backup_outptr;
   95:         variant_failed:
                 /* Undo this variant; on RET_ILUNI move on to the next one. */
   96:           cd->ostate = backup_state;
   97:           outptr = backup_outptr;
   98:           outleft = backup_outleft;
   99:           if (sub_outcount != RET_ILUNI)
  100:             return RET_TOOSMALL;
  101:         }
  102:         if (last)
  103:           break;
  104:       }
  105:     }
  106:   }
  107:   if (wc >= 0x2018 && wc <= 0x201a) {
  108:     /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
  109:     ucs4_t substitute =
  110:       (cd->oflags & HAVE_QUOTATION_MARKS
  111:        ? (wc == 0x201a ? 0x2018 : wc)
  112:        : (cd->oflags & HAVE_ACCENTS
  113:           ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
  114:           : 0x0027 /* use apostrophe */
  115:       )  );
  116:     int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
           /* Any result other than RET_ILUNI, including an error code, is
              returned unchanged. */
  117:     if (outcount != RET_ILUNI)
  118:       return outcount;
  119:   }
  120:   {
  121:     /* Use the transliteration table. */
  122:     int indx = translit_index(wc);
  123:     if (indx >= 0) {
             /* translit_data[indx] is the length of the replacement; that many
                code points follow it. */
  124:       const unsigned int * cp = &translit_data[indx];
  125:       unsigned int num = *cp++;
  126:       state_t backup_state = cd->ostate;
  127:       unsigned char* backup_outptr = outptr;
  128:       size_t backup_outleft = outleft;
  129:       unsigned int i;
  130:       int sub_outcount;
  131:       for (i = 0; i < num; i++) {
  132:         if (outleft == 0) {
  133:           sub_outcount = RET_TOOSMALL;
  134:           goto translit_failed;
  135:         }
  136:         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
  137:         if (sub_outcount == RET_ILUNI)
  138:           /* Recursive transliteration. */
  139:           sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
  140:         if (sub_outcount <= RET_ILUNI)
  141:           goto translit_failed;
  142:         if (!(sub_outcount <= outleft)) abort();
  143:         outptr += sub_outcount; outleft -= sub_outcount;
  144:       }
  145:       return outptr-backup_outptr;
  146:     translit_failed:
             /* Undo the partial replacement before reporting the failure. */
  147:       cd->ostate = backup_state;
  148:       outptr = backup_outptr;
  149:       outleft = backup_outleft;
  150:       if (sub_outcount != RET_ILUNI)
  151:         return RET_TOOSMALL;
  152:     }
  153:   }
  154:   return RET_ILUNI;
  155: }
  156: 
  157: #ifndef LIBICONV_PLUG
  158: 
       /* State shared between a caller that invokes the uc_to_mb_fallback and
          uc_to_mb_write_replacement, which receives it as callback_arg. */
  159: struct uc_to_mb_fallback_locals {
  160:   unsigned char* l_outbuf;   /* next position to write replacement bytes */
  161:   size_t l_outbytesleft;     /* bytes still available at l_outbuf */
  162:   int l_errno;               /* 0, or the error recorded by the callback */
  163: };
  164: 
  165: static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
  166:                                         void* callback_arg)
  167: {
  168:   /* Append the buflen replacement bytes at buf to the output cursor. */
  169:   struct uc_to_mb_fallback_locals * const st = callback_arg;
  170:   /* A failure recorded by an earlier call turns this call into a no-op. */
  171:   if (st->l_errno != 0)
  172:     return;
  173:   /* The replacement is copied whole or not at all. */
  174:   if (buflen > st->l_outbytesleft) {
  175:     st->l_errno = E2BIG;
  176:     return;
  177:   }
  178:   memcpy(st->l_outbuf, buf, buflen);
  179:   st->l_outbuf += buflen;
  180:   st->l_outbytesleft -= buflen;
  181: }
  182: 
       /* State shared between a caller that invokes the mb_to_uc_fallback and
          mb_to_uc_write_replacement, which receives it as callback_arg. */
  183: struct mb_to_uc_fallback_locals {
  184:   conv_t l_cd;               /* conversion descriptor used for the output */
  185:   unsigned char* l_outbuf;   /* next position to write converted bytes */
  186:   size_t l_outbytesleft;     /* bytes still available at l_outbuf */
  187:   int l_errno;               /* 0, or the error recorded by the callback */
  188: };
  189: 
       /* Convert the buflen Unicode characters at buf to the output encoding
          of the descriptor stored in callback_arg (a struct
          mb_to_uc_fallback_locals) and append the result to its output cursor.
          For each character the steps are: direct conversion with xxx_wctomb;
          silently dropping tag characters; transliteration if
          cd->transliterate is set; dropping the character if
          cd->discard_ilseq is set; the uc_to_mb_fallback if one is installed;
          and finally U+FFFD.
          On failure l_errno is set (E2BIG, EILSEQ, or the error reported by
          the uc_to_mb_fallback) and conversion stops; the output cursor is
          still advanced past the characters that were completed. Once
          l_errno is nonzero, further calls do nothing. */
  190: static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
  191:                                         void* callback_arg)
  192: {
  193:   struct mb_to_uc_fallback_locals * plocals =
  194:     (struct mb_to_uc_fallback_locals *) callback_arg;
  195:   /* Do nothing if already encountered an error in a previous call. */
  196:   if (plocals->l_errno == 0) {
  197:     /* Attempt to convert the passed buffer to the target encoding. */
  198:     conv_t cd = plocals->l_cd;
  199:     unsigned char* outptr = plocals->l_outbuf;
  200:     size_t outleft = plocals->l_outbytesleft;
  201:     for (; buflen > 0; buf++, buflen--) {
  202:       ucs4_t wc = *buf;
  203:       int outcount;
  204:       if (outleft == 0) {
  205:         plocals->l_errno = E2BIG;
  206:         break;
  207:       }
  208:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
  209:       if (outcount != RET_ILUNI)
  210:         goto outcount_ok;
  211:       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
  212:       if ((wc >> 7) == (0xe0000 >> 7))
  213:         goto outcount_zero;
  214:       /* Try transliteration. */
  215:       if (cd->transliterate) {
  216:         outcount = unicode_transliterate(cd,wc,outptr,outleft);
  217:         if (outcount != RET_ILUNI)
  218:           goto outcount_ok;
  219:       }
  220:       if (cd->discard_ilseq) {
  221:         outcount = 0;
  222:         goto outcount_ok;
  223:       }
  224:       #ifndef LIBICONV_PLUG
  225:       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
               /* Let the user-supplied fallback write replacement bytes
                  through uc_to_mb_write_replacement. */
  226:         struct uc_to_mb_fallback_locals locals;
  227:         locals.l_outbuf = outptr;
  228:         locals.l_outbytesleft = outleft;
  229:         locals.l_errno = 0;
  230:         cd->fallbacks.uc_to_mb_fallback(wc,
  231:                                         uc_to_mb_write_replacement,
  232:                                         &locals,
  233:                                         cd->fallbacks.data);
  234:         if (locals.l_errno != 0) {
  235:           plocals->l_errno = locals.l_errno;
  236:           break;
  237:         }
               /* The fallback already advanced the cursor, so outcount is 0. */
  238:         outptr = locals.l_outbuf;
  239:         outleft = locals.l_outbytesleft;
  240:         outcount = 0;
  241:         goto outcount_ok;
  242:       }
  243:       #endif
  244:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
  245:       if (outcount != RET_ILUNI)
  246:         goto outcount_ok;
  247:       plocals->l_errno = EILSEQ;
  248:       break;
  249:     outcount_ok:
             /* Any negative code that reaches this point is reported as
                E2BIG. */
  250:       if (outcount < 0) {
  251:         plocals->l_errno = E2BIG;
  252:         break;
  253:       }
  254:       #ifndef LIBICONV_PLUG
  255:       if (cd->hooks.uc_hook)
  256:         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
  257:       #endif
  258:       if (!(outcount <= outleft)) abort();
  259:       outptr += outcount; outleft -= outcount;
  260:     outcount_zero: ;
  261:     }
           /* Reached after normal completion and after every break above. */
  262:     plocals->l_outbuf = outptr;
  263:     plocals->l_outbytesleft = outleft;
  264:   }
  265: }
  266: 
  267: #endif /* !LIBICONV_PLUG */
  268: 
       /* Convert input from *inbuf to *outbuf using Unicode as the pivot:
          each character is decoded with cd->ifuncs.xxx_mbtowc and encoded
          with cd->ofuncs.xxx_wctomb.
          On return, *inbuf, *inbytesleft, *outbuf and *outbytesleft describe
          the input consumed and the output produced so far; this holds for
          successful returns and for error returns alike.
          Returns a count that is incremented for every character that
          xxx_wctomb could not encode directly (tag characters excepted) and
          for every invalid input sequence replaced by the mb_to_uc_fallback.
          On error returns (size_t)(-1) with errno set:
            EILSEQ  invalid input, or a character that cannot be converted,
            EINVAL  incomplete input at the end of the buffer,
            E2BIG   not enough room in the output buffer,
          or the error recorded by a fallback's write callback. */
  269: static size_t unicode_loop_convert (iconv_t icd,
  270:                                     const char* * inbuf, size_t *inbytesleft,
  271:                                     char* * outbuf, size_t *outbytesleft)
  272: {
  273:   conv_t cd = (conv_t) icd;
  274:   size_t result = 0;
  275:   const unsigned char* inptr = (const unsigned char*) *inbuf;
  276:   size_t inleft = *inbytesleft;
  277:   unsigned char* outptr = (unsigned char*) *outbuf;
  278:   size_t outleft = *outbytesleft;
  279:   while (inleft > 0) {
           /* Saved so the input state can be rolled back when the decoded
              character cannot be written. */
  280:     state_t last_istate = cd->istate;
  281:     ucs4_t wc;
  282:     int incount;
  283:     int outcount;
  284:     incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
  285:     if (incount < 0) {
             /* Negative codes with the same parity as RET_ILSEQ denote invalid
                input; the remaining negative codes denote too few bytes. */
  286:       if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
  287:         /* Case 1: invalid input, possibly after a shift sequence */
  288:         incount = DECODE_SHIFT_ILSEQ(incount);
  289:         if (cd->discard_ilseq) {
                 /* Skip one code unit of the input encoding. */
  290:           switch (cd->iindex) {
  291:             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
  292:             case ei_utf32: case ei_utf32be: case ei_utf32le:
  293:             case ei_ucs4internal: case ei_ucs4swapped:
  294:               incount += 4; break;
  295:             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
  296:             case ei_utf16: case ei_utf16be: case ei_utf16le:
  297:             case ei_ucs2internal: case ei_ucs2swapped:
  298:               incount += 2; break;
  299:             default:
  300:               incount += 1; break;
  301:           }
  302:           goto outcount_zero;
  303:         }
  304:         #ifndef LIBICONV_PLUG
  305:         else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
                 /* Hand one code unit of invalid input to the fallback, which
                    emits replacement characters through
                    mb_to_uc_write_replacement. */
  306:           unsigned int incount2;
  307:           struct mb_to_uc_fallback_locals locals;
  308:           switch (cd->iindex) {
  309:             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
  310:             case ei_utf32: case ei_utf32be: case ei_utf32le:
  311:             case ei_ucs4internal: case ei_ucs4swapped:
  312:               incount2 = 4; break;
  313:             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
  314:             case ei_utf16: case ei_utf16be: case ei_utf16le:
  315:             case ei_ucs2internal: case ei_ucs2swapped:
  316:               incount2 = 2; break;
  317:             default:
  318:               incount2 = 1; break;
  319:           }
  320:           locals.l_cd = cd;
  321:           locals.l_outbuf = outptr;
  322:           locals.l_outbytesleft = outleft;
  323:           locals.l_errno = 0;
  324:           cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
  325:                                           mb_to_uc_write_replacement,
  326:                                           &locals,
  327:                                           cd->fallbacks.data);
  328:           if (locals.l_errno != 0) {
  329:             inptr += incount; inleft -= incount;
  330:             errno = locals.l_errno;
  331:             result = -1;
  332:             break;
  333:           }
  334:           incount += incount2;
  335:           outptr = locals.l_outbuf;
  336:           outleft = locals.l_outbytesleft;
  337:           result += 1;
  338:           goto outcount_zero;
  339:         }
  340:         #endif
               /* Report the error with the input pointer advanced by the
                  decoded incount, i.e. positioned at the invalid bytes. */
  341:         inptr += incount; inleft -= incount;
  342:         errno = EILSEQ;
  343:         result = -1;
  344:         break;
  345:       }
  346:       if (incount == RET_TOOFEW(0)) {
  347:         /* Case 2: not enough bytes available to detect anything */
  348:         errno = EINVAL;
  349:         result = -1;
  350:         break;
  351:       }
  352:       /* Case 3: k bytes read, but only a shift sequence */
  353:       incount = DECODE_TOOFEW(incount);
  354:     } else {
  355:       /* Case 4: k bytes read, making up a wide character */
  356:       if (outleft == 0) {
  357:         cd->istate = last_istate;
  358:         errno = E2BIG;
  359:         result = -1;
  360:         break;
  361:       }
  362:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
  363:       if (outcount != RET_ILUNI)
  364:         goto outcount_ok;
  365:       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
  366:       if ((wc >> 7) == (0xe0000 >> 7))
  367:         goto outcount_zero;
  368:       /* Try transliteration. */
  369:       result++;
  370:       if (cd->transliterate) {
  371:         outcount = unicode_transliterate(cd,wc,outptr,outleft);
  372:         if (outcount != RET_ILUNI)
  373:           goto outcount_ok;
  374:       }
  375:       if (cd->discard_ilseq) {
  376:         outcount = 0;
  377:         goto outcount_ok;
  378:       }
  379:       #ifndef LIBICONV_PLUG
  380:       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
  381:         struct uc_to_mb_fallback_locals locals;
  382:         locals.l_outbuf = outptr;
  383:         locals.l_outbytesleft = outleft;
  384:         locals.l_errno = 0;
  385:         cd->fallbacks.uc_to_mb_fallback(wc,
  386:                                         uc_to_mb_write_replacement,
  387:                                         &locals,
  388:                                         cd->fallbacks.data);
  389:         if (locals.l_errno != 0) {
                 /* Leave through the common exit, like every other error path,
                    so that *inbuf, *inbytesleft, *outbuf and *outbytesleft
                    reflect the characters converted before this one. A direct
                    return here would hide that progress from the caller. */
  390:           cd->istate = last_istate;
  391:           errno = locals.l_errno;
  392:           result = -1;
                 break;
  393:         }
               /* The fallback already advanced the cursor, so outcount is 0. */
  394:         outptr = locals.l_outbuf;
  395:         outleft = locals.l_outbytesleft;
  396:         outcount = 0;
  397:         goto outcount_ok;
  398:       }
  399:       #endif
  400:       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
  401:       if (outcount != RET_ILUNI)
  402:         goto outcount_ok;
  403:       cd->istate = last_istate;
  404:       errno = EILSEQ;
  405:       result = -1;
  406:       break;
  407:     outcount_ok:
             /* Any negative code that reaches this point is reported as
                E2BIG. */
  408:       if (outcount < 0) {
  409:         cd->istate = last_istate;
  410:         errno = E2BIG;
  411:         result = -1;
  412:         break;
  413:       }
  414:       #ifndef LIBICONV_PLUG
  415:       if (cd->hooks.uc_hook)
  416:         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
  417:       #endif
  418:       if (!(outcount <= outleft)) abort();
  419:       outptr += outcount; outleft -= outcount;
  420:     }
  421:   outcount_zero:
           /* Consume the input bytes of this step. */
  422:     if (!(incount <= inleft)) abort();
  423:     inptr += incount; inleft -= incount;
  424:   }
         /* Common exit: publish the progress made, on success and on error. */
  425:   *inbuf = (const char*) inptr;
  426:   *inbytesleft = inleft;
  427:   *outbuf = (char*) outptr;
  428:   *outbytesleft = outleft;
  429:   return result;
  430: }
  431: 
       /* Reset the conversion state of icd.
          If outbuf or *outbuf is NULL, both the input and the output state are
          simply zeroed and 0 is returned.
          Otherwise a character still pending in the input converter (obtained
          through xxx_flushwc, if the input encoding provides it) is converted
          to *outbuf first, then the output converter's xxx_reset (if any) is
          given the chance to write to *outbuf, and finally both states are
          zeroed. *outbuf and *outbytesleft are advanced past what was written.
          Returns the number of pending characters that xxx_wctomb could not
          encode directly (0 or 1), or (size_t)(-1) with errno set to EILSEQ,
          E2BIG or the error recorded by the uc_to_mb_fallback's write
          callback. */
  432: static size_t unicode_loop_reset (iconv_t icd,
  433:                                   char* * outbuf, size_t *outbytesleft)
  434: {
  435:   conv_t cd = (conv_t) icd;
  436:   if (outbuf == NULL || *outbuf == NULL) {
  437:     /* Reset the states. */
  438:     memset(&cd->istate,'\0',sizeof(state_t));
  439:     memset(&cd->ostate,'\0',sizeof(state_t));
  440:     return 0;
  441:   } else {
  442:     size_t result = 0;
  443:     if (cd->ifuncs.xxx_flushwc) {
             /* Saved so the input state can be restored if the pending
                character cannot be written. */
  444:       state_t last_istate = cd->istate;
  445:       ucs4_t wc;
  446:       if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
  447:         unsigned char* outptr = (unsigned char*) *outbuf;
  448:         size_t outleft = *outbytesleft;
  449:         int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
  450:         if (outcount != RET_ILUNI)
  451:           goto outcount_ok;
  452:         /* Handle Unicode tag characters (range U+E0000..U+E007F). */
  453:         if ((wc >> 7) == (0xe0000 >> 7))
  454:           goto outcount_zero;
  455:         /* Try transliteration. */
  456:         result++;
  457:         if (cd->transliterate) {
  458:           outcount = unicode_transliterate(cd,wc,outptr,outleft);
  459:           if (outcount != RET_ILUNI)
  460:             goto outcount_ok;
  461:         }
  462:         if (cd->discard_ilseq) {
  463:           outcount = 0;
  464:           goto outcount_ok;
  465:         }
  466:         #ifndef LIBICONV_PLUG
  467:         else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
  468:           struct uc_to_mb_fallback_locals locals;
  469:           locals.l_outbuf = outptr;
  470:           locals.l_outbytesleft = outleft;
  471:           locals.l_errno = 0;
  472:           cd->fallbacks.uc_to_mb_fallback(wc,
  473:                                           uc_to_mb_write_replacement,
  474:                                           &locals,
  475:                                           cd->fallbacks.data);
  476:           if (locals.l_errno != 0) {
                   /* Nothing has been stored to *outbuf yet, so returning
                      directly leaves the caller's buffer untouched. */
  477:             cd->istate = last_istate;
  478:             errno = locals.l_errno;
  479:             return -1;
  480:           }
                 /* The fallback already advanced the cursor, so outcount
                    is 0. */
  481:           outptr = locals.l_outbuf;
  482:           outleft = locals.l_outbytesleft;
  483:           outcount = 0;
  484:           goto outcount_ok;
  485:         }
  486:         #endif
  487:         outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
  488:         if (outcount != RET_ILUNI)
  489:           goto outcount_ok;
  490:         cd->istate = last_istate;
  491:         errno = EILSEQ;
  492:         return -1;
  493:       outcount_ok:
               /* Any negative code that reaches this point is reported as
                  E2BIG. */
  494:         if (outcount < 0) {
  495:           cd->istate = last_istate;
  496:           errno = E2BIG;
  497:           return -1;
  498:         }
  499:         #ifndef LIBICONV_PLUG
  500:         if (cd->hooks.uc_hook)
  501:           (*cd->hooks.uc_hook)(wc, cd->hooks.data);
  502:         #endif
  503:         if (!(outcount <= outleft)) abort();
  504:         outptr += outcount;
  505:         outleft -= outcount;
  506:       outcount_zero:
               /* Publish the flushed character before xxx_reset runs. */
  507:         *outbuf = (char*) outptr;
  508:         *outbytesleft = outleft;
  509:       }
  510:     }
  511:     if (cd->ofuncs.xxx_reset) {
             /* NOTE(review): presumably xxx_reset writes whatever is needed to
                return the output to its initial state - confirm in the
                converters. */
  512:       unsigned char* outptr = (unsigned char*) *outbuf;
  513:       size_t outleft = *outbytesleft;
  514:       int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
  515:       if (outcount < 0) {
  516:         errno = E2BIG;
  517:         return -1;
  518:       }
  519:       if (!(outcount <= outleft)) abort();
  520:       *outbuf = (char*) (outptr + outcount);
  521:       *outbytesleft = outleft - outcount;
  522:     }
  523:     memset(&cd->istate,'\0',sizeof(state_t));
  524:     memset(&cd->ostate,'\0',sizeof(state_t));
  525:     return result;
  526:   }
  527: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>