File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / loop_wchar.h
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue May 29 09:29:43 2012 UTC (12 years, 1 month ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_14p0, v1_14, HEAD
libiconv v1.14

    1: /*
    2:  * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   18:  * Fifth Floor, Boston, MA 02110-1301, USA.
   19:  */
   20: 
   21: /* This file defines three conversion loops:
   22:      - from wchar_t to anything else,
   23:      - from anything else to wchar_t,
   24:      - from wchar_t to wchar_t.
   25:  */
   26: 
   27: #if HAVE_WCRTOMB || HAVE_MBRTOWC
   28: /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
   29:    <wchar.h>.
   30:    BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
   31:    included before <wchar.h>.
   32:    In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined
   33:    by <stddef.h>.  */
   34: # include <stddef.h>
   35: # include <stdio.h>
   36: # include <time.h>
   37: # include <wchar.h>
/* Capacity, in bytes, of the on-stack scratch buffers (buf[BUF_SIZE]) that
   the conversion loops in this file use for the intermediate multibyte
   form of a character. */
# define BUF_SIZE 64  /* assume MB_LEN_MAX <= 64 */
  /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
  extern size_t mbrtowc ();
/* If mbstate_t is itself a preprocessor macro (presumably a stand-in
   supplied by the build configuration on systems lacking the real type --
   TODO confirm), calls to mbrtowc are redirected to the real function with
   a null state pointer (the parenthesized name suppresses re-expansion of
   the macro), and every state is reported as initial. */
# ifdef mbstate_t
#  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
#  define mbsinit(ps) 1
# endif
/* If mbsinit is not already a macro and HAVE_MBSINIT is false, treat every
   state as the initial state. */
# ifndef mbsinit
#  if !HAVE_MBSINIT
#   define mbsinit(ps) 1
#  endif
# endif
   50: #endif
   51: 
   52: /*
   53:  * The first two conversion loops have an extended conversion descriptor.
   54:  */
struct wchar_conv_struct {
  /* Generic conversion descriptor; the loops in this file pass its address
     to unicode_loop_convert / unicode_loop_reset. */
  struct conv_struct parent;
#if HAVE_WCRTOMB || HAVE_MBRTOWC
  /* Multibyte conversion state handed to wcrtomb / mbrtowc.  The loops work
     on a local copy; wchar_from_loop_convert and wchar_from_loop_reset
     write the copy back here on success, and the reset functions clear it
     with memset. */
  mbstate_t state;
#endif
};
   61: 
   62: 
   63: #if HAVE_WCRTOMB
   64: 
   65: /* From wchar_t to anything else. */
   66: 
   67: #ifndef LIBICONV_PLUG
   68: 
   69: #if 0
   70: 
   71: struct wc_to_mb_fallback_locals {
   72:   struct wchar_conv_struct * l_wcd;
   73:   char* l_outbuf;
   74:   size_t l_outbytesleft;
   75:   int l_errno;
   76: };
   77: 
   78: /* A callback that writes a string given in the locale encoding. */
   79: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
   80:                                         void* callback_arg)
   81: {
   82:   struct wc_to_mb_fallback_locals * plocals =
   83:     (struct wc_to_mb_fallback_locals *) callback_arg;
   84:   /* Do nothing if already encountered an error in a previous call. */
   85:   if (plocals->l_errno == 0) {
   86:     /* Attempt to convert the passed buffer to the target encoding.
   87:        Here we don't support characters split across multiple calls. */
   88:     const char* bufptr = buf;
   89:     size_t bufleft = buflen;
   90:     size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
   91:                                       &bufptr,&bufleft,
   92:                                       &plocals->l_outbuf,&plocals->l_outbytesleft);
   93:     if (res == (size_t)(-1)) {
   94:       if (errno == EILSEQ || errno == EINVAL)
   95:         /* Invalid buf contents. */
   96:         plocals->l_errno = EILSEQ;
   97:       else if (errno == E2BIG)
   98:         /* Output buffer too small. */
   99:         plocals->l_errno = E2BIG;
  100:       else 
  101:         abort();
  102:     } else {
  103:       /* Successful conversion. */
  104:       if (bufleft > 0)
  105:         abort();
  106:     }
  107:   }
  108: }
  109: 
  110: #else
  111: 
  112: struct wc_to_mb_fallback_locals {
  113:   char* l_outbuf;
  114:   size_t l_outbytesleft;
  115:   int l_errno;
  116: };
  117: 
  118: /* A callback that writes a string given in the target encoding. */
  119: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
  120:                                         void* callback_arg)
  121: {
  122:   struct wc_to_mb_fallback_locals * plocals =
  123:     (struct wc_to_mb_fallback_locals *) callback_arg;
  124:   /* Do nothing if already encountered an error in a previous call. */
  125:   if (plocals->l_errno == 0) {
  126:     /* Attempt to copy the passed buffer to the output buffer. */
  127:     if (plocals->l_outbytesleft < buflen)
  128:       plocals->l_errno = E2BIG;
  129:     else {
  130:       memcpy(plocals->l_outbuf, buf, buflen);
  131:       plocals->l_outbuf += buflen;
  132:       plocals->l_outbytesleft -= buflen;
  133:     }
  134:   }
  135: }
  136: 
  137: #endif
  138: 
  139: #endif /* !LIBICONV_PLUG */
  140: 
/* Conversion loop: wchar_t input -> target encoding.

   Each input wchar_t is turned into its multibyte form with wcrtomb, the
   bytes are collected in a local buffer, and the buffer is handed to
   unicode_loop_convert.  The descriptor state and the caller's
   *inbuf / *inbytesleft / *outbuf / *outbytesleft are updated only after a
   group of wide characters has been converted successfully (or replaced by
   the fallback handler).

   Returns the accumulated count: the sum of the unicode_loop_convert
   return values plus 1 for every fallback replacement.  Returns
   (size_t)(-1) on failure; errno is EILSEQ when wcrtomb rejects a wide
   character and neither discard_ilseq nor a fallback applies, the error
   recorded by the fallback callback, or whatever unicode_loop_convert left
   in errno (EILSEQ / E2BIG).

   Input bytes that do not make up a whole wchar_t are left unconsumed.

   NOTE(review): if the inner loop runs out of input without reaching a
   `break' or `return' (for example when the last wide character is
   discarded because discard_ilseq is set, or unicode_loop_convert keeps
   reporting EINVAL), *inbytesleft has not been changed and the outer loop
   starts over from the same position -- confirm that this cannot spin. */
static size_t wchar_from_loop_convert (iconv_t icd,
                                       const char* * inbuf, size_t *inbytesleft,
                                       char* * outbuf, size_t *outbytesleft)
{
  struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  size_t result = 0;
  while (*inbytesleft >= sizeof(wchar_t)) {
    /* Work on local copies; commit to the caller only on success. */
    const wchar_t * inptr = (const wchar_t *) *inbuf;
    size_t inleft = *inbytesleft;
    char buf[BUF_SIZE];
    mbstate_t state = wcd->state;
    size_t bufcount = 0;
    while (inleft >= sizeof(wchar_t)) {
      /* Convert one wchar_t to multibyte representation. */
      size_t count = wcrtomb(buf+bufcount,*inptr,&state);
      if (count == (size_t)(-1)) {
        /* Invalid input. */
        if (wcd->parent.discard_ilseq) {
          /* Skip this wide character: it contributes no bytes. */
          count = 0;
        }
        #ifndef LIBICONV_PLUG
        else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
          /* Drop the contents of buf[] accumulated so far, and instead
             pass all queued wide characters to the fallback handler. */
          struct wc_to_mb_fallback_locals locals;
          const wchar_t * fallback_inptr;
          #if 0
          locals.l_wcd = wcd;
          #endif
          locals.l_outbuf = *outbuf;
          locals.l_outbytesleft = *outbytesleft;
          locals.l_errno = 0;
          /* The range is inclusive: it ends with the offending character. */
          for (fallback_inptr = (const wchar_t *) *inbuf;
               fallback_inptr <= inptr;
               fallback_inptr++)
            wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
                                                    wc_to_mb_write_replacement,
                                                    &locals,
                                                    wcd->parent.fallbacks.data);
          if (locals.l_errno != 0) {
            /* Nothing is committed; the caller's pointers are unchanged. */
            errno = locals.l_errno;
            return -1;
          }
          /* Commit: consume through the offending character and adopt the
             output position reached by the callback. */
          wcd->state = state;
          *inbuf = (const char *) (inptr + 1);
          *inbytesleft = inleft - sizeof(wchar_t);
          *outbuf = locals.l_outbuf;
          *outbytesleft = locals.l_outbytesleft;
          result += 1;
          break;
        }
        #endif
        else {
          errno = EILSEQ;
          return -1;
        }
      }
      inptr++;
      inleft -= sizeof(wchar_t);
      bufcount += count;
      if (count == 0) {
        /* Continue, append next wchar_t. */
      } else {
        /* Attempt to convert the accumulated multibyte representations
           to the target encoding. */
        const char* bufptr = buf;
        size_t bufleft = bufcount;
        char* outptr = *outbuf;
        size_t outleft = *outbytesleft;
        size_t res = unicode_loop_convert(&wcd->parent,
                                          &bufptr,&bufleft,
                                          &outptr,&outleft);
        if (res == (size_t)(-1)) {
          if (errno == EILSEQ)
            /* Invalid input. */
            return -1;
          else if (errno == E2BIG)
            /* Output buffer too small. */
            return -1;
          else if (errno == EINVAL) {
            /* Continue, append next wchar_t, but avoid buffer overrun. */
            if (bufcount + MB_CUR_MAX > BUF_SIZE)
              abort();
          } else
            abort();
        } else {
          /* Successful conversion. */
          wcd->state = state;
          *inbuf = (const char *) inptr;
          *inbytesleft = inleft;
          *outbuf = outptr;
          *outbytesleft = outleft;
          result += res;
          break;
        }
      }
    }
  }
  return result;
}
  241: 
  242: static size_t wchar_from_loop_reset (iconv_t icd,
  243:                                      char* * outbuf, size_t *outbytesleft)
  244: {
  245:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  246:   if (outbuf == NULL || *outbuf == NULL) {
  247:     /* Reset the states. */
  248:     memset(&wcd->state,'\0',sizeof(mbstate_t));
  249:     return unicode_loop_reset(&wcd->parent,NULL,NULL);
  250:   } else {
  251:     if (!mbsinit(&wcd->state)) {
  252:       mbstate_t state = wcd->state;
  253:       char buf[BUF_SIZE];
  254:       size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
  255:       if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
  256:         abort();
  257:       else {
  258:         const char* bufptr = buf;
  259:         size_t bufleft = bufcount-1;
  260:         char* outptr = *outbuf;
  261:         size_t outleft = *outbytesleft;
  262:         size_t res = unicode_loop_convert(&wcd->parent,
  263:                                           &bufptr,&bufleft,
  264:                                           &outptr,&outleft);
  265:         if (res == (size_t)(-1)) {
  266:           if (errno == E2BIG)
  267:             return -1;
  268:           else
  269:             abort();
  270:         } else {
  271:           res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
  272:           if (res == (size_t)(-1))
  273:             return res;
  274:           else {
  275:             /* Successful. */
  276:             wcd->state = state;
  277:             *outbuf = outptr;
  278:             *outbytesleft = outleft;
  279:             return 0;
  280:           }
  281:         }
  282:       }
  283:     } else
  284:       return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
  285:   }
  286: }
  287: 
  288: #endif
  289: 
  290: 
  291: #if HAVE_MBRTOWC
  292: 
  293: /* From anything else to wchar_t. */
  294: 
  295: #ifndef LIBICONV_PLUG
  296: 
  297: struct mb_to_wc_fallback_locals {
  298:   char* l_outbuf;
  299:   size_t l_outbytesleft;
  300:   int l_errno;
  301: };
  302: 
  303: static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
  304:                                         void* callback_arg)
  305: {
  306:   struct mb_to_wc_fallback_locals * plocals =
  307:     (struct mb_to_wc_fallback_locals *) callback_arg;
  308:   /* Do nothing if already encountered an error in a previous call. */
  309:   if (plocals->l_errno == 0) {
  310:     /* Attempt to copy the passed buffer to the output buffer. */
  311:     if (plocals->l_outbytesleft < sizeof(wchar_t)*buflen)
  312:       plocals->l_errno = E2BIG;
  313:     else {
  314:       for (; buflen > 0; buf++, buflen--) {
  315:         *(wchar_t*) plocals->l_outbuf = *buf;
  316:         plocals->l_outbuf += sizeof(wchar_t);
  317:         plocals->l_outbytesleft -= sizeof(wchar_t);
  318:       }
  319:     }
  320:   }
  321: }
  322: 
  323: #endif /* !LIBICONV_PLUG */
  324: 
  325: static size_t wchar_to_loop_convert (iconv_t icd,
  326:                                      const char* * inbuf, size_t *inbytesleft,
  327:                                      char* * outbuf, size_t *outbytesleft)
  328: {
  329:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  330:   size_t result = 0;
  331:   while (*inbytesleft > 0) {
  332:     size_t incount;
  333:     for (incount = 1; ; ) {
  334:       /* Here incount <= *inbytesleft. */
  335:       char buf[BUF_SIZE];
  336:       const char* inptr = *inbuf;
  337:       size_t inleft = incount;
  338:       char* bufptr = buf;
  339:       size_t bufleft = BUF_SIZE;
  340:       size_t res = unicode_loop_convert(&wcd->parent,
  341:                                         &inptr,&inleft,
  342:                                         &bufptr,&bufleft);
  343:       if (res == (size_t)(-1)) {
  344:         if (errno == EILSEQ)
  345:           /* Invalid input. */
  346:           return -1;
  347:         else if (errno == EINVAL) {
  348:           /* Incomplete input. Next try with one more input byte. */
  349:         } else
  350:           /* E2BIG shouldn't occur. */
  351:           abort();
  352:       } else {
  353:         /* Successful conversion. */
  354:         size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
  355:         mbstate_t state = wcd->state;
  356:         wchar_t wc;
  357:         res = mbrtowc(&wc,buf,bufcount,&state);
  358:         if (res == (size_t)(-2)) {
  359:           /* Next try with one more input byte. */
  360:         } else {
  361:           if (res == (size_t)(-1)) {
  362:             /* Invalid input. */
  363:             if (wcd->parent.discard_ilseq) {
  364:             }
  365:             #ifndef LIBICONV_PLUG
  366:             else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
  367:               /* Drop the contents of buf[] accumulated so far, and instead
  368:                  pass all queued chars to the fallback handler. */
  369:               struct mb_to_wc_fallback_locals locals;
  370:               locals.l_outbuf = *outbuf;
  371:               locals.l_outbytesleft = *outbytesleft;
  372:               locals.l_errno = 0;
  373:               wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
  374:                                                       mb_to_wc_write_replacement,
  375:                                                       &locals,
  376:                                                       wcd->parent.fallbacks.data);
  377:               if (locals.l_errno != 0) {
  378:                 errno = locals.l_errno;
  379:                 return -1;
  380:               }
  381:               /* Restoring the state is not needed because it is the initial
  382:                  state anyway: For all known locale encodings, the multibyte
  383:                  to wchar_t conversion doesn't have shift state, and we have
  384:                  excluded partial accumulated characters. */
  385:               /* wcd->state = state; */
  386:               *inbuf += incount;
  387:               *inbytesleft -= incount;
  388:               *outbuf = locals.l_outbuf;
  389:               *outbytesleft = locals.l_outbytesleft;
  390:               result += 1;
  391:               break;
  392:             }
  393:             #endif
  394:             else
  395:               return -1;
  396:           } else {
  397:             if (*outbytesleft < sizeof(wchar_t)) {
  398:               errno = E2BIG;
  399:               return -1;
  400:             }
  401:             *(wchar_t*) *outbuf = wc;
  402:             /* Restoring the state is not needed because it is the initial
  403:                state anyway: For all known locale encodings, the multibyte
  404:                to wchar_t conversion doesn't have shift state, and we have
  405:                excluded partial accumulated characters. */
  406:             /* wcd->state = state; */
  407:             *outbuf += sizeof(wchar_t);
  408:             *outbytesleft -= sizeof(wchar_t);
  409:           }
  410:           *inbuf += incount;
  411:           *inbytesleft -= incount;
  412:           result += res;
  413:           break;
  414:         }
  415:       }
  416:       incount++;
  417:       if (incount > *inbytesleft) {
  418:         /* Incomplete input. */
  419:         errno = EINVAL;
  420:         return -1;
  421:       }
  422:     }
  423:   }
  424:   return result;
  425: }
  426: 
  427: static size_t wchar_to_loop_reset (iconv_t icd,
  428:                                    char* * outbuf, size_t *outbytesleft)
  429: {
  430:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  431:   size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
  432:   if (res == (size_t)(-1))
  433:     return res;
  434:   memset(&wcd->state,0,sizeof(mbstate_t));
  435:   return 0;
  436: }
  437: 
  438: #endif
  439: 
  440: 
  441: /* From wchar_t to wchar_t. */
  442: 
  443: static size_t wchar_id_loop_convert (iconv_t icd,
  444:                                      const char* * inbuf, size_t *inbytesleft,
  445:                                      char* * outbuf, size_t *outbytesleft)
  446: {
  447:   struct conv_struct * cd = (struct conv_struct *) icd;
  448:   const wchar_t* inptr = (const wchar_t*) *inbuf;
  449:   size_t inleft = *inbytesleft / sizeof(wchar_t);
  450:   wchar_t* outptr = (wchar_t*) *outbuf;
  451:   size_t outleft = *outbytesleft / sizeof(wchar_t);
  452:   size_t count = (inleft <= outleft ? inleft : outleft);
  453:   if (count > 0) {
  454:     *inbytesleft -= count * sizeof(wchar_t);
  455:     *outbytesleft -= count * sizeof(wchar_t);
  456:     do {
  457:       wchar_t wc = *inptr++;
  458:       *outptr++ = wc;
  459:       #ifndef LIBICONV_PLUG
  460:       if (cd->hooks.wc_hook)
  461:         (*cd->hooks.wc_hook)(wc, cd->hooks.data);
  462:       #endif
  463:     } while (--count > 0);
  464:     *inbuf = (const char*) inptr;
  465:     *outbuf = (char*) outptr;
  466:   }
  467:   return 0;
  468: }
  469: 
  470: static size_t wchar_id_loop_reset (iconv_t icd,
  471:                                    char* * outbuf, size_t *outbytesleft)
  472: {
  473:   return 0;
  474: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>