File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / loop_wchar.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 4 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_13_1, HEAD
libiconv

    1: /*
    2:  * Copyright (C) 2000-2002, 2005-2006, 2008 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   18:  * Fifth Floor, Boston, MA 02110-1301, USA.
   19:  */
   20: 
   21: /* This file defines three conversion loops:
   22:      - from wchar_t to anything else,
   23:      - from anything else to wchar_t,
   24:      - from wchar_t to wchar_t.
   25:  */
   26: 
   27: #if HAVE_WCRTOMB || HAVE_MBRTOWC
   28: # include <wchar.h>
   29: # define BUF_SIZE 64  /* assume MB_LEN_MAX <= 64 */
   30:   /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
   31:   extern size_t mbrtowc ();
   32: # ifdef mbstate_t
   33: #  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
   34: #  define mbsinit(ps) 1
   35: # endif
   36: # ifndef mbsinit
   37: #  if !HAVE_MBSINIT
   38: #   define mbsinit(ps) 1
   39: #  endif
   40: # endif
   41: #endif
   42: 
/*
 * The first two conversion loops have an extended conversion descriptor.
 *
 * The loops in this file cast the iconv_t handle they receive to a pointer
 * to this structure, and pass &wcd->parent on to the unicode_loop_*
 * functions.
 */
struct wchar_conv_struct {
  /* The generic conversion descriptor. */
  struct conv_struct parent;
#if HAVE_WCRTOMB || HAVE_MBRTOWC
  /* Multibyte conversion state handed (via a local copy) to wcrtomb and
     mbrtowc; only present when one of those functions is available. */
  mbstate_t state;
#endif
};
   52: 
   53: 
   54: #if HAVE_WCRTOMB
   55: 
   56: /* From wchar_t to anything else. */
   57: 
   58: #ifndef LIBICONV_PLUG
   59: 
   60: #if 0
   61: 
   62: struct wc_to_mb_fallback_locals {
   63:   struct wchar_conv_struct * l_wcd;
   64:   char* l_outbuf;
   65:   size_t l_outbytesleft;
   66:   int l_errno;
   67: };
   68: 
   69: /* A callback that writes a string given in the locale encoding. */
   70: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
   71:                                         void* callback_arg)
   72: {
   73:   struct wc_to_mb_fallback_locals * plocals =
   74:     (struct wc_to_mb_fallback_locals *) callback_arg;
   75:   /* Do nothing if already encountered an error in a previous call. */
   76:   if (plocals->l_errno == 0) {
   77:     /* Attempt to convert the passed buffer to the target encoding.
   78:        Here we don't support characters split across multiple calls. */
   79:     const char* bufptr = buf;
   80:     size_t bufleft = buflen;
   81:     size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
   82:                                       &bufptr,&bufleft,
   83:                                       &plocals->l_outbuf,&plocals->l_outbytesleft);
   84:     if (res == (size_t)(-1)) {
   85:       if (errno == EILSEQ || errno == EINVAL)
   86:         /* Invalid buf contents. */
   87:         plocals->l_errno = EILSEQ;
   88:       else if (errno == E2BIG)
   89:         /* Output buffer too small. */
   90:         plocals->l_errno = E2BIG;
   91:       else 
   92:         abort();
   93:     } else {
   94:       /* Successful conversion. */
   95:       if (bufleft > 0)
   96:         abort();
   97:     }
   98:   }
   99: }
  100: 
  101: #else
  102: 
  103: struct wc_to_mb_fallback_locals {
  104:   char* l_outbuf;
  105:   size_t l_outbytesleft;
  106:   int l_errno;
  107: };
  108: 
  109: /* A callback that writes a string given in the target encoding. */
  110: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
  111:                                         void* callback_arg)
  112: {
  113:   struct wc_to_mb_fallback_locals * plocals =
  114:     (struct wc_to_mb_fallback_locals *) callback_arg;
  115:   /* Do nothing if already encountered an error in a previous call. */
  116:   if (plocals->l_errno == 0) {
  117:     /* Attempt to copy the passed buffer to the output buffer. */
  118:     if (plocals->l_outbytesleft < buflen)
  119:       plocals->l_errno = E2BIG;
  120:     else {
  121:       memcpy(plocals->l_outbuf, buf, buflen);
  122:       plocals->l_outbuf += buflen;
  123:       plocals->l_outbytesleft -= buflen;
  124:     }
  125:   }
  126: }
  127: 
  128: #endif
  129: 
  130: #endif /* !LIBICONV_PLUG */
  131: 
  132: static size_t wchar_from_loop_convert (iconv_t icd,
  133:                                        const char* * inbuf, size_t *inbytesleft,
  134:                                        char* * outbuf, size_t *outbytesleft)
  135: {
  136:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  137:   size_t result = 0;
  138:   while (*inbytesleft >= sizeof(wchar_t)) {
  139:     const wchar_t * inptr = (const wchar_t *) *inbuf;
  140:     size_t inleft = *inbytesleft;
  141:     char buf[BUF_SIZE];
  142:     mbstate_t state = wcd->state;
  143:     size_t bufcount = 0;
  144:     while (inleft >= sizeof(wchar_t)) {
  145:       /* Convert one wchar_t to multibyte representation. */
  146:       size_t count = wcrtomb(buf+bufcount,*inptr,&state);
  147:       if (count == (size_t)(-1)) {
  148:         /* Invalid input. */
  149:         if (wcd->parent.discard_ilseq) {
  150:           count = 0;
  151:         }
  152:         #ifndef LIBICONV_PLUG
  153:         else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
  154:           /* Drop the contents of buf[] accumulated so far, and instead
  155:              pass all queued wide characters to the fallback handler. */
  156:           struct wc_to_mb_fallback_locals locals;
  157:           const wchar_t * fallback_inptr;
  158:           #if 0
  159:           locals.l_wcd = wcd;
  160:           #endif
  161:           locals.l_outbuf = *outbuf;
  162:           locals.l_outbytesleft = *outbytesleft;
  163:           locals.l_errno = 0;
  164:           for (fallback_inptr = (const wchar_t *) *inbuf;
  165:                fallback_inptr <= inptr;
  166:                fallback_inptr++)
  167:             wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
  168:                                                     wc_to_mb_write_replacement,
  169:                                                     &locals,
  170:                                                     wcd->parent.fallbacks.data);
  171:           if (locals.l_errno != 0) {
  172:             errno = locals.l_errno;
  173:             return -1;
  174:           }
  175:           wcd->state = state;
  176:           *inbuf = (const char *) (inptr + 1);
  177:           *inbytesleft = inleft - sizeof(wchar_t);
  178:           *outbuf = locals.l_outbuf;
  179:           *outbytesleft = locals.l_outbytesleft;
  180:           result += 1;
  181:           break;
  182:         }
  183:         #endif
  184:         else {
  185:           errno = EILSEQ;
  186:           return -1;
  187:         }
  188:       }
  189:       inptr++;
  190:       inleft -= sizeof(wchar_t);
  191:       bufcount += count;
  192:       if (count == 0) {
  193:         /* Continue, append next wchar_t. */
  194:       } else {
  195:         /* Attempt to convert the accumulated multibyte representations
  196:            to the target encoding. */
  197:         const char* bufptr = buf;
  198:         size_t bufleft = bufcount;
  199:         char* outptr = *outbuf;
  200:         size_t outleft = *outbytesleft;
  201:         size_t res = unicode_loop_convert(&wcd->parent,
  202:                                           &bufptr,&bufleft,
  203:                                           &outptr,&outleft);
  204:         if (res == (size_t)(-1)) {
  205:           if (errno == EILSEQ)
  206:             /* Invalid input. */
  207:             return -1;
  208:           else if (errno == E2BIG)
  209:             /* Output buffer too small. */
  210:             return -1;
  211:           else if (errno == EINVAL) {
  212:             /* Continue, append next wchar_t, but avoid buffer overrun. */
  213:             if (bufcount + MB_CUR_MAX > BUF_SIZE)
  214:               abort();
  215:           } else
  216:             abort();
  217:         } else {
  218:           /* Successful conversion. */
  219:           wcd->state = state;
  220:           *inbuf = (const char *) inptr;
  221:           *inbytesleft = inleft;
  222:           *outbuf = outptr;
  223:           *outbytesleft = outleft;
  224:           result += res;
  225:           break;
  226:         }
  227:       }
  228:     }
  229:   }
  230:   return result;
  231: }
  232: 
  233: static size_t wchar_from_loop_reset (iconv_t icd,
  234:                                      char* * outbuf, size_t *outbytesleft)
  235: {
  236:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  237:   if (outbuf == NULL || *outbuf == NULL) {
  238:     /* Reset the states. */
  239:     memset(&wcd->state,'\0',sizeof(mbstate_t));
  240:     return unicode_loop_reset(&wcd->parent,NULL,NULL);
  241:   } else {
  242:     if (!mbsinit(&wcd->state)) {
  243:       mbstate_t state = wcd->state;
  244:       char buf[BUF_SIZE];
  245:       size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
  246:       if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
  247:         abort();
  248:       else {
  249:         const char* bufptr = buf;
  250:         size_t bufleft = bufcount-1;
  251:         char* outptr = *outbuf;
  252:         size_t outleft = *outbytesleft;
  253:         size_t res = unicode_loop_convert(&wcd->parent,
  254:                                           &bufptr,&bufleft,
  255:                                           &outptr,&outleft);
  256:         if (res == (size_t)(-1)) {
  257:           if (errno == E2BIG)
  258:             return -1;
  259:           else
  260:             abort();
  261:         } else {
  262:           res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
  263:           if (res == (size_t)(-1))
  264:             return res;
  265:           else {
  266:             /* Successful. */
  267:             wcd->state = state;
  268:             *outbuf = outptr;
  269:             *outbytesleft = outleft;
  270:             return 0;
  271:           }
  272:         }
  273:       }
  274:     } else
  275:       return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
  276:   }
  277: }
  278: 
  279: #endif
  280: 
  281: 
  282: #if HAVE_MBRTOWC
  283: 
  284: /* From anything else to wchar_t. */
  285: 
  286: #ifndef LIBICONV_PLUG
  287: 
  288: struct mb_to_wc_fallback_locals {
  289:   char* l_outbuf;
  290:   size_t l_outbytesleft;
  291:   int l_errno;
  292: };
  293: 
  294: static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
  295:                                         void* callback_arg)
  296: {
  297:   struct mb_to_wc_fallback_locals * plocals =
  298:     (struct mb_to_wc_fallback_locals *) callback_arg;
  299:   /* Do nothing if already encountered an error in a previous call. */
  300:   if (plocals->l_errno == 0) {
  301:     /* Attempt to copy the passed buffer to the output buffer. */
  302:     if (plocals->l_outbytesleft < sizeof(wchar_t)*buflen)
  303:       plocals->l_errno = E2BIG;
  304:     else {
  305:       for (; buflen > 0; buf++, buflen--) {
  306:         *(wchar_t*) plocals->l_outbuf = *buf;
  307:         plocals->l_outbuf += sizeof(wchar_t);
  308:         plocals->l_outbytesleft -= sizeof(wchar_t);
  309:       }
  310:     }
  311:   }
  312: }
  313: 
  314: #endif /* !LIBICONV_PLUG */
  315: 
  316: static size_t wchar_to_loop_convert (iconv_t icd,
  317:                                      const char* * inbuf, size_t *inbytesleft,
  318:                                      char* * outbuf, size_t *outbytesleft)
  319: {
  320:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  321:   size_t result = 0;
  322:   while (*inbytesleft > 0) {
  323:     size_t incount;
  324:     for (incount = 1; incount <= *inbytesleft; incount++) {
  325:       char buf[BUF_SIZE];
  326:       const char* inptr = *inbuf;
  327:       size_t inleft = incount;
  328:       char* bufptr = buf;
  329:       size_t bufleft = BUF_SIZE;
  330:       size_t res = unicode_loop_convert(&wcd->parent,
  331:                                         &inptr,&inleft,
  332:                                         &bufptr,&bufleft);
  333:       if (res == (size_t)(-1)) {
  334:         if (errno == EILSEQ)
  335:           /* Invalid input. */
  336:           return -1;
  337:         else if (errno == EINVAL) {
  338:           /* Incomplete input. Next try with one more input byte. */
  339:         } else
  340:           /* E2BIG shouldn't occur. */
  341:           abort();
  342:       } else {
  343:         /* Successful conversion. */
  344:         size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
  345:         mbstate_t state = wcd->state;
  346:         wchar_t wc;
  347:         res = mbrtowc(&wc,buf,bufcount,&state);
  348:         if (res == (size_t)(-2)) {
  349:           /* Next try with one more input byte. */
  350:         } else {
  351:           if (res == (size_t)(-1)) {
  352:             /* Invalid input. */
  353:             if (wcd->parent.discard_ilseq) {
  354:             }
  355:             #ifndef LIBICONV_PLUG
  356:             else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
  357:               /* Drop the contents of buf[] accumulated so far, and instead
  358:                  pass all queued chars to the fallback handler. */
  359:               struct mb_to_wc_fallback_locals locals;
  360:               locals.l_outbuf = *outbuf;
  361:               locals.l_outbytesleft = *outbytesleft;
  362:               locals.l_errno = 0;
  363:               wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
  364:                                                       mb_to_wc_write_replacement,
  365:                                                       &locals,
  366:                                                       wcd->parent.fallbacks.data);
  367:               if (locals.l_errno != 0) {
  368:                 errno = locals.l_errno;
  369:                 return -1;
  370:               }
  371:               /* Restoring the state is not needed because it is the initial
  372:                  state anyway: For all known locale encodings, the multibyte
  373:                  to wchar_t conversion doesn't have shift state, and we have
  374:                  excluded partial accumulated characters. */
  375:               /* wcd->state = state; */
  376:               *inbuf += incount;
  377:               *inbytesleft -= incount;
  378:               *outbuf = locals.l_outbuf;
  379:               *outbytesleft = locals.l_outbytesleft;
  380:               result += 1;
  381:               break;
  382:             }
  383:             #endif
  384:             else
  385:               return -1;
  386:           } else {
  387:             if (*outbytesleft < sizeof(wchar_t)) {
  388:               errno = E2BIG;
  389:               return -1;
  390:             }
  391:             *(wchar_t*) *outbuf = wc;
  392:             /* Restoring the state is not needed because it is the initial
  393:                state anyway: For all known locale encodings, the multibyte
  394:                to wchar_t conversion doesn't have shift state, and we have
  395:                excluded partial accumulated characters. */
  396:             /* wcd->state = state; */
  397:             *outbuf += sizeof(wchar_t);
  398:             *outbytesleft -= sizeof(wchar_t);
  399:           }
  400:           *inbuf += incount;
  401:           *inbytesleft -= incount;
  402:           result += res;
  403:           break;
  404:         }
  405:       }
  406:     }
  407:   }
  408:   return result;
  409: }
  410: 
  411: static size_t wchar_to_loop_reset (iconv_t icd,
  412:                                    char* * outbuf, size_t *outbytesleft)
  413: {
  414:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  415:   size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
  416:   if (res == (size_t)(-1))
  417:     return res;
  418:   memset(&wcd->state,0,sizeof(mbstate_t));
  419:   return 0;
  420: }
  421: 
  422: #endif
  423: 
  424: 
  425: /* From wchar_t to wchar_t. */
  426: 
  427: static size_t wchar_id_loop_convert (iconv_t icd,
  428:                                      const char* * inbuf, size_t *inbytesleft,
  429:                                      char* * outbuf, size_t *outbytesleft)
  430: {
  431:   struct conv_struct * cd = (struct conv_struct *) icd;
  432:   const wchar_t* inptr = (const wchar_t*) *inbuf;
  433:   size_t inleft = *inbytesleft / sizeof(wchar_t);
  434:   wchar_t* outptr = (wchar_t*) *outbuf;
  435:   size_t outleft = *outbytesleft / sizeof(wchar_t);
  436:   size_t count = (inleft <= outleft ? inleft : outleft);
  437:   if (count > 0) {
  438:     *inbytesleft -= count * sizeof(wchar_t);
  439:     *outbytesleft -= count * sizeof(wchar_t);
  440:     do {
  441:       wchar_t wc = *inptr++;
  442:       *outptr++ = wc;
  443:       #ifndef LIBICONV_PLUG
  444:       if (cd->hooks.wc_hook)
  445:         (*cd->hooks.wc_hook)(wc, cd->hooks.data);
  446:       #endif
  447:     } while (--count > 0);
  448:     *inbuf = (const char*) inptr;
  449:     *outbuf = (char*) outptr;
  450:   }
  451:   return 0;
  452: }
  453: 
  454: static size_t wchar_id_loop_reset (iconv_t icd,
  455:                                    char* * outbuf, size_t *outbytesleft)
  456: {
  457:   return 0;
  458: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>