File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / loop_wchar.h
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /*
    2:  * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, see <https://www.gnu.org/licenses/>.
   18:  */
   19: 
   20: /* This file defines three conversion loops:
   21:      - from wchar_t to anything else,
   22:      - from anything else to wchar_t,
   23:      - from wchar_t to wchar_t.
   24:  */
   25: 
   26: #if HAVE_WCRTOMB || HAVE_MBRTOWC
   27: /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
   28:    <wchar.h>.
   29:    BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
   30:    included before <wchar.h>.
   31:    In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined
   32:    by <stddef.h>.  */
   33: # include <stddef.h>
   34: # include <stdio.h>
   35: # include <time.h>
   36: # include <wchar.h>
/* Size in bytes of the on-stack scratch buffers (`buf`) that the conversion
   loops in this file use to hold intermediate multibyte data. */
# define BUF_SIZE 64  /* assume MB_LEN_MAX <= 64 */
  /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
  /* Declared here without a prototype.
     NOTE(review): presumably so that the wrapper macro below can pass a
     plain 0 as the state argument regardless of its real type - confirm. */
  extern size_t mbrtowc ();
/* When mbstate_t is itself a preprocessor macro, route every mbrtowc() call
   to the real function with a null state argument and treat the conversion
   state as always initial.
   NOTE(review): presumably the build configuration defines mbstate_t as a
   macro on systems that lack the type - confirm. */
# ifdef mbstate_t
#  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
#  define mbsinit(ps) 1
# endif
/* If mbsinit is not already a macro and the platform does not provide it
   (HAVE_MBSINIT is false), report the state as always initial. */
# ifndef mbsinit
#  if !HAVE_MBSINIT
#   define mbsinit(ps) 1
#  endif
# endif
   49: #endif
   50: 
   51: /*
   52:  * The first two conversion loops have an extended conversion descriptor.
   53:  */
/* Conversion descriptor used by the wchar_t loops below.  The functions in
   this file cast the iconv_t handle to a pointer to this structure. */
struct wchar_conv_struct {
  /* The ordinary conversion descriptor; passed to unicode_loop_convert()
     and unicode_loop_reset() as &wcd->parent. */
  struct conv_struct parent;
#if HAVE_WCRTOMB || HAVE_MBRTOWC
  /* Multibyte conversion state handed (as a copy) to wcrtomb()/mbrtowc();
     only present when at least one of those functions is available. */
  mbstate_t state;
#endif
};
   60: 
   61: 
   62: #if HAVE_WCRTOMB
   63: 
   64: /* From wchar_t to anything else. */
   65: 
   66: #ifndef LIBICONV_PLUG
   67: 
   68: #if 0
   69: 
   70: struct wc_to_mb_fallback_locals {
   71:   struct wchar_conv_struct * l_wcd;
   72:   char* l_outbuf;
   73:   size_t l_outbytesleft;
   74:   int l_errno;
   75: };
   76: 
   77: /* A callback that writes a string given in the locale encoding. */
   78: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
   79:                                         void* callback_arg)
   80: {
   81:   struct wc_to_mb_fallback_locals * plocals =
   82:     (struct wc_to_mb_fallback_locals *) callback_arg;
   83:   /* Do nothing if already encountered an error in a previous call. */
   84:   if (plocals->l_errno == 0) {
   85:     /* Attempt to convert the passed buffer to the target encoding.
   86:        Here we don't support characters split across multiple calls. */
   87:     const char* bufptr = buf;
   88:     size_t bufleft = buflen;
   89:     size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
   90:                                       &bufptr,&bufleft,
   91:                                       &plocals->l_outbuf,&plocals->l_outbytesleft);
   92:     if (res == (size_t)(-1)) {
   93:       if (errno == EILSEQ || errno == EINVAL)
   94:         /* Invalid buf contents. */
   95:         plocals->l_errno = EILSEQ;
   96:       else if (errno == E2BIG)
   97:         /* Output buffer too small. */
   98:         plocals->l_errno = E2BIG;
   99:       else 
  100:         abort();
  101:     } else {
  102:       /* Successful conversion. */
  103:       if (bufleft > 0)
  104:         abort();
  105:     }
  106:   }
  107: }
  108: 
  109: #else
  110: 
  111: struct wc_to_mb_fallback_locals {
  112:   char* l_outbuf;
  113:   size_t l_outbytesleft;
  114:   int l_errno;
  115: };
  116: 
  117: /* A callback that writes a string given in the target encoding. */
  118: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
  119:                                         void* callback_arg)
  120: {
  121:   struct wc_to_mb_fallback_locals * plocals =
  122:     (struct wc_to_mb_fallback_locals *) callback_arg;
  123:   /* Do nothing if already encountered an error in a previous call. */
  124:   if (plocals->l_errno == 0) {
  125:     /* Attempt to copy the passed buffer to the output buffer. */
  126:     if (plocals->l_outbytesleft < buflen)
  127:       plocals->l_errno = E2BIG;
  128:     else {
  129:       memcpy(plocals->l_outbuf, buf, buflen);
  130:       plocals->l_outbuf += buflen;
  131:       plocals->l_outbytesleft -= buflen;
  132:     }
  133:   }
  134: }
  135: 
  136: #endif
  137: 
  138: #endif /* !LIBICONV_PLUG */
  139: 
  140: static size_t wchar_from_loop_convert (iconv_t icd,
  141:                                        const char* * inbuf, size_t *inbytesleft,
  142:                                        char* * outbuf, size_t *outbytesleft)
  143: {
  144:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  145:   size_t result = 0;
  146:   while (*inbytesleft >= sizeof(wchar_t)) {
  147:     const wchar_t * inptr = (const wchar_t *) *inbuf;
  148:     size_t inleft = *inbytesleft;
  149:     char buf[BUF_SIZE];
  150:     mbstate_t state = wcd->state;
  151:     size_t bufcount = 0;
  152:     while (inleft >= sizeof(wchar_t)) {
  153:       /* Convert one wchar_t to multibyte representation. */
  154:       size_t count = wcrtomb(buf+bufcount,*inptr,&state);
  155:       if (count == (size_t)(-1)) {
  156:         /* Invalid input. */
  157:         if (wcd->parent.discard_ilseq) {
  158:           count = 0;
  159:         }
  160:         #ifndef LIBICONV_PLUG
  161:         else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
  162:           /* Drop the contents of buf[] accumulated so far, and instead
  163:              pass all queued wide characters to the fallback handler. */
  164:           struct wc_to_mb_fallback_locals locals;
  165:           const wchar_t * fallback_inptr;
  166:           #if 0
  167:           locals.l_wcd = wcd;
  168:           #endif
  169:           locals.l_outbuf = *outbuf;
  170:           locals.l_outbytesleft = *outbytesleft;
  171:           locals.l_errno = 0;
  172:           for (fallback_inptr = (const wchar_t *) *inbuf;
  173:                fallback_inptr <= inptr;
  174:                fallback_inptr++)
  175:             wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
  176:                                                     wc_to_mb_write_replacement,
  177:                                                     &locals,
  178:                                                     wcd->parent.fallbacks.data);
  179:           if (locals.l_errno != 0) {
  180:             errno = locals.l_errno;
  181:             return -1;
  182:           }
  183:           wcd->state = state;
  184:           *inbuf = (const char *) (inptr + 1);
  185:           *inbytesleft = inleft - sizeof(wchar_t);
  186:           *outbuf = locals.l_outbuf;
  187:           *outbytesleft = locals.l_outbytesleft;
  188:           result += 1;
  189:           break;
  190:         }
  191:         #endif
  192:         else {
  193:           errno = EILSEQ;
  194:           return -1;
  195:         }
  196:       }
  197:       inptr++;
  198:       inleft -= sizeof(wchar_t);
  199:       bufcount += count;
  200:       if (count == 0) {
  201:         /* Continue, append next wchar_t. */
  202:       } else {
  203:         /* Attempt to convert the accumulated multibyte representations
  204:            to the target encoding. */
  205:         const char* bufptr = buf;
  206:         size_t bufleft = bufcount;
  207:         char* outptr = *outbuf;
  208:         size_t outleft = *outbytesleft;
  209:         size_t res = unicode_loop_convert(&wcd->parent,
  210:                                           &bufptr,&bufleft,
  211:                                           &outptr,&outleft);
  212:         if (res == (size_t)(-1)) {
  213:           if (errno == EILSEQ)
  214:             /* Invalid input. */
  215:             return -1;
  216:           else if (errno == E2BIG)
  217:             /* Output buffer too small. */
  218:             return -1;
  219:           else if (errno == EINVAL) {
  220:             /* Continue, append next wchar_t, but avoid buffer overrun. */
  221:             if (bufcount + MB_CUR_MAX > BUF_SIZE)
  222:               abort();
  223:           } else
  224:             abort();
  225:         } else {
  226:           /* Successful conversion. */
  227:           wcd->state = state;
  228:           *inbuf = (const char *) inptr;
  229:           *inbytesleft = inleft;
  230:           *outbuf = outptr;
  231:           *outbytesleft = outleft;
  232:           result += res;
  233:           break;
  234:         }
  235:       }
  236:     }
  237:   }
  238:   return result;
  239: }
  240: 
  241: static size_t wchar_from_loop_reset (iconv_t icd,
  242:                                      char* * outbuf, size_t *outbytesleft)
  243: {
  244:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  245:   if (outbuf == NULL || *outbuf == NULL) {
  246:     /* Reset the states. */
  247:     memset(&wcd->state,'\0',sizeof(mbstate_t));
  248:     return unicode_loop_reset(&wcd->parent,NULL,NULL);
  249:   } else {
  250:     if (!mbsinit(&wcd->state)) {
  251:       mbstate_t state = wcd->state;
  252:       char buf[BUF_SIZE];
  253:       size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
  254:       if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
  255:         abort();
  256:       else {
  257:         const char* bufptr = buf;
  258:         size_t bufleft = bufcount-1;
  259:         char* outptr = *outbuf;
  260:         size_t outleft = *outbytesleft;
  261:         size_t res = unicode_loop_convert(&wcd->parent,
  262:                                           &bufptr,&bufleft,
  263:                                           &outptr,&outleft);
  264:         if (res == (size_t)(-1)) {
  265:           if (errno == E2BIG)
  266:             return -1;
  267:           else
  268:             abort();
  269:         } else {
  270:           res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
  271:           if (res == (size_t)(-1))
  272:             return res;
  273:           else {
  274:             /* Successful. */
  275:             wcd->state = state;
  276:             *outbuf = outptr;
  277:             *outbytesleft = outleft;
  278:             return 0;
  279:           }
  280:         }
  281:       }
  282:     } else
  283:       return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
  284:   }
  285: }
  286: 
  287: #endif
  288: 
  289: 
  290: #if HAVE_MBRTOWC
  291: 
  292: /* From anything else to wchar_t. */
  293: 
  294: #ifndef LIBICONV_PLUG
  295: 
  296: struct mb_to_wc_fallback_locals {
  297:   char* l_outbuf;
  298:   size_t l_outbytesleft;
  299:   int l_errno;
  300: };
  301: 
  302: static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
  303:                                         void* callback_arg)
  304: {
  305:   struct mb_to_wc_fallback_locals * plocals =
  306:     (struct mb_to_wc_fallback_locals *) callback_arg;
  307:   /* Do nothing if already encountered an error in a previous call. */
  308:   if (plocals->l_errno == 0) {
  309:     /* Attempt to copy the passed buffer to the output buffer. */
  310:     if (plocals->l_outbytesleft < sizeof(wchar_t)*buflen)
  311:       plocals->l_errno = E2BIG;
  312:     else {
  313:       for (; buflen > 0; buf++, buflen--) {
  314:         *(wchar_t*) plocals->l_outbuf = *buf;
  315:         plocals->l_outbuf += sizeof(wchar_t);
  316:         plocals->l_outbytesleft -= sizeof(wchar_t);
  317:       }
  318:     }
  319:   }
  320: }
  321: 
  322: #endif /* !LIBICONV_PLUG */
  323: 
  324: static size_t wchar_to_loop_convert (iconv_t icd,
  325:                                      const char* * inbuf, size_t *inbytesleft,
  326:                                      char* * outbuf, size_t *outbytesleft)
  327: {
  328:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  329:   size_t result = 0;
  330:   while (*inbytesleft > 0) {
  331:     size_t incount;
  332:     for (incount = 1; ; ) {
  333:       /* Here incount <= *inbytesleft. */
  334:       char buf[BUF_SIZE];
  335:       const char* inptr = *inbuf;
  336:       size_t inleft = incount;
  337:       char* bufptr = buf;
  338:       size_t bufleft = BUF_SIZE;
  339:       size_t res = unicode_loop_convert(&wcd->parent,
  340:                                         &inptr,&inleft,
  341:                                         &bufptr,&bufleft);
  342:       if (res == (size_t)(-1)) {
  343:         if (errno == EILSEQ)
  344:           /* Invalid input. */
  345:           return -1;
  346:         else if (errno == EINVAL) {
  347:           /* Incomplete input. Next try with one more input byte. */
  348:         } else
  349:           /* E2BIG shouldn't occur. */
  350:           abort();
  351:       } else {
  352:         /* Successful conversion. */
  353:         size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
  354:         mbstate_t state = wcd->state;
  355:         wchar_t wc;
  356:         res = mbrtowc(&wc,buf,bufcount,&state);
  357:         if (res == (size_t)(-2)) {
  358:           /* Next try with one more input byte. */
  359:         } else {
  360:           if (res == (size_t)(-1)) {
  361:             /* Invalid input. */
  362:             if (wcd->parent.discard_ilseq) {
  363:             }
  364:             #ifndef LIBICONV_PLUG
  365:             else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
  366:               /* Drop the contents of buf[] accumulated so far, and instead
  367:                  pass all queued chars to the fallback handler. */
  368:               struct mb_to_wc_fallback_locals locals;
  369:               locals.l_outbuf = *outbuf;
  370:               locals.l_outbytesleft = *outbytesleft;
  371:               locals.l_errno = 0;
  372:               wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
  373:                                                       mb_to_wc_write_replacement,
  374:                                                       &locals,
  375:                                                       wcd->parent.fallbacks.data);
  376:               if (locals.l_errno != 0) {
  377:                 errno = locals.l_errno;
  378:                 return -1;
  379:               }
  380:               /* Restoring the state is not needed because it is the initial
  381:                  state anyway: For all known locale encodings, the multibyte
  382:                  to wchar_t conversion doesn't have shift state, and we have
  383:                  excluded partial accumulated characters. */
  384:               /* wcd->state = state; */
  385:               *inbuf += incount;
  386:               *inbytesleft -= incount;
  387:               *outbuf = locals.l_outbuf;
  388:               *outbytesleft = locals.l_outbytesleft;
  389:               result += 1;
  390:               break;
  391:             }
  392:             #endif
  393:             else
  394:               return -1;
  395:           } else {
  396:             if (*outbytesleft < sizeof(wchar_t)) {
  397:               errno = E2BIG;
  398:               return -1;
  399:             }
  400:             *(wchar_t*) *outbuf = wc;
  401:             /* Restoring the state is not needed because it is the initial
  402:                state anyway: For all known locale encodings, the multibyte
  403:                to wchar_t conversion doesn't have shift state, and we have
  404:                excluded partial accumulated characters. */
  405:             /* wcd->state = state; */
  406:             *outbuf += sizeof(wchar_t);
  407:             *outbytesleft -= sizeof(wchar_t);
  408:           }
  409:           *inbuf += incount;
  410:           *inbytesleft -= incount;
  411:           result += res;
  412:           break;
  413:         }
  414:       }
  415:       incount++;
  416:       if (incount > *inbytesleft) {
  417:         /* Incomplete input. */
  418:         errno = EINVAL;
  419:         return -1;
  420:       }
  421:     }
  422:   }
  423:   return result;
  424: }
  425: 
  426: static size_t wchar_to_loop_reset (iconv_t icd,
  427:                                    char* * outbuf, size_t *outbytesleft)
  428: {
  429:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  430:   size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
  431:   if (res == (size_t)(-1))
  432:     return res;
  433:   memset(&wcd->state,0,sizeof(mbstate_t));
  434:   return 0;
  435: }
  436: 
  437: #endif
  438: 
  439: 
  440: /* From wchar_t to wchar_t. */
  441: 
  442: static size_t wchar_id_loop_convert (iconv_t icd,
  443:                                      const char* * inbuf, size_t *inbytesleft,
  444:                                      char* * outbuf, size_t *outbytesleft)
  445: {
  446:   struct conv_struct * cd = (struct conv_struct *) icd;
  447:   const wchar_t* inptr = (const wchar_t*) *inbuf;
  448:   size_t inleft = *inbytesleft / sizeof(wchar_t);
  449:   wchar_t* outptr = (wchar_t*) *outbuf;
  450:   size_t outleft = *outbytesleft / sizeof(wchar_t);
  451:   size_t count = (inleft <= outleft ? inleft : outleft);
  452:   if (count > 0) {
  453:     *inbytesleft -= count * sizeof(wchar_t);
  454:     *outbytesleft -= count * sizeof(wchar_t);
  455:     do {
  456:       wchar_t wc = *inptr++;
  457:       *outptr++ = wc;
  458:       #ifndef LIBICONV_PLUG
  459:       if (cd->hooks.wc_hook)
  460:         (*cd->hooks.wc_hook)(wc, cd->hooks.data);
  461:       #endif
  462:     } while (--count > 0);
  463:     *inbuf = (const char*) inptr;
  464:     *outbuf = (char*) outptr;
  465:   }
  466:   return 0;
  467: }
  468: 
  469: static size_t wchar_id_loop_reset (iconv_t icd,
  470:                                    char* * outbuf, size_t *outbytesleft)
  471: {
  472:   return 0;
  473: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>