Annotation of embedaddon/libiconv/lib/loop_wchar.h, revision 1.1.1.2

1.1       misho       1: /*
1.1.1.2 ! misho       2:  * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011 Free Software Foundation, Inc.
1.1       misho       3:  * This file is part of the GNU LIBICONV Library.
                      4:  *
                      5:  * The GNU LIBICONV Library is free software; you can redistribute it
                      6:  * and/or modify it under the terms of the GNU Library General Public
                      7:  * License as published by the Free Software Foundation; either version 2
                      8:  * of the License, or (at your option) any later version.
                      9:  *
                     10:  * The GNU LIBICONV Library is distributed in the hope that it will be
                     11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
                     12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     13:  * Library General Public License for more details.
                     14:  *
                     15:  * You should have received a copy of the GNU Library General Public
                     16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
                     17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
                     18:  * Fifth Floor, Boston, MA 02110-1301, USA.
                     19:  */
                     20: 
                     21: /* This file defines three conversion loops:
                     22:      - from wchar_t to anything else,
                     23:      - from anything else to wchar_t,
                     24:      - from wchar_t to wchar_t.
                     25:  */
                     26: 
                     27: #if HAVE_WCRTOMB || HAVE_MBRTOWC
1.1.1.2 ! misho      28: /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
        !            29:    <wchar.h>.
        !            30:    BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
        !            31:    included before <wchar.h>.
        !            32:    In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined
        !            33:    by <stddef.h>.  */
        !            34: # include <stddef.h>
        !            35: # include <stdio.h>
        !            36: # include <time.h>
1.1       misho      37: # include <wchar.h>
                     38: # define BUF_SIZE 64  /* assume MB_LEN_MAX <= 64 */
                     39:   /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
                     40:   extern size_t mbrtowc ();
                     41: # ifdef mbstate_t
                     42: #  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
                     43: #  define mbsinit(ps) 1
                     44: # endif
                     45: # ifndef mbsinit
                     46: #  if !HAVE_MBSINIT
                     47: #   define mbsinit(ps) 1
                     48: #  endif
                     49: # endif
                     50: #endif
                     51: 
                     52: /*
                     53:  * The first two conversion loops have an extended conversion descriptor.
                     54:  */
                     55: struct wchar_conv_struct {
                     56:   struct conv_struct parent;
                     57: #if HAVE_WCRTOMB || HAVE_MBRTOWC
                     58:   mbstate_t state;
                     59: #endif
                     60: };
                     61: 
                     62: 
                     63: #if HAVE_WCRTOMB
                     64: 
                     65: /* From wchar_t to anything else. */
                     66: 
                     67: #ifndef LIBICONV_PLUG
                     68: 
                     #if 0

                     /* Disabled variant: the replacement text is expected in the locale
                        encoding and is pushed through unicode_loop_convert before it
                        reaches the output buffer. */
                     struct wc_to_mb_fallback_locals {
                       struct wchar_conv_struct * l_wcd;
                       char* l_outbuf;
                       size_t l_outbytesleft;
                       int l_errno;
                     };

                     /* A callback that writes a string given in the locale encoding. */
                     static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                                             void* callback_arg)
                     {
                       struct wc_to_mb_fallback_locals * plocals =
                         (struct wc_to_mb_fallback_locals *) callback_arg;
                       /* Do nothing if already encountered an error in a previous call. */
                       if (plocals->l_errno == 0) {
                         /* Attempt to convert the passed buffer to the target encoding.
                            Here we don't support characters split across multiple calls. */
                         const char* bufptr = buf;
                         size_t bufleft = buflen;
                         size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
                                                           &bufptr,&bufleft,
                                                           &plocals->l_outbuf,&plocals->l_outbytesleft);
                         if (res == (size_t)(-1)) {
                           if (errno == EILSEQ || errno == EINVAL)
                             /* Invalid buf contents. */
                             plocals->l_errno = EILSEQ;
                           else if (errno == E2BIG)
                             /* Output buffer too small. */
                             plocals->l_errno = E2BIG;
                           else
                             abort();
                         } else {
                           /* Successful conversion. */
                           if (bufleft > 0)
                             abort();
                         }
                       }
                     }

                     #else

                     /* State shared between wchar_from_loop_convert and the replacement
                        writer below while a wc_to_mb fallback handler is running. */
                     struct wc_to_mb_fallback_locals {
                       char* l_outbuf;          /* next free byte of the output buffer */
                       size_t l_outbytesleft;   /* bytes still available at l_outbuf */
                       int l_errno;             /* 0, or the first error recorded (E2BIG) */
                     };

                     /* A callback that writes a string given in the target encoding.
                        buf/buflen  the replacement bytes, copied verbatim.
                        callback_arg  points to a struct wc_to_mb_fallback_locals.
                        Once an error has been recorded in l_errno, later calls are no-ops.
                        If the replacement does not fit, l_errno becomes E2BIG and nothing
                        is written. */
                     static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                                             void* callback_arg)
                     {
                       struct wc_to_mb_fallback_locals * plocals =
                         (struct wc_to_mb_fallback_locals *) callback_arg;

                       /* Do nothing if already encountered an error in a previous call. */
                       if (plocals->l_errno != 0)
                         return;

                       if (buflen > plocals->l_outbytesleft) {
                         plocals->l_errno = E2BIG;
                         return;
                       }

                       /* An empty replacement is legal; skip memcpy so that a handler
                          passing (NULL, 0) does not hand a null pointer to memcpy. */
                       if (buflen > 0) {
                         memcpy(plocals->l_outbuf, buf, buflen);
                         plocals->l_outbuf += buflen;
                         plocals->l_outbytesleft -= buflen;
                       }
                     }

                     #endif
                    138: 
                    139: #endif /* !LIBICONV_PLUG */
                    140: 
                    141: static size_t wchar_from_loop_convert (iconv_t icd,
                    142:                                        const char* * inbuf, size_t *inbytesleft,
                    143:                                        char* * outbuf, size_t *outbytesleft)
                    144: {
                    145:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
                    146:   size_t result = 0;
                    147:   while (*inbytesleft >= sizeof(wchar_t)) {
                    148:     const wchar_t * inptr = (const wchar_t *) *inbuf;
                    149:     size_t inleft = *inbytesleft;
                    150:     char buf[BUF_SIZE];
                    151:     mbstate_t state = wcd->state;
                    152:     size_t bufcount = 0;
                    153:     while (inleft >= sizeof(wchar_t)) {
                    154:       /* Convert one wchar_t to multibyte representation. */
                    155:       size_t count = wcrtomb(buf+bufcount,*inptr,&state);
                    156:       if (count == (size_t)(-1)) {
                    157:         /* Invalid input. */
                    158:         if (wcd->parent.discard_ilseq) {
                    159:           count = 0;
                    160:         }
                    161:         #ifndef LIBICONV_PLUG
                    162:         else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
                    163:           /* Drop the contents of buf[] accumulated so far, and instead
                    164:              pass all queued wide characters to the fallback handler. */
                    165:           struct wc_to_mb_fallback_locals locals;
                    166:           const wchar_t * fallback_inptr;
                    167:           #if 0
                    168:           locals.l_wcd = wcd;
                    169:           #endif
                    170:           locals.l_outbuf = *outbuf;
                    171:           locals.l_outbytesleft = *outbytesleft;
                    172:           locals.l_errno = 0;
                    173:           for (fallback_inptr = (const wchar_t *) *inbuf;
                    174:                fallback_inptr <= inptr;
                    175:                fallback_inptr++)
                    176:             wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
                    177:                                                     wc_to_mb_write_replacement,
                    178:                                                     &locals,
                    179:                                                     wcd->parent.fallbacks.data);
                    180:           if (locals.l_errno != 0) {
                    181:             errno = locals.l_errno;
                    182:             return -1;
                    183:           }
                    184:           wcd->state = state;
                    185:           *inbuf = (const char *) (inptr + 1);
                    186:           *inbytesleft = inleft - sizeof(wchar_t);
                    187:           *outbuf = locals.l_outbuf;
                    188:           *outbytesleft = locals.l_outbytesleft;
                    189:           result += 1;
                    190:           break;
                    191:         }
                    192:         #endif
                    193:         else {
                    194:           errno = EILSEQ;
                    195:           return -1;
                    196:         }
                    197:       }
                    198:       inptr++;
                    199:       inleft -= sizeof(wchar_t);
                    200:       bufcount += count;
                    201:       if (count == 0) {
                    202:         /* Continue, append next wchar_t. */
                    203:       } else {
                    204:         /* Attempt to convert the accumulated multibyte representations
                    205:            to the target encoding. */
                    206:         const char* bufptr = buf;
                    207:         size_t bufleft = bufcount;
                    208:         char* outptr = *outbuf;
                    209:         size_t outleft = *outbytesleft;
                    210:         size_t res = unicode_loop_convert(&wcd->parent,
                    211:                                           &bufptr,&bufleft,
                    212:                                           &outptr,&outleft);
                    213:         if (res == (size_t)(-1)) {
                    214:           if (errno == EILSEQ)
                    215:             /* Invalid input. */
                    216:             return -1;
                    217:           else if (errno == E2BIG)
                    218:             /* Output buffer too small. */
                    219:             return -1;
                    220:           else if (errno == EINVAL) {
                    221:             /* Continue, append next wchar_t, but avoid buffer overrun. */
                    222:             if (bufcount + MB_CUR_MAX > BUF_SIZE)
                    223:               abort();
                    224:           } else
                    225:             abort();
                    226:         } else {
                    227:           /* Successful conversion. */
                    228:           wcd->state = state;
                    229:           *inbuf = (const char *) inptr;
                    230:           *inbytesleft = inleft;
                    231:           *outbuf = outptr;
                    232:           *outbytesleft = outleft;
                    233:           result += res;
                    234:           break;
                    235:         }
                    236:       }
                    237:     }
                    238:   }
                    239:   return result;
                    240: }
                    241: 
                    242: static size_t wchar_from_loop_reset (iconv_t icd,
                    243:                                      char* * outbuf, size_t *outbytesleft)
                    244: {
                    245:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
                    246:   if (outbuf == NULL || *outbuf == NULL) {
                    247:     /* Reset the states. */
                    248:     memset(&wcd->state,'\0',sizeof(mbstate_t));
                    249:     return unicode_loop_reset(&wcd->parent,NULL,NULL);
                    250:   } else {
                    251:     if (!mbsinit(&wcd->state)) {
                    252:       mbstate_t state = wcd->state;
                    253:       char buf[BUF_SIZE];
                    254:       size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
                    255:       if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
                    256:         abort();
                    257:       else {
                    258:         const char* bufptr = buf;
                    259:         size_t bufleft = bufcount-1;
                    260:         char* outptr = *outbuf;
                    261:         size_t outleft = *outbytesleft;
                    262:         size_t res = unicode_loop_convert(&wcd->parent,
                    263:                                           &bufptr,&bufleft,
                    264:                                           &outptr,&outleft);
                    265:         if (res == (size_t)(-1)) {
                    266:           if (errno == E2BIG)
                    267:             return -1;
                    268:           else
                    269:             abort();
                    270:         } else {
                    271:           res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
                    272:           if (res == (size_t)(-1))
                    273:             return res;
                    274:           else {
                    275:             /* Successful. */
                    276:             wcd->state = state;
                    277:             *outbuf = outptr;
                    278:             *outbytesleft = outleft;
                    279:             return 0;
                    280:           }
                    281:         }
                    282:       }
                    283:     } else
                    284:       return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
                    285:   }
                    286: }
                    287: 
                    288: #endif
                    289: 
                    290: 
                    291: #if HAVE_MBRTOWC
                    292: 
                    293: /* From anything else to wchar_t. */
                    294: 
                    295: #ifndef LIBICONV_PLUG
                    296: 
                    /* State shared between wchar_to_loop_convert and the replacement
                       writer below while an mb_to_wc fallback handler is running. */
                    struct mb_to_wc_fallback_locals {
                      char* l_outbuf;          /* next free position of the output buffer */
                      size_t l_outbytesleft;   /* bytes still available at l_outbuf */
                      int l_errno;             /* 0, or the first error recorded (E2BIG) */
                    };

                    /* A callback that appends buflen wide characters from buf to the
                       output buffer described by callback_arg (a struct
                       mb_to_wc_fallback_locals).  Once an error has been recorded in
                       l_errno, later calls are no-ops.  If the replacement does not fit,
                       l_errno becomes E2BIG and nothing is written. */
                    static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
                                                            void* callback_arg)
                    {
                      struct mb_to_wc_fallback_locals * plocals =
                        (struct mb_to_wc_fallback_locals *) callback_arg;
                      /* Do nothing if already encountered an error in a previous call. */
                      if (plocals->l_errno == 0) {
                        /* Attempt to copy the passed buffer to the output buffer.
                           Compare in units of wchar_t: sizeof(wchar_t)*buflen could wrap
                           around for a huge buflen and wrongly pass the space check. */
                        if (plocals->l_outbytesleft / sizeof(wchar_t) < buflen)
                          plocals->l_errno = E2BIG;
                        else {
                          for (; buflen > 0; buf++, buflen--) {
                            *(wchar_t*) plocals->l_outbuf = *buf;
                            plocals->l_outbuf += sizeof(wchar_t);
                            plocals->l_outbytesleft -= sizeof(wchar_t);
                          }
                        }
                      }
                    }
                    322: 
                    323: #endif /* !LIBICONV_PLUG */
                    324: 
                    325: static size_t wchar_to_loop_convert (iconv_t icd,
                    326:                                      const char* * inbuf, size_t *inbytesleft,
                    327:                                      char* * outbuf, size_t *outbytesleft)
                    328: {
                    329:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
                    330:   size_t result = 0;
                    331:   while (*inbytesleft > 0) {
                    332:     size_t incount;
1.1.1.2 ! misho     333:     for (incount = 1; ; ) {
        !           334:       /* Here incount <= *inbytesleft. */
1.1       misho     335:       char buf[BUF_SIZE];
                    336:       const char* inptr = *inbuf;
                    337:       size_t inleft = incount;
                    338:       char* bufptr = buf;
                    339:       size_t bufleft = BUF_SIZE;
                    340:       size_t res = unicode_loop_convert(&wcd->parent,
                    341:                                         &inptr,&inleft,
                    342:                                         &bufptr,&bufleft);
                    343:       if (res == (size_t)(-1)) {
                    344:         if (errno == EILSEQ)
                    345:           /* Invalid input. */
                    346:           return -1;
                    347:         else if (errno == EINVAL) {
                    348:           /* Incomplete input. Next try with one more input byte. */
                    349:         } else
                    350:           /* E2BIG shouldn't occur. */
                    351:           abort();
                    352:       } else {
                    353:         /* Successful conversion. */
                    354:         size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
                    355:         mbstate_t state = wcd->state;
                    356:         wchar_t wc;
                    357:         res = mbrtowc(&wc,buf,bufcount,&state);
                    358:         if (res == (size_t)(-2)) {
                    359:           /* Next try with one more input byte. */
                    360:         } else {
                    361:           if (res == (size_t)(-1)) {
                    362:             /* Invalid input. */
                    363:             if (wcd->parent.discard_ilseq) {
                    364:             }
                    365:             #ifndef LIBICONV_PLUG
                    366:             else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
                    367:               /* Drop the contents of buf[] accumulated so far, and instead
                    368:                  pass all queued chars to the fallback handler. */
                    369:               struct mb_to_wc_fallback_locals locals;
                    370:               locals.l_outbuf = *outbuf;
                    371:               locals.l_outbytesleft = *outbytesleft;
                    372:               locals.l_errno = 0;
                    373:               wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
                    374:                                                       mb_to_wc_write_replacement,
                    375:                                                       &locals,
                    376:                                                       wcd->parent.fallbacks.data);
                    377:               if (locals.l_errno != 0) {
                    378:                 errno = locals.l_errno;
                    379:                 return -1;
                    380:               }
                    381:               /* Restoring the state is not needed because it is the initial
                    382:                  state anyway: For all known locale encodings, the multibyte
                    383:                  to wchar_t conversion doesn't have shift state, and we have
                    384:                  excluded partial accumulated characters. */
                    385:               /* wcd->state = state; */
                    386:               *inbuf += incount;
                    387:               *inbytesleft -= incount;
                    388:               *outbuf = locals.l_outbuf;
                    389:               *outbytesleft = locals.l_outbytesleft;
                    390:               result += 1;
                    391:               break;
                    392:             }
                    393:             #endif
                    394:             else
                    395:               return -1;
                    396:           } else {
                    397:             if (*outbytesleft < sizeof(wchar_t)) {
                    398:               errno = E2BIG;
                    399:               return -1;
                    400:             }
                    401:             *(wchar_t*) *outbuf = wc;
                    402:             /* Restoring the state is not needed because it is the initial
                    403:                state anyway: For all known locale encodings, the multibyte
                    404:                to wchar_t conversion doesn't have shift state, and we have
                    405:                excluded partial accumulated characters. */
                    406:             /* wcd->state = state; */
                    407:             *outbuf += sizeof(wchar_t);
                    408:             *outbytesleft -= sizeof(wchar_t);
                    409:           }
                    410:           *inbuf += incount;
                    411:           *inbytesleft -= incount;
                    412:           result += res;
                    413:           break;
                    414:         }
                    415:       }
1.1.1.2 ! misho     416:       incount++;
        !           417:       if (incount > *inbytesleft) {
        !           418:         /* Incomplete input. */
        !           419:         errno = EINVAL;
        !           420:         return -1;
        !           421:       }
1.1       misho     422:     }
                    423:   }
                    424:   return result;
                    425: }
                    426: 
                    427: static size_t wchar_to_loop_reset (iconv_t icd,
                    428:                                    char* * outbuf, size_t *outbytesleft)
                    429: {
                    430:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
                    431:   size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
                    432:   if (res == (size_t)(-1))
                    433:     return res;
                    434:   memset(&wcd->state,0,sizeof(mbstate_t));
                    435:   return 0;
                    436: }
                    437: 
                    438: #endif
                    439: 
                    440: 
                    441: /* From wchar_t to wchar_t. */
                    442: 
                    443: static size_t wchar_id_loop_convert (iconv_t icd,
                    444:                                      const char* * inbuf, size_t *inbytesleft,
                    445:                                      char* * outbuf, size_t *outbytesleft)
                    446: {
                    447:   struct conv_struct * cd = (struct conv_struct *) icd;
                    448:   const wchar_t* inptr = (const wchar_t*) *inbuf;
                    449:   size_t inleft = *inbytesleft / sizeof(wchar_t);
                    450:   wchar_t* outptr = (wchar_t*) *outbuf;
                    451:   size_t outleft = *outbytesleft / sizeof(wchar_t);
                    452:   size_t count = (inleft <= outleft ? inleft : outleft);
                    453:   if (count > 0) {
                    454:     *inbytesleft -= count * sizeof(wchar_t);
                    455:     *outbytesleft -= count * sizeof(wchar_t);
                    456:     do {
                    457:       wchar_t wc = *inptr++;
                    458:       *outptr++ = wc;
                    459:       #ifndef LIBICONV_PLUG
                    460:       if (cd->hooks.wc_hook)
                    461:         (*cd->hooks.wc_hook)(wc, cd->hooks.data);
                    462:       #endif
                    463:     } while (--count > 0);
                    464:     *inbuf = (const char*) inptr;
                    465:     *outbuf = (char*) outptr;
                    466:   }
                    467:   return 0;
                    468: }
                    469: 
                    470: static size_t wchar_id_loop_reset (iconv_t icd,
                    471:                                    char* * outbuf, size_t *outbytesleft)
                    472: {
                    473:   return 0;
                    474: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>