Annotation of embedaddon/libiconv/lib/loop_wchar.h, revision 1.1

1.1     ! misho       1: /*
        !             2:  * Copyright (C) 2000-2002, 2005-2006, 2008 Free Software Foundation, Inc.
        !             3:  * This file is part of the GNU LIBICONV Library.
        !             4:  *
        !             5:  * The GNU LIBICONV Library is free software; you can redistribute it
        !             6:  * and/or modify it under the terms of the GNU Library General Public
        !             7:  * License as published by the Free Software Foundation; either version 2
        !             8:  * of the License, or (at your option) any later version.
        !             9:  *
        !            10:  * The GNU LIBICONV Library is distributed in the hope that it will be
        !            11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            13:  * Library General Public License for more details.
        !            14:  *
        !            15:  * You should have received a copy of the GNU Library General Public
        !            16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
        !            17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
        !            18:  * Fifth Floor, Boston, MA 02110-1301, USA.
        !            19:  */
        !            20: 
        !            21: /* This file defines three conversion loops:
        !            22:      - from wchar_t to anything else,
        !            23:      - from anything else to wchar_t,
        !            24:      - from wchar_t to wchar_t.
        !            25:  */
        !            26: 
        !            27: #if HAVE_WCRTOMB || HAVE_MBRTOWC
        !            28: # include <wchar.h>
        !            29: # define BUF_SIZE 64  /* assume MB_LEN_MAX <= 64 */
        !            30:   /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
        !            31:   extern size_t mbrtowc ();
        !            32: # ifdef mbstate_t
        !            33: #  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
        !            34: #  define mbsinit(ps) 1
        !            35: # endif
        !            36: # ifndef mbsinit
        !            37: #  if !HAVE_MBSINIT
        !            38: #   define mbsinit(ps) 1
        !            39: #  endif
        !            40: # endif
        !            41: #endif
        !            42: 
        !            43: /*
        !            44:  * The first two conversion loops have an extended conversion descriptor.
        !            45:  */
        !            46: struct wchar_conv_struct {
        !            47:   struct conv_struct parent;
        !            48: #if HAVE_WCRTOMB || HAVE_MBRTOWC
        !            49:   mbstate_t state;
        !            50: #endif
        !            51: };
        !            52: 
        !            53: 
        !            54: #if HAVE_WCRTOMB
        !            55: 
        !            56: /* From wchar_t to anything else. */
        !            57: 
        !            58: #ifndef LIBICONV_PLUG
        !            59: 
        !            60: #if 0
        !            61: 
        !            62: struct wc_to_mb_fallback_locals {
        !            63:   struct wchar_conv_struct * l_wcd;
        !            64:   char* l_outbuf;
        !            65:   size_t l_outbytesleft;
        !            66:   int l_errno;
        !            67: };
        !            68: 
        !            69: /* A callback that writes a string given in the locale encoding. */
        !            70: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
        !            71:                                         void* callback_arg)
        !            72: {
        !            73:   struct wc_to_mb_fallback_locals * plocals =
        !            74:     (struct wc_to_mb_fallback_locals *) callback_arg;
        !            75:   /* Do nothing if already encountered an error in a previous call. */
        !            76:   if (plocals->l_errno == 0) {
        !            77:     /* Attempt to convert the passed buffer to the target encoding.
        !            78:        Here we don't support characters split across multiple calls. */
        !            79:     const char* bufptr = buf;
        !            80:     size_t bufleft = buflen;
        !            81:     size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
        !            82:                                       &bufptr,&bufleft,
        !            83:                                       &plocals->l_outbuf,&plocals->l_outbytesleft);
        !            84:     if (res == (size_t)(-1)) {
        !            85:       if (errno == EILSEQ || errno == EINVAL)
        !            86:         /* Invalid buf contents. */
        !            87:         plocals->l_errno = EILSEQ;
        !            88:       else if (errno == E2BIG)
        !            89:         /* Output buffer too small. */
        !            90:         plocals->l_errno = E2BIG;
        !            91:       else 
        !            92:         abort();
        !            93:     } else {
        !            94:       /* Successful conversion. */
        !            95:       if (bufleft > 0)
        !            96:         abort();
        !            97:     }
        !            98:   }
        !            99: }
        !           100: 
        !           101: #else
        !           102: 
        !           103: struct wc_to_mb_fallback_locals {
        !           104:   char* l_outbuf;
        !           105:   size_t l_outbytesleft;
        !           106:   int l_errno;
        !           107: };
        !           108: 
        !           109: /* A callback that writes a string given in the target encoding. */
        !           110: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
        !           111:                                         void* callback_arg)
        !           112: {
        !           113:   struct wc_to_mb_fallback_locals * plocals =
        !           114:     (struct wc_to_mb_fallback_locals *) callback_arg;
        !           115:   /* Do nothing if already encountered an error in a previous call. */
        !           116:   if (plocals->l_errno == 0) {
        !           117:     /* Attempt to copy the passed buffer to the output buffer. */
        !           118:     if (plocals->l_outbytesleft < buflen)
        !           119:       plocals->l_errno = E2BIG;
        !           120:     else {
        !           121:       memcpy(plocals->l_outbuf, buf, buflen);
        !           122:       plocals->l_outbuf += buflen;
        !           123:       plocals->l_outbytesleft -= buflen;
        !           124:     }
        !           125:   }
        !           126: }
        !           127: 
        !           128: #endif
        !           129: 
        !           130: #endif /* !LIBICONV_PLUG */
        !           131: 
        !           132: static size_t wchar_from_loop_convert (iconv_t icd,
        !           133:                                        const char* * inbuf, size_t *inbytesleft,
        !           134:                                        char* * outbuf, size_t *outbytesleft)
        !           135: {
        !           136:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
        !           137:   size_t result = 0;
        !           138:   while (*inbytesleft >= sizeof(wchar_t)) {
        !           139:     const wchar_t * inptr = (const wchar_t *) *inbuf;
        !           140:     size_t inleft = *inbytesleft;
        !           141:     char buf[BUF_SIZE];
        !           142:     mbstate_t state = wcd->state;
        !           143:     size_t bufcount = 0;
        !           144:     while (inleft >= sizeof(wchar_t)) {
        !           145:       /* Convert one wchar_t to multibyte representation. */
        !           146:       size_t count = wcrtomb(buf+bufcount,*inptr,&state);
        !           147:       if (count == (size_t)(-1)) {
        !           148:         /* Invalid input. */
        !           149:         if (wcd->parent.discard_ilseq) {
        !           150:           count = 0;
        !           151:         }
        !           152:         #ifndef LIBICONV_PLUG
        !           153:         else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
        !           154:           /* Drop the contents of buf[] accumulated so far, and instead
        !           155:              pass all queued wide characters to the fallback handler. */
        !           156:           struct wc_to_mb_fallback_locals locals;
        !           157:           const wchar_t * fallback_inptr;
        !           158:           #if 0
        !           159:           locals.l_wcd = wcd;
        !           160:           #endif
        !           161:           locals.l_outbuf = *outbuf;
        !           162:           locals.l_outbytesleft = *outbytesleft;
        !           163:           locals.l_errno = 0;
        !           164:           for (fallback_inptr = (const wchar_t *) *inbuf;
        !           165:                fallback_inptr <= inptr;
        !           166:                fallback_inptr++)
        !           167:             wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
        !           168:                                                     wc_to_mb_write_replacement,
        !           169:                                                     &locals,
        !           170:                                                     wcd->parent.fallbacks.data);
        !           171:           if (locals.l_errno != 0) {
        !           172:             errno = locals.l_errno;
        !           173:             return -1;
        !           174:           }
        !           175:           wcd->state = state;
        !           176:           *inbuf = (const char *) (inptr + 1);
        !           177:           *inbytesleft = inleft - sizeof(wchar_t);
        !           178:           *outbuf = locals.l_outbuf;
        !           179:           *outbytesleft = locals.l_outbytesleft;
        !           180:           result += 1;
        !           181:           break;
        !           182:         }
        !           183:         #endif
        !           184:         else {
        !           185:           errno = EILSEQ;
        !           186:           return -1;
        !           187:         }
        !           188:       }
        !           189:       inptr++;
        !           190:       inleft -= sizeof(wchar_t);
        !           191:       bufcount += count;
        !           192:       if (count == 0) {
        !           193:         /* Continue, append next wchar_t. */
        !           194:       } else {
        !           195:         /* Attempt to convert the accumulated multibyte representations
        !           196:            to the target encoding. */
        !           197:         const char* bufptr = buf;
        !           198:         size_t bufleft = bufcount;
        !           199:         char* outptr = *outbuf;
        !           200:         size_t outleft = *outbytesleft;
        !           201:         size_t res = unicode_loop_convert(&wcd->parent,
        !           202:                                           &bufptr,&bufleft,
        !           203:                                           &outptr,&outleft);
        !           204:         if (res == (size_t)(-1)) {
        !           205:           if (errno == EILSEQ)
        !           206:             /* Invalid input. */
        !           207:             return -1;
        !           208:           else if (errno == E2BIG)
        !           209:             /* Output buffer too small. */
        !           210:             return -1;
        !           211:           else if (errno == EINVAL) {
        !           212:             /* Continue, append next wchar_t, but avoid buffer overrun. */
        !           213:             if (bufcount + MB_CUR_MAX > BUF_SIZE)
        !           214:               abort();
        !           215:           } else
        !           216:             abort();
        !           217:         } else {
        !           218:           /* Successful conversion. */
        !           219:           wcd->state = state;
        !           220:           *inbuf = (const char *) inptr;
        !           221:           *inbytesleft = inleft;
        !           222:           *outbuf = outptr;
        !           223:           *outbytesleft = outleft;
        !           224:           result += res;
        !           225:           break;
        !           226:         }
        !           227:       }
        !           228:     }
        !           229:   }
        !           230:   return result;
        !           231: }
        !           232: 
        !           233: static size_t wchar_from_loop_reset (iconv_t icd,
        !           234:                                      char* * outbuf, size_t *outbytesleft)
        !           235: {
        !           236:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
        !           237:   if (outbuf == NULL || *outbuf == NULL) {
        !           238:     /* Reset the states. */
        !           239:     memset(&wcd->state,'\0',sizeof(mbstate_t));
        !           240:     return unicode_loop_reset(&wcd->parent,NULL,NULL);
        !           241:   } else {
        !           242:     if (!mbsinit(&wcd->state)) {
        !           243:       mbstate_t state = wcd->state;
        !           244:       char buf[BUF_SIZE];
        !           245:       size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
        !           246:       if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
        !           247:         abort();
        !           248:       else {
        !           249:         const char* bufptr = buf;
        !           250:         size_t bufleft = bufcount-1;
        !           251:         char* outptr = *outbuf;
        !           252:         size_t outleft = *outbytesleft;
        !           253:         size_t res = unicode_loop_convert(&wcd->parent,
        !           254:                                           &bufptr,&bufleft,
        !           255:                                           &outptr,&outleft);
        !           256:         if (res == (size_t)(-1)) {
        !           257:           if (errno == E2BIG)
        !           258:             return -1;
        !           259:           else
        !           260:             abort();
        !           261:         } else {
        !           262:           res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
        !           263:           if (res == (size_t)(-1))
        !           264:             return res;
        !           265:           else {
        !           266:             /* Successful. */
        !           267:             wcd->state = state;
        !           268:             *outbuf = outptr;
        !           269:             *outbytesleft = outleft;
        !           270:             return 0;
        !           271:           }
        !           272:         }
        !           273:       }
        !           274:     } else
        !           275:       return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
        !           276:   }
        !           277: }
        !           278: 
        !           279: #endif
        !           280: 
        !           281: 
        !           282: #if HAVE_MBRTOWC
        !           283: 
        !           284: /* From anything else to wchar_t. */
        !           285: 
        !           286: #ifndef LIBICONV_PLUG
        !           287: 
        !           288: struct mb_to_wc_fallback_locals {
        !           289:   char* l_outbuf;
        !           290:   size_t l_outbytesleft;
        !           291:   int l_errno;
        !           292: };
        !           293: 
        !           294: static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
        !           295:                                         void* callback_arg)
        !           296: {
        !           297:   struct mb_to_wc_fallback_locals * plocals =
        !           298:     (struct mb_to_wc_fallback_locals *) callback_arg;
        !           299:   /* Do nothing if already encountered an error in a previous call. */
        !           300:   if (plocals->l_errno == 0) {
        !           301:     /* Attempt to copy the passed buffer to the output buffer. */
        !           302:     if (plocals->l_outbytesleft < sizeof(wchar_t)*buflen)
        !           303:       plocals->l_errno = E2BIG;
        !           304:     else {
        !           305:       for (; buflen > 0; buf++, buflen--) {
        !           306:         *(wchar_t*) plocals->l_outbuf = *buf;
        !           307:         plocals->l_outbuf += sizeof(wchar_t);
        !           308:         plocals->l_outbytesleft -= sizeof(wchar_t);
        !           309:       }
        !           310:     }
        !           311:   }
        !           312: }
        !           313: 
        !           314: #endif /* !LIBICONV_PLUG */
        !           315: 
        !           316: static size_t wchar_to_loop_convert (iconv_t icd,
        !           317:                                      const char* * inbuf, size_t *inbytesleft,
        !           318:                                      char* * outbuf, size_t *outbytesleft)
        !           319: {
        !           320:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
        !           321:   size_t result = 0;
        !           322:   while (*inbytesleft > 0) {
        !           323:     size_t incount;
        !           324:     for (incount = 1; incount <= *inbytesleft; incount++) {
        !           325:       char buf[BUF_SIZE];
        !           326:       const char* inptr = *inbuf;
        !           327:       size_t inleft = incount;
        !           328:       char* bufptr = buf;
        !           329:       size_t bufleft = BUF_SIZE;
        !           330:       size_t res = unicode_loop_convert(&wcd->parent,
        !           331:                                         &inptr,&inleft,
        !           332:                                         &bufptr,&bufleft);
        !           333:       if (res == (size_t)(-1)) {
        !           334:         if (errno == EILSEQ)
        !           335:           /* Invalid input. */
        !           336:           return -1;
        !           337:         else if (errno == EINVAL) {
        !           338:           /* Incomplete input. Next try with one more input byte. */
        !           339:         } else
        !           340:           /* E2BIG shouldn't occur. */
        !           341:           abort();
        !           342:       } else {
        !           343:         /* Successful conversion. */
        !           344:         size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
        !           345:         mbstate_t state = wcd->state;
        !           346:         wchar_t wc;
        !           347:         res = mbrtowc(&wc,buf,bufcount,&state);
        !           348:         if (res == (size_t)(-2)) {
        !           349:           /* Next try with one more input byte. */
        !           350:         } else {
        !           351:           if (res == (size_t)(-1)) {
        !           352:             /* Invalid input. */
        !           353:             if (wcd->parent.discard_ilseq) {
        !           354:             }
        !           355:             #ifndef LIBICONV_PLUG
        !           356:             else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
        !           357:               /* Drop the contents of buf[] accumulated so far, and instead
        !           358:                  pass all queued chars to the fallback handler. */
        !           359:               struct mb_to_wc_fallback_locals locals;
        !           360:               locals.l_outbuf = *outbuf;
        !           361:               locals.l_outbytesleft = *outbytesleft;
        !           362:               locals.l_errno = 0;
        !           363:               wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
        !           364:                                                       mb_to_wc_write_replacement,
        !           365:                                                       &locals,
        !           366:                                                       wcd->parent.fallbacks.data);
        !           367:               if (locals.l_errno != 0) {
        !           368:                 errno = locals.l_errno;
        !           369:                 return -1;
        !           370:               }
        !           371:               /* Restoring the state is not needed because it is the initial
        !           372:                  state anyway: For all known locale encodings, the multibyte
        !           373:                  to wchar_t conversion doesn't have shift state, and we have
        !           374:                  excluded partial accumulated characters. */
        !           375:               /* wcd->state = state; */
        !           376:               *inbuf += incount;
        !           377:               *inbytesleft -= incount;
        !           378:               *outbuf = locals.l_outbuf;
        !           379:               *outbytesleft = locals.l_outbytesleft;
        !           380:               result += 1;
        !           381:               break;
        !           382:             }
        !           383:             #endif
        !           384:             else
        !           385:               return -1;
        !           386:           } else {
        !           387:             if (*outbytesleft < sizeof(wchar_t)) {
        !           388:               errno = E2BIG;
        !           389:               return -1;
        !           390:             }
        !           391:             *(wchar_t*) *outbuf = wc;
        !           392:             /* Restoring the state is not needed because it is the initial
        !           393:                state anyway: For all known locale encodings, the multibyte
        !           394:                to wchar_t conversion doesn't have shift state, and we have
        !           395:                excluded partial accumulated characters. */
        !           396:             /* wcd->state = state; */
        !           397:             *outbuf += sizeof(wchar_t);
        !           398:             *outbytesleft -= sizeof(wchar_t);
        !           399:           }
        !           400:           *inbuf += incount;
        !           401:           *inbytesleft -= incount;
        !           402:           result += res;
        !           403:           break;
        !           404:         }
        !           405:       }
        !           406:     }
        !           407:   }
        !           408:   return result;
        !           409: }
        !           410: 
        !           411: static size_t wchar_to_loop_reset (iconv_t icd,
        !           412:                                    char* * outbuf, size_t *outbytesleft)
        !           413: {
        !           414:   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
        !           415:   size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
        !           416:   if (res == (size_t)(-1))
        !           417:     return res;
        !           418:   memset(&wcd->state,0,sizeof(mbstate_t));
        !           419:   return 0;
        !           420: }
        !           421: 
        !           422: #endif
        !           423: 
        !           424: 
        !           425: /* From wchar_t to wchar_t. */
        !           426: 
        !           427: static size_t wchar_id_loop_convert (iconv_t icd,
        !           428:                                      const char* * inbuf, size_t *inbytesleft,
        !           429:                                      char* * outbuf, size_t *outbytesleft)
        !           430: {
        !           431:   struct conv_struct * cd = (struct conv_struct *) icd;
        !           432:   const wchar_t* inptr = (const wchar_t*) *inbuf;
        !           433:   size_t inleft = *inbytesleft / sizeof(wchar_t);
        !           434:   wchar_t* outptr = (wchar_t*) *outbuf;
        !           435:   size_t outleft = *outbytesleft / sizeof(wchar_t);
        !           436:   size_t count = (inleft <= outleft ? inleft : outleft);
        !           437:   if (count > 0) {
        !           438:     *inbytesleft -= count * sizeof(wchar_t);
        !           439:     *outbytesleft -= count * sizeof(wchar_t);
        !           440:     do {
        !           441:       wchar_t wc = *inptr++;
        !           442:       *outptr++ = wc;
        !           443:       #ifndef LIBICONV_PLUG
        !           444:       if (cd->hooks.wc_hook)
        !           445:         (*cd->hooks.wc_hook)(wc, cd->hooks.data);
        !           446:       #endif
        !           447:     } while (--count > 0);
        !           448:     *inbuf = (const char*) inptr;
        !           449:     *outbuf = (char*) outptr;
        !           450:   }
        !           451:   return 0;
        !           452: }
        !           453: 
        !           454: static size_t wchar_id_loop_reset (iconv_t icd,
        !           455:                                    char* * outbuf, size_t *outbytesleft)
        !           456: {
        !           457:   return 0;
        !           458: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>