File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / extras / iconv_string.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue May 29 09:29:43 2012 UTC (12 years, 7 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, v1_14p0, v1_14, HEAD
libiconv v1.14

    1: /* Copyright (C) 1999-2001, 2003, 2011 Bruno Haible.
    2:    This file is not part of the GNU LIBICONV Library.
    3:    This file is put into the public domain.  */
    4: 
    5: #include "iconv_string.h"
    6: #include <iconv.h>
    7: #include <errno.h>
    8: #include <stdlib.h>
    9: #include <string.h>
   10: 
   11: #define tmpbufsize 4096
   12: 
   13: int iconv_string (const char* tocode, const char* fromcode,
   14:                   const char* start, const char* end,
   15:                   char** resultp, size_t* lengthp)
   16: {
   17:   iconv_t cd = iconv_open(tocode,fromcode);
   18:   size_t length;
   19:   char* result;
   20:   if (cd == (iconv_t)(-1)) {
   21:     if (errno != EINVAL)
   22:       return -1;
   23:     /* Unsupported fromcode or tocode. Check whether the caller requested
   24:        autodetection. */
   25:     if (!strcmp(fromcode,"autodetect_utf8")) {
   26:       int ret;
   27:       /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
   28:          be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
   29:       ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp);
   30:       if (!(ret < 0 && errno == EILSEQ))
   31:         return ret;
   32:       ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp);
   33:       return ret;
   34:     }
   35:     if (!strcmp(fromcode,"autodetect_jp")) {
   36:       int ret;
   37:       /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
   38:          it will fail. */
   39:       ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp);
   40:       if (!(ret < 0 && errno == EILSEQ))
   41:         return ret;
   42:       /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
   43:          is unavoidable. People will condemn SHIFT_JIS.
   44:          If we tried SHIFT_JIS first, then some short EUC-JP inputs would
   45:          come out wrong, and people would condemn EUC-JP and Unix, which
   46:          would not be good. */
   47:       ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp);
   48:       if (!(ret < 0 && errno == EILSEQ))
   49:         return ret;
   50:       /* Finally try SHIFT_JIS. */
   51:       ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp);
   52:       return ret;
   53:     }
   54:     if (!strcmp(fromcode,"autodetect_kr")) {
   55:       int ret;
   56:       /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
   57:          it will fail. */
   58:       ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp);
   59:       if (!(ret < 0 && errno == EILSEQ))
   60:         return ret;
   61:       /* Finally try EUC-KR. */
   62:       ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp);
   63:       return ret;
   64:     }
   65:     errno = EINVAL;
   66:     return -1;
   67:   }
   68:   /* Determine the length we need. */
   69:   {
   70:     size_t count = 0;
   71:     char tmpbuf[tmpbufsize];
   72:     const char* inptr = start;
   73:     size_t insize = end-start;
   74:     while (insize > 0) {
   75:       char* outptr = tmpbuf;
   76:       size_t outsize = tmpbufsize;
   77:       size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize);
   78:       if (res == (size_t)(-1) && errno != E2BIG) {
   79:         int saved_errno = (errno == EINVAL ? EILSEQ : errno);
   80:         iconv_close(cd);
   81:         errno = saved_errno;
   82:         return -1;
   83:       }
   84:       count += outptr-tmpbuf;
   85:     }
   86:     {
   87:       char* outptr = tmpbuf;
   88:       size_t outsize = tmpbufsize;
   89:       size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
   90:       if (res == (size_t)(-1)) {
   91:         int saved_errno = errno;
   92:         iconv_close(cd);
   93:         errno = saved_errno;
   94:         return -1;
   95:       }
   96:       count += outptr-tmpbuf;
   97:     }
   98:     length = count;
   99:   }
  100:   if (lengthp != NULL)
  101:     *lengthp = length;
  102:   if (resultp == NULL) {
  103:     iconv_close(cd);
  104:     return 0;
  105:   }
  106:   result = (*resultp == NULL ? malloc(length) : realloc(*resultp,length));
  107:   *resultp = result;
  108:   if (length == 0) {
  109:     iconv_close(cd);
  110:     return 0;
  111:   }
  112:   if (result == NULL) {
  113:     iconv_close(cd);
  114:     errno = ENOMEM;
  115:     return -1;
  116:   }
  117:   iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */
  118:   /* Do the conversion for real. */
  119:   {
  120:     const char* inptr = start;
  121:     size_t insize = end-start;
  122:     char* outptr = result;
  123:     size_t outsize = length;
  124:     while (insize > 0) {
  125:       size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize);
  126:       if (res == (size_t)(-1)) {
  127:         if (errno == EINVAL)
  128:           break;
  129:         else {
  130:           int saved_errno = errno;
  131:           iconv_close(cd);
  132:           errno = saved_errno;
  133:           return -1;
  134:         }
  135:       }
  136:     }
  137:     {
  138:       size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
  139:       if (res == (size_t)(-1)) {
  140:         int saved_errno = errno;
  141:         iconv_close(cd);
  142:         errno = saved_errno;
  143:         return -1;
  144:       }
  145:     }
  146:     if (outsize != 0) abort();
  147:   }
  148:   iconv_close(cd);
  149:   return 0;
  150: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>