File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / extras / iconv_string.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 4 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_13_1, HEAD
libiconv

    1: /* Copyright (C) 1999-2001, 2003 Bruno Haible.
    2:    This file is not part of the GNU LIBICONV Library.
    3:    This file is put into the public domain.  */
    4: 
    5: #include "iconv_string.h"
    6: #include <iconv.h>
    7: #include <errno.h>
    8: #include <stdlib.h>
    9: #include <string.h>
   10: 
   11: #define tmpbufsize 4096
   12: 
   13: int iconv_string (const char* tocode, const char* fromcode,
   14:                   const char* start, const char* end,
   15:                   char** resultp, size_t* lengthp)
   16: {
   17:   iconv_t cd = iconv_open(tocode,fromcode);
   18:   size_t length;
   19:   char* result;
   20:   if (cd == (iconv_t)(-1)) {
   21:     if (errno != EINVAL)
   22:       return -1;
   23:     /* Unsupported fromcode or tocode. Check whether the caller requested
   24:        autodetection. */
   25:     if (!strcmp(fromcode,"autodetect_utf8")) {
   26:       int ret;
   27:       /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
   28:          be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
   29:       ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp);
   30:       if (!(ret < 0 && errno == EILSEQ))
   31:         return ret;
   32:       ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp);
   33:       return ret;
   34:     }
   35:     if (!strcmp(fromcode,"autodetect_jp")) {
   36:       int ret;
   37:       /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
   38:          it will fail. */
   39:       ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp);
   40:       if (!(ret < 0 && errno == EILSEQ))
   41:         return ret;
   42:       /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
   43:          is unavoidable. People will condemn SHIFT_JIS.
   44:          If we tried SHIFT_JIS first, then some short EUC-JP inputs would
   45:          come out wrong, and people would condemn EUC-JP and Unix, which
   46:          would not be good. */
   47:       ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp);
   48:       if (!(ret < 0 && errno == EILSEQ))
   49:         return ret;
   50:       /* Finally try SHIFT_JIS. */
   51:       ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp);
   52:       return ret;
   53:     }
   54:     if (!strcmp(fromcode,"autodetect_kr")) {
   55:       int ret;
   56:       /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
   57:          it will fail. */
   58:       ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp);
   59:       if (!(ret < 0 && errno == EILSEQ))
   60:         return ret;
   61:       /* Finally try EUC-KR. */
   62:       ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp);
   63:       return ret;
   64:     }
   65:     errno = EINVAL;
   66:     return -1;
   67:   }
   68:   /* Determine the length we need. */
   69:   {
   70:     size_t count = 0;
   71:     char tmpbuf[tmpbufsize];
   72:     const char* inptr = start;
   73:     size_t insize = end-start;
   74:     while (insize > 0) {
   75:       char* outptr = tmpbuf;
   76:       size_t outsize = tmpbufsize;
   77:       size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize);
   78:       if (res == (size_t)(-1) && errno != E2BIG) {
   79:         if (errno == EINVAL)
   80:           break;
   81:         else {
   82:           int saved_errno = errno;
   83:           iconv_close(cd);
   84:           errno = saved_errno;
   85:           return -1;
   86:         }
   87:       }
   88:       count += outptr-tmpbuf;
   89:     }
   90:     {
   91:       char* outptr = tmpbuf;
   92:       size_t outsize = tmpbufsize;
   93:       size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
   94:       if (res == (size_t)(-1)) {
   95:         int saved_errno = errno;
   96:         iconv_close(cd);
   97:         errno = saved_errno;
   98:         return -1;
   99:       }
  100:       count += outptr-tmpbuf;
  101:     }
  102:     length = count;
  103:   }
  104:   if (lengthp != NULL)
  105:     *lengthp = length;
  106:   if (resultp == NULL) {
  107:     iconv_close(cd);
  108:     return 0;
  109:   }
  110:   result = (*resultp == NULL ? malloc(length) : realloc(*resultp,length));
  111:   *resultp = result;
  112:   if (length == 0) {
  113:     iconv_close(cd);
  114:     return 0;
  115:   }
  116:   if (result == NULL) {
  117:     iconv_close(cd);
  118:     errno = ENOMEM;
  119:     return -1;
  120:   }
  121:   iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */
  122:   /* Do the conversion for real. */
  123:   {
  124:     const char* inptr = start;
  125:     size_t insize = end-start;
  126:     char* outptr = result;
  127:     size_t outsize = length;
  128:     while (insize > 0) {
  129:       size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize);
  130:       if (res == (size_t)(-1)) {
  131:         if (errno == EINVAL)
  132:           break;
  133:         else {
  134:           int saved_errno = errno;
  135:           iconv_close(cd);
  136:           errno = saved_errno;
  137:           return -1;
  138:         }
  139:       }
  140:     }
  141:     {
  142:       size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
  143:       if (res == (size_t)(-1)) {
  144:         int saved_errno = errno;
  145:         iconv_close(cd);
  146:         errno = saved_errno;
  147:         return -1;
  148:       }
  149:     }
  150:     if (outsize != 0) abort();
  151:   }
  152:   iconv_close(cd);
  153:   return 0;
  154: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>