File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iso2022_cn.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 5 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_14p0, v1_14, v1_13_1, HEAD
libiconv

    1: /*
    2:  * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   18:  * Fifth Floor, Boston, MA 02110-1301, USA.
   19:  */
   20: 
   21: /*
   22:  * ISO-2022-CN
   23:  */
   24: 
   25: /* Specification: RFC 1922 */
   26: 
/* Control bytes tested and emitted by the converters below:
   escape, shift-out and shift-in. */
#define ESC 0x1b
#define SO  0x0e
#define SI  0x0f

/*
 * The conversion state is packed into a single integer as three fields
 * (see SPLIT_STATE / COMBINE_STATE below).
 *
 * Bits 0..7 hold the current shift state, one of the following values:
 */
#define STATE_ASCII          0
#define STATE_TWOBYTE        1
/*
 * Bits 8..15 hold one of the following values (stored << 8); this is the
 * character set that two-byte mode decodes with:
 */
#define STATE2_NONE                   0
#define STATE2_DESIGNATED_GB2312      1
#define STATE2_DESIGNATED_CNS11643_1  2
/*
 * The bits from 16 upwards hold one of the following values (stored << 16);
 * this is the character set used by the ESC N two-byte sequences:
 */
#define STATE3_NONE                   0
#define STATE3_DESIGNATED_CNS11643_2  1

/* Declares the locals state1, state2 and state3 and fills them from the
   packed variable `state`, which must already be in scope.  Because it
   expands to a declaration, it has to appear among the declarations of a
   block. */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16
/* Packs state1, state2 and state3 back into `state`.  The expansion carries
   no trailing semicolon; the caller supplies it. */
#define COMBINE_STATE \
  state = (state3 << 16) | (state2 << 8) | state1
   52: 
   53: static int
   54: iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
   55: {
   56:   state_t state = conv->istate;
   57:   SPLIT_STATE;
   58:   int count = 0;
   59:   unsigned char c;
   60:   for (;;) {
   61:     c = *s;
   62:     if (c == ESC) {
   63:       if (n < count+4)
   64:         goto none;
   65:       if (s[1] == '$') {
   66:         if (s[2] == ')') {
   67:           if (s[3] == 'A') {
   68:             state2 = STATE2_DESIGNATED_GB2312;
   69:             s += 4; count += 4;
   70:             if (n < count+1)
   71:               goto none;
   72:             continue;
   73:           }
   74:           if (s[3] == 'G') {
   75:             state2 = STATE2_DESIGNATED_CNS11643_1;
   76:             s += 4; count += 4;
   77:             if (n < count+1)
   78:               goto none;
   79:             continue;
   80:           }
   81:         }
   82:         if (s[2] == '*') {
   83:           if (s[3] == 'H') {
   84:             state3 = STATE3_DESIGNATED_CNS11643_2;
   85:             s += 4; count += 4;
   86:             if (n < count+1)
   87:               goto none;
   88:             continue;
   89:           }
   90:         }
   91:       }
   92:       if (s[1] == 'N') {
   93:         switch (state3) {
   94:           case STATE3_NONE:
   95:             goto ilseq;
   96:           case STATE3_DESIGNATED_CNS11643_2:
   97:             if (s[2] < 0x80 && s[3] < 0x80) {
   98:               int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
   99:               if (ret == RET_ILSEQ)
  100:                 goto ilseq;
  101:               if (ret != 2) abort();
  102:               COMBINE_STATE;
  103:               conv->istate = state;
  104:               return count+4;
  105:             } else
  106:               goto ilseq;
  107:           default: abort();
  108:         }
  109:       }
  110:       goto ilseq;
  111:     }
  112:     if (c == SO) {
  113:       if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
  114:         goto ilseq;
  115:       state1 = STATE_TWOBYTE;
  116:       s++; count++;
  117:       if (n < count+1)
  118:         goto none;
  119:       continue;
  120:     }
  121:     if (c == SI) {
  122:       state1 = STATE_ASCII;
  123:       s++; count++;
  124:       if (n < count+1)
  125:         goto none;
  126:       continue;
  127:     }
  128:     break;
  129:   }
  130:   switch (state1) {
  131:     case STATE_ASCII:
  132:       if (c < 0x80) {
  133:         int ret = ascii_mbtowc(conv,pwc,s,1);
  134:         if (ret == RET_ILSEQ)
  135:           goto ilseq;
  136:         if (ret != 1) abort();
  137:         if (*pwc == 0x000a || *pwc == 0x000d) {
  138:           state2 = STATE2_NONE; state3 = STATE3_NONE;
  139:         }
  140:         COMBINE_STATE;
  141:         conv->istate = state;
  142:         return count+1;
  143:       } else
  144:         goto ilseq;
  145:     case STATE_TWOBYTE:
  146:       if (n < count+2)
  147:         goto none;
  148:       if (s[0] < 0x80 && s[1] < 0x80) {
  149:         int ret;
  150:         switch (state2) {
  151:           case STATE2_NONE:
  152:             goto ilseq;
  153:           case STATE2_DESIGNATED_GB2312:
  154:             ret = gb2312_mbtowc(conv,pwc,s,2); break;
  155:           case STATE2_DESIGNATED_CNS11643_1:
  156:             ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
  157:           default: abort();
  158:         }
  159:         if (ret == RET_ILSEQ)
  160:           goto ilseq;
  161:         if (ret != 2) abort();
  162:         COMBINE_STATE;
  163:         conv->istate = state;
  164:         return count+2;
  165:       } else
  166:         goto ilseq;
  167:     default: abort();
  168:   }
  169: 
  170: none:
  171:   COMBINE_STATE;
  172:   conv->istate = state;
  173:   return RET_TOOFEW(count);
  174: 
  175: ilseq:
  176:   COMBINE_STATE;
  177:   conv->istate = state;
  178:   return RET_SHIFT_ILSEQ(count);
  179: }
  180: 
  181: static int
  182: iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
  183: {
  184:   state_t state = conv->ostate;
  185:   SPLIT_STATE;
  186:   unsigned char buf[3];
  187:   int ret;
  188: 
  189:   /* There is no need to handle Unicode 3.1 tag characters and to look for
  190:      "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
  191: 
  192:   /* Try ASCII. */
  193:   ret = ascii_wctomb(conv,buf,wc,1);
  194:   if (ret != RET_ILUNI) {
  195:     if (ret != 1) abort();
  196:     if (buf[0] < 0x80) {
  197:       int count = (state1 == STATE_ASCII ? 1 : 2);
  198:       if (n < count)
  199:         return RET_TOOSMALL;
  200:       if (state1 != STATE_ASCII) {
  201:         r[0] = SI;
  202:         r += 1;
  203:         state1 = STATE_ASCII;
  204:       }
  205:       r[0] = buf[0];
  206:       if (wc == 0x000a || wc == 0x000d) {
  207:         state2 = STATE2_NONE; state3 = STATE3_NONE;
  208:       }
  209:       COMBINE_STATE;
  210:       conv->ostate = state;
  211:       return count;
  212:     }
  213:   }
  214: 
  215:   /* Try GB 2312-1980. */
  216:   ret = gb2312_wctomb(conv,buf,wc,2);
  217:   if (ret != RET_ILUNI) {
  218:     if (ret != 2) abort();
  219:     if (buf[0] < 0x80 && buf[1] < 0x80) {
  220:       int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  221:       if (n < count)
  222:         return RET_TOOSMALL;
  223:       if (state2 != STATE2_DESIGNATED_GB2312) {
  224:         r[0] = ESC;
  225:         r[1] = '$';
  226:         r[2] = ')';
  227:         r[3] = 'A';
  228:         r += 4;
  229:         state2 = STATE2_DESIGNATED_GB2312;
  230:       }
  231:       if (state1 != STATE_TWOBYTE) {
  232:         r[0] = SO;
  233:         r += 1;
  234:         state1 = STATE_TWOBYTE;
  235:       }
  236:       r[0] = buf[0];
  237:       r[1] = buf[1];
  238:       COMBINE_STATE;
  239:       conv->ostate = state;
  240:       return count;
  241:     }
  242:   }
  243: 
  244:   ret = cns11643_wctomb(conv,buf,wc,3);
  245:   if (ret != RET_ILUNI) {
  246:     if (ret != 3) abort();
  247: 
  248:     /* Try CNS 11643-1992 Plane 1. */
  249:     if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
  250:       int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  251:       if (n < count)
  252:         return RET_TOOSMALL;
  253:       if (state2 != STATE2_DESIGNATED_CNS11643_1) {
  254:         r[0] = ESC;
  255:         r[1] = '$';
  256:         r[2] = ')';
  257:         r[3] = 'G';
  258:         r += 4;
  259:         state2 = STATE2_DESIGNATED_CNS11643_1;
  260:       }
  261:       if (state1 != STATE_TWOBYTE) {
  262:         r[0] = SO;
  263:         r += 1;
  264:         state1 = STATE_TWOBYTE;
  265:       }
  266:       r[0] = buf[1];
  267:       r[1] = buf[2];
  268:       COMBINE_STATE;
  269:       conv->ostate = state;
  270:       return count;
  271:     }
  272: 
  273:     /* Try CNS 11643-1992 Plane 2. */
  274:     if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
  275:       int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
  276:       if (n < count)
  277:         return RET_TOOSMALL;
  278:       if (state3 != STATE3_DESIGNATED_CNS11643_2) {
  279:         r[0] = ESC;
  280:         r[1] = '$';
  281:         r[2] = '*';
  282:         r[3] = 'H';
  283:         r += 4;
  284:         state3 = STATE3_DESIGNATED_CNS11643_2;
  285:       }
  286:       r[0] = ESC;
  287:       r[1] = 'N';
  288:       r[2] = buf[1];
  289:       r[3] = buf[2];
  290:       COMBINE_STATE;
  291:       conv->ostate = state;
  292:       return count;
  293:     }
  294:   }
  295: 
  296:   return RET_ILUNI;
  297: }
  298: 
  299: static int
  300: iso2022_cn_reset (conv_t conv, unsigned char *r, int n)
  301: {
  302:   state_t state = conv->ostate;
  303:   SPLIT_STATE;
  304:   (void)state2;
  305:   (void)state3;
  306:   if (state1 != STATE_ASCII) {
  307:     if (n < 1)
  308:       return RET_TOOSMALL;
  309:     r[0] = SI;
  310:     /* conv->ostate = 0; will be done by the caller */
  311:     return 1;
  312:   } else
  313:     return 0;
  314: }
  315: 
/* Remove the state macros defined above, in reverse order of definition.
   ESC, SO and SI are not undefined here. */
#undef COMBINE_STATE
#undef SPLIT_STATE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE3_NONE
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_NONE
#undef STATE_TWOBYTE
#undef STATE_ASCII

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>