File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iso2022_cn.h
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /*
    2:  * Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, see <https://www.gnu.org/licenses/>.
   18:  */
   19: 
   20: /*
   21:  * ISO-2022-CN
   22:  */
   23: 
   24: /* Specification: RFC 1922 */
   25: 
   26: #define ESC 0x1b
   27: #define SO  0x0e
   28: #define SI  0x0f
   29: 
   30: /*
   31:  * The state is composed of one of the following values
   32:  */
   33: #define STATE_ASCII          0
   34: #define STATE_TWOBYTE        1
   35: /*
   36:  * and one of the following values, << 8
   37:  */
   38: #define STATE2_NONE                   0
   39: #define STATE2_DESIGNATED_GB2312      1
   40: #define STATE2_DESIGNATED_CNS11643_1  2
   41: /*
   42:  * and one of the following values, << 16
   43:  */
   44: #define STATE3_NONE                   0
   45: #define STATE3_DESIGNATED_CNS11643_2  1
   46: 
   47: #define SPLIT_STATE \
   48:   unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16
   49: #define COMBINE_STATE \
   50:   state = (state3 << 16) | (state2 << 8) | state1
   51: 
   52: static int
   53: iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
   54: {
   55:   state_t state = conv->istate;
   56:   SPLIT_STATE;
   57:   int count = 0;
   58:   unsigned char c;
   59:   for (;;) {
   60:     c = *s;
   61:     if (c == ESC) {
   62:       if (n < count+4)
   63:         goto none;
   64:       if (s[1] == '$') {
   65:         if (s[2] == ')') {
   66:           if (s[3] == 'A') {
   67:             state2 = STATE2_DESIGNATED_GB2312;
   68:             s += 4; count += 4;
   69:             if (n < count+1)
   70:               goto none;
   71:             continue;
   72:           }
   73:           if (s[3] == 'G') {
   74:             state2 = STATE2_DESIGNATED_CNS11643_1;
   75:             s += 4; count += 4;
   76:             if (n < count+1)
   77:               goto none;
   78:             continue;
   79:           }
   80:         }
   81:         if (s[2] == '*') {
   82:           if (s[3] == 'H') {
   83:             state3 = STATE3_DESIGNATED_CNS11643_2;
   84:             s += 4; count += 4;
   85:             if (n < count+1)
   86:               goto none;
   87:             continue;
   88:           }
   89:         }
   90:       }
   91:       if (s[1] == 'N') {
   92:         switch (state3) {
   93:           case STATE3_NONE:
   94:             goto ilseq;
   95:           case STATE3_DESIGNATED_CNS11643_2:
   96:             if (s[2] < 0x80 && s[3] < 0x80) {
   97:               int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
   98:               if (ret == RET_ILSEQ)
   99:                 goto ilseq;
  100:               if (ret != 2) abort();
  101:               COMBINE_STATE;
  102:               conv->istate = state;
  103:               return count+4;
  104:             } else
  105:               goto ilseq;
  106:           default: abort();
  107:         }
  108:       }
  109:       goto ilseq;
  110:     }
  111:     if (c == SO) {
  112:       if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
  113:         goto ilseq;
  114:       state1 = STATE_TWOBYTE;
  115:       s++; count++;
  116:       if (n < count+1)
  117:         goto none;
  118:       continue;
  119:     }
  120:     if (c == SI) {
  121:       state1 = STATE_ASCII;
  122:       s++; count++;
  123:       if (n < count+1)
  124:         goto none;
  125:       continue;
  126:     }
  127:     break;
  128:   }
  129:   switch (state1) {
  130:     case STATE_ASCII:
  131:       if (c < 0x80) {
  132:         int ret = ascii_mbtowc(conv,pwc,s,1);
  133:         if (ret == RET_ILSEQ)
  134:           goto ilseq;
  135:         if (ret != 1) abort();
  136:         if (*pwc == 0x000a || *pwc == 0x000d) {
  137:           state2 = STATE2_NONE; state3 = STATE3_NONE;
  138:         }
  139:         COMBINE_STATE;
  140:         conv->istate = state;
  141:         return count+1;
  142:       } else
  143:         goto ilseq;
  144:     case STATE_TWOBYTE:
  145:       if (n < count+2)
  146:         goto none;
  147:       if (s[0] < 0x80 && s[1] < 0x80) {
  148:         int ret;
  149:         switch (state2) {
  150:           case STATE2_NONE:
  151:             goto ilseq;
  152:           case STATE2_DESIGNATED_GB2312:
  153:             ret = gb2312_mbtowc(conv,pwc,s,2); break;
  154:           case STATE2_DESIGNATED_CNS11643_1:
  155:             ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
  156:           default: abort();
  157:         }
  158:         if (ret == RET_ILSEQ)
  159:           goto ilseq;
  160:         if (ret != 2) abort();
  161:         COMBINE_STATE;
  162:         conv->istate = state;
  163:         return count+2;
  164:       } else
  165:         goto ilseq;
  166:     default: abort();
  167:   }
  168: 
  169: none:
  170:   COMBINE_STATE;
  171:   conv->istate = state;
  172:   return RET_TOOFEW(count);
  173: 
  174: ilseq:
  175:   COMBINE_STATE;
  176:   conv->istate = state;
  177:   return RET_SHIFT_ILSEQ(count);
  178: }
  179: 
  180: static int
  181: iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
  182: {
  183:   state_t state = conv->ostate;
  184:   SPLIT_STATE;
  185:   unsigned char buf[3];
  186:   int ret;
  187: 
  188:   /* There is no need to handle Unicode 3.1 tag characters and to look for
  189:      "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
  190: 
  191:   /* Try ASCII. */
  192:   ret = ascii_wctomb(conv,buf,wc,1);
  193:   if (ret != RET_ILUNI) {
  194:     if (ret != 1) abort();
  195:     if (buf[0] < 0x80) {
  196:       int count = (state1 == STATE_ASCII ? 1 : 2);
  197:       if (n < count)
  198:         return RET_TOOSMALL;
  199:       if (state1 != STATE_ASCII) {
  200:         r[0] = SI;
  201:         r += 1;
  202:         state1 = STATE_ASCII;
  203:       }
  204:       r[0] = buf[0];
  205:       if (wc == 0x000a || wc == 0x000d) {
  206:         state2 = STATE2_NONE; state3 = STATE3_NONE;
  207:       }
  208:       COMBINE_STATE;
  209:       conv->ostate = state;
  210:       return count;
  211:     }
  212:   }
  213: 
  214:   /* Try GB 2312-1980. */
  215:   ret = gb2312_wctomb(conv,buf,wc,2);
  216:   if (ret != RET_ILUNI) {
  217:     if (ret != 2) abort();
  218:     if (buf[0] < 0x80 && buf[1] < 0x80) {
  219:       int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  220:       if (n < count)
  221:         return RET_TOOSMALL;
  222:       if (state2 != STATE2_DESIGNATED_GB2312) {
  223:         r[0] = ESC;
  224:         r[1] = '$';
  225:         r[2] = ')';
  226:         r[3] = 'A';
  227:         r += 4;
  228:         state2 = STATE2_DESIGNATED_GB2312;
  229:       }
  230:       if (state1 != STATE_TWOBYTE) {
  231:         r[0] = SO;
  232:         r += 1;
  233:         state1 = STATE_TWOBYTE;
  234:       }
  235:       r[0] = buf[0];
  236:       r[1] = buf[1];
  237:       COMBINE_STATE;
  238:       conv->ostate = state;
  239:       return count;
  240:     }
  241:   }
  242: 
  243:   ret = cns11643_wctomb(conv,buf,wc,3);
  244:   if (ret != RET_ILUNI) {
  245:     if (ret != 3) abort();
  246: 
  247:     /* Try CNS 11643-1992 Plane 1. */
  248:     if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
  249:       int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  250:       if (n < count)
  251:         return RET_TOOSMALL;
  252:       if (state2 != STATE2_DESIGNATED_CNS11643_1) {
  253:         r[0] = ESC;
  254:         r[1] = '$';
  255:         r[2] = ')';
  256:         r[3] = 'G';
  257:         r += 4;
  258:         state2 = STATE2_DESIGNATED_CNS11643_1;
  259:       }
  260:       if (state1 != STATE_TWOBYTE) {
  261:         r[0] = SO;
  262:         r += 1;
  263:         state1 = STATE_TWOBYTE;
  264:       }
  265:       r[0] = buf[1];
  266:       r[1] = buf[2];
  267:       COMBINE_STATE;
  268:       conv->ostate = state;
  269:       return count;
  270:     }
  271: 
  272:     /* Try CNS 11643-1992 Plane 2. */
  273:     if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
  274:       int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
  275:       if (n < count)
  276:         return RET_TOOSMALL;
  277:       if (state3 != STATE3_DESIGNATED_CNS11643_2) {
  278:         r[0] = ESC;
  279:         r[1] = '$';
  280:         r[2] = '*';
  281:         r[3] = 'H';
  282:         r += 4;
  283:         state3 = STATE3_DESIGNATED_CNS11643_2;
  284:       }
  285:       r[0] = ESC;
  286:       r[1] = 'N';
  287:       r[2] = buf[1];
  288:       r[3] = buf[2];
  289:       COMBINE_STATE;
  290:       conv->ostate = state;
  291:       return count;
  292:     }
  293:   }
  294: 
  295:   return RET_ILUNI;
  296: }
  297: 
  298: static int
  299: iso2022_cn_reset (conv_t conv, unsigned char *r, size_t n)
  300: {
  301:   state_t state = conv->ostate;
  302:   SPLIT_STATE;
  303:   (void)state2;
  304:   (void)state3;
  305:   if (state1 != STATE_ASCII) {
  306:     if (n < 1)
  307:       return RET_TOOSMALL;
  308:     r[0] = SI;
  309:     /* conv->ostate = 0; will be done by the caller */
  310:     return 1;
  311:   } else
  312:     return 0;
  313: }
  314: 
  315: #undef COMBINE_STATE
  316: #undef SPLIT_STATE
  317: #undef STATE3_DESIGNATED_CNS11643_2
  318: #undef STATE3_NONE
  319: #undef STATE2_DESIGNATED_CNS11643_1
  320: #undef STATE2_DESIGNATED_GB2312
  321: #undef STATE2_NONE
  322: #undef STATE_TWOBYTE
  323: #undef STATE_ASCII

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>