File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iso2022_kr.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 4 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_14p0, v1_14, v1_13_1, HEAD
libiconv

    1: /*
    2:  * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   18:  * Fifth Floor, Boston, MA 02110-1301, USA.
   19:  */
   20: 
   21: /*
   22:  * ISO-2022-KR
   23:  */
   24: 
   25: /* Specification: RFC 1557 */
   26: 
   27: /* Note: CJK.INF says the SO designator needs to appear only once at the
   28:    beginning of a text, but to decrease the risk of ambiguities, when
   29:    producing ISO-2022-KR, we repeat the designator in every line containing
   30:    SO characters. RFC 1557 does not mandate this. */
   31: 
/* Control bytes tested on input and emitted on output by the converters
   in this file. */
#define ESC 0x1b  /* first byte of the 4-byte designator ESC '$' ')' 'C' */
#define SO  0x0e  /* switches the shift state to STATE_TWOBYTE */
#define SI  0x0f  /* switches the shift state back to STATE_ASCII */

/*
 * The conversion state is a single integer made of two fields.
 *
 * The low 8 bits ("state1") hold the current shift state, one of the
 * following values:
 */
#define STATE_ASCII          0
#define STATE_TWOBYTE        1
/*
 * The bits above the low 8 ("state2", i.e. the value << 8) record whether
 * the KS C 5601 designator has been seen (decoder) or emitted (encoder),
 * one of the following values:
 */
#define STATE2_NONE                0
#define STATE2_DESIGNATED_KSC5601  1

/* SPLIT_STATE declares the locals state1 and state2 from a variable named
   `state` that must already be in scope; since it expands to a declaration
   it has to be placed among the declarations of a block. */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = state >> 8
/* COMBINE_STATE packs state1 and state2 back into `state`; it does not
   store the result into the conv object, the user does that separately. */
#define COMBINE_STATE \
  state = (state2 << 8) | state1
   51: 
   52: static int
   53: iso2022_kr_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
   54: {
   55:   state_t state = conv->istate;
   56:   SPLIT_STATE;
   57:   int count = 0;
   58:   unsigned char c;
   59:   for (;;) {
   60:     c = *s;
   61:     if (c == ESC) {
   62:       if (n < count+4)
   63:         goto none;
   64:       if (s[1] == '$') {
   65:         if (s[2] == ')') {
   66:           if (s[3] == 'C') {
   67:             state2 = STATE2_DESIGNATED_KSC5601;
   68:             s += 4; count += 4;
   69:             if (n < count+1)
   70:               goto none;
   71:             continue;
   72:           }
   73:         }
   74:       }
   75:       goto ilseq;
   76:     }
   77:     if (c == SO) {
   78:       if (state2 != STATE2_DESIGNATED_KSC5601)
   79:         goto ilseq;
   80:       state1 = STATE_TWOBYTE;
   81:       s++; count++;
   82:       if (n < count+1)
   83:         goto none;
   84:       continue;
   85:     }
   86:     if (c == SI) {
   87:       state1 = STATE_ASCII;
   88:       s++; count++;
   89:       if (n < count+1)
   90:         goto none;
   91:       continue;
   92:     }
   93:     break;
   94:   }
   95:   switch (state1) {
   96:     case STATE_ASCII:
   97:       if (c < 0x80) {
   98:         int ret = ascii_mbtowc(conv,pwc,s,1);
   99:         if (ret == RET_ILSEQ)
  100:           goto ilseq;
  101:         if (ret != 1) abort();
  102: #if 0 /* Accept ISO-2022-KR according to CJK.INF. */
  103:         if (*pwc == 0x000a || *pwc == 0x000d)
  104:           state2 = STATE2_NONE;
  105: #endif
  106:         COMBINE_STATE;
  107:         conv->istate = state;
  108:         return count+1;
  109:       } else
  110:         goto ilseq;
  111:     case STATE_TWOBYTE:
  112:       if (n < count+2)
  113:         goto none;
  114:       if (state2 != STATE2_DESIGNATED_KSC5601) abort();
  115:       if (s[0] < 0x80 && s[1] < 0x80) {
  116:         int ret = ksc5601_mbtowc(conv,pwc,s,2);
  117:         if (ret == RET_ILSEQ)
  118:           goto ilseq;
  119:         if (ret != 2) abort();
  120:         COMBINE_STATE;
  121:         conv->istate = state;
  122:         return count+2;
  123:       } else
  124:         goto ilseq;
  125:     default: abort();
  126:   }
  127: 
  128: none:
  129:   COMBINE_STATE;
  130:   conv->istate = state;
  131:   return RET_TOOFEW(count);
  132: 
  133: ilseq:
  134:   COMBINE_STATE;
  135:   conv->istate = state;
  136:   return RET_SHIFT_ILSEQ(count);
  137: }
  138: 
  139: static int
  140: iso2022_kr_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
  141: {
  142:   state_t state = conv->ostate;
  143:   SPLIT_STATE;
  144:   unsigned char buf[2];
  145:   int ret;
  146: 
  147:   /* Try ASCII. */
  148:   ret = ascii_wctomb(conv,buf,wc,1);
  149:   if (ret != RET_ILUNI) {
  150:     if (ret != 1) abort();
  151:     if (buf[0] < 0x80) {
  152:       int count = (state1 == STATE_ASCII ? 1 : 2);
  153:       if (n < count)
  154:         return RET_TOOSMALL;
  155:       if (state1 != STATE_ASCII) {
  156:         r[0] = SI;
  157:         r += 1;
  158:         state1 = STATE_ASCII;
  159:       }
  160:       r[0] = buf[0];
  161:       if (wc == 0x000a || wc == 0x000d)
  162:         state2 = STATE2_NONE;
  163:       COMBINE_STATE;
  164:       conv->ostate = state;
  165:       return count;
  166:     }
  167:   }
  168: 
  169:   /* Try KS C 5601-1992. */
  170:   ret = ksc5601_wctomb(conv,buf,wc,2);
  171:   if (ret != RET_ILUNI) {
  172:     if (ret != 2) abort();
  173:     if (buf[0] < 0x80 && buf[1] < 0x80) {
  174:       int count = (state2 == STATE2_DESIGNATED_KSC5601 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  175:       if (n < count)
  176:         return RET_TOOSMALL;
  177:       if (state2 != STATE2_DESIGNATED_KSC5601) {
  178:         r[0] = ESC;
  179:         r[1] = '$';
  180:         r[2] = ')';
  181:         r[3] = 'C';
  182:         r += 4;
  183:         state2 = STATE2_DESIGNATED_KSC5601;
  184:       }
  185:       if (state1 != STATE_TWOBYTE) {
  186:         r[0] = SO;
  187:         r += 1;
  188:         state1 = STATE_TWOBYTE;
  189:       }
  190:       r[0] = buf[0];
  191:       r[1] = buf[1];
  192:       COMBINE_STATE;
  193:       conv->ostate = state;
  194:       return count;
  195:     }
  196:   }
  197: 
  198:   return RET_ILUNI;
  199: }
  200: 
  201: static int
  202: iso2022_kr_reset (conv_t conv, unsigned char *r, int n)
  203: {
  204:   state_t state = conv->ostate;
  205:   SPLIT_STATE;
  206:   (void)state2;
  207:   if (state1 != STATE_ASCII) {
  208:     if (n < 1)
  209:       return RET_TOOSMALL;
  210:     r[0] = SI;
  211:     /* conv->ostate = 0; will be done by the caller */
  212:     return 1;
  213:   } else
  214:     return 0;
  215: }
  216: 
/* Drop the state helper macros defined earlier in this file, in reverse
   order of definition, so their names do not leak past this point.
   NOTE(review): ESC, SO and SI are not #undef'd here -- presumably left
   defined on purpose for other code; confirm. */
#undef COMBINE_STATE
#undef SPLIT_STATE
#undef STATE2_DESIGNATED_KSC5601
#undef STATE2_NONE
#undef STATE_TWOBYTE
#undef STATE_ASCII

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>