File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iso2022_cnext.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 5 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_14p0, v1_14, v1_13_1, HEAD
libiconv

/*
 * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */
   20: 
/*
 * ISO-2022-CN-EXT
 */

/* Specification: RFC 1922 */
   26: 
   27: #define ESC 0x1b
   28: #define SO  0x0e
   29: #define SI  0x0f
   30: 
   31: /*
   32:  * The state is composed of one of the following values
   33:  */
   34: #define STATE_ASCII          0
   35: #define STATE_TWOBYTE        1
   36: /*
   37:  * and one of the following values, << 8
   38:  */
   39: #define STATE2_NONE                   0
   40: #define STATE2_DESIGNATED_GB2312      1
   41: #define STATE2_DESIGNATED_CNS11643_1  2
   42: #define STATE2_DESIGNATED_ISO_IR_165  3
   43: /*
   44:  * and one of the following values, << 16
   45:  */
   46: #define STATE3_NONE                   0
   47: #define STATE3_DESIGNATED_CNS11643_2  1
   48: /*
   49:  * and one of the following values, << 24
   50:  */
   51: #define STATE4_NONE                   0
   52: #define STATE4_DESIGNATED_CNS11643_3  1
   53: #define STATE4_DESIGNATED_CNS11643_4  2
   54: #define STATE4_DESIGNATED_CNS11643_5  3
   55: #define STATE4_DESIGNATED_CNS11643_6  4
   56: #define STATE4_DESIGNATED_CNS11643_7  5
   57: 
   58: #define SPLIT_STATE \
   59:   unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
   60: #define COMBINE_STATE \
   61:   state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
   62: 
   63: static int
   64: iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
   65: {
   66:   state_t state = conv->istate;
   67:   SPLIT_STATE;
   68:   int count = 0;
   69:   unsigned char c;
   70:   for (;;) {
   71:     c = *s;
   72:     if (c == ESC) {
   73:       if (n < count+4)
   74:         goto none;
   75:       if (s[1] == '$') {
   76:         if (s[2] == ')') {
   77:           if (s[3] == 'A') {
   78:             state2 = STATE2_DESIGNATED_GB2312;
   79:             s += 4; count += 4;
   80:             if (n < count+1)
   81:               goto none;
   82:             continue;
   83:           }
   84:           if (s[3] == 'G') {
   85:             state2 = STATE2_DESIGNATED_CNS11643_1;
   86:             s += 4; count += 4;
   87:             if (n < count+1)
   88:               goto none;
   89:             continue;
   90:           }
   91:           if (s[3] == 'E') {
   92:             state2 = STATE2_DESIGNATED_ISO_IR_165;
   93:             s += 4; count += 4;
   94:             if (n < count+1)
   95:               goto none;
   96:             continue;
   97:           }
   98:         }
   99:         if (s[2] == '*') {
  100:           if (s[3] == 'H') {
  101:             state3 = STATE3_DESIGNATED_CNS11643_2;
  102:             s += 4; count += 4;
  103:             if (n < count+1)
  104:               goto none;
  105:             continue;
  106:           }
  107:         }
  108:         if (s[2] == '+') {
  109:           if (s[3] == 'I') {
  110:             state4 = STATE4_DESIGNATED_CNS11643_3;
  111:             s += 4; count += 4;
  112:             if (n < count+1)
  113:               goto none;
  114:             continue;
  115:           }
  116:           if (s[3] == 'J') {
  117:             state4 = STATE4_DESIGNATED_CNS11643_4;
  118:             s += 4; count += 4;
  119:             if (n < count+1)
  120:               goto none;
  121:             continue;
  122:           }
  123:           if (s[3] == 'K') {
  124:             state4 = STATE4_DESIGNATED_CNS11643_5;
  125:             s += 4; count += 4;
  126:             if (n < count+1)
  127:               goto none;
  128:             continue;
  129:           }
  130:           if (s[3] == 'L') {
  131:             state4 = STATE4_DESIGNATED_CNS11643_6;
  132:             s += 4; count += 4;
  133:             if (n < count+1)
  134:               goto none;
  135:             continue;
  136:           }
  137:           if (s[3] == 'M') {
  138:             state4 = STATE4_DESIGNATED_CNS11643_7;
  139:             s += 4; count += 4;
  140:             if (n < count+1)
  141:               goto none;
  142:             continue;
  143:           }
  144:         }
  145:       }
  146:       if (s[1] == 'N') {
  147:         switch (state3) {
  148:           case STATE3_NONE:
  149:             goto ilseq;
  150:           case STATE3_DESIGNATED_CNS11643_2:
  151:             if (s[2] < 0x80 && s[3] < 0x80) {
  152:               int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
  153:               if (ret == RET_ILSEQ)
  154:                 goto ilseq;
  155:               if (ret != 2) abort();
  156:               COMBINE_STATE;
  157:               conv->istate = state;
  158:               return count+4;
  159:             } else
  160:               goto ilseq;
  161:           default: abort();
  162:         }
  163:       }
  164:       if (s[1] == 'O') {
  165:         switch (state4) {
  166:           case STATE4_NONE:
  167:             goto ilseq;
  168:           case STATE4_DESIGNATED_CNS11643_3:
  169:             if (s[2] < 0x80 && s[3] < 0x80) {
  170:               int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
  171:               if (ret == RET_ILSEQ)
  172:                 goto ilseq;
  173:               if (ret != 2) abort();
  174:               COMBINE_STATE;
  175:               conv->istate = state;
  176:               return count+4;
  177:             } else
  178:               goto ilseq;
  179:           case STATE4_DESIGNATED_CNS11643_4:
  180:             if (s[2] < 0x80 && s[3] < 0x80) {
  181:               int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
  182:               if (ret == RET_ILSEQ)
  183:                 goto ilseq;
  184:               if (ret != 2) abort();
  185:               COMBINE_STATE;
  186:               conv->istate = state;
  187:               return count+4;
  188:             } else
  189:               goto ilseq;
  190:           case STATE4_DESIGNATED_CNS11643_5:
  191:             if (s[2] < 0x80 && s[3] < 0x80) {
  192:               int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
  193:               if (ret == RET_ILSEQ)
  194:                 goto ilseq;
  195:               if (ret != 2) abort();
  196:               COMBINE_STATE;
  197:               conv->istate = state;
  198:               return count+4;
  199:             } else
  200:               goto ilseq;
  201:           case STATE4_DESIGNATED_CNS11643_6:
  202:             if (s[2] < 0x80 && s[3] < 0x80) {
  203:               int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
  204:               if (ret == RET_ILSEQ)
  205:                 goto ilseq;
  206:               if (ret != 2) abort();
  207:               COMBINE_STATE;
  208:               conv->istate = state;
  209:               return count+4;
  210:             } else
  211:               goto ilseq;
  212:           case STATE4_DESIGNATED_CNS11643_7:
  213:             if (s[2] < 0x80 && s[3] < 0x80) {
  214:               int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
  215:               if (ret == RET_ILSEQ)
  216:                 goto ilseq;
  217:               if (ret != 2) abort();
  218:               COMBINE_STATE;
  219:               conv->istate = state;
  220:               return count+4;
  221:             } else
  222:               goto ilseq;
  223:           default: abort();
  224:         }
  225:       }
  226:       goto ilseq;
  227:     }
  228:     if (c == SO) {
  229:       if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
  230:         goto ilseq;
  231:       state1 = STATE_TWOBYTE;
  232:       s++; count++;
  233:       if (n < count+1)
  234:         goto none;
  235:       continue;
  236:     }
  237:     if (c == SI) {
  238:       state1 = STATE_ASCII;
  239:       s++; count++;
  240:       if (n < count+1)
  241:         goto none;
  242:       continue;
  243:     }
  244:     break;
  245:   }
  246:   switch (state1) {
  247:     case STATE_ASCII:
  248:       if (c < 0x80) {
  249:         int ret = ascii_mbtowc(conv,pwc,s,1);
  250:         if (ret == RET_ILSEQ)
  251:           goto ilseq;
  252:         if (ret != 1) abort();
  253:         if (*pwc == 0x000a || *pwc == 0x000d) {
  254:           state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
  255:         }
  256:         COMBINE_STATE;
  257:         conv->istate = state;
  258:         return count+1;
  259:       } else
  260:         goto ilseq;
  261:     case STATE_TWOBYTE:
  262:       if (n < count+2)
  263:         goto none;
  264:       if (s[0] < 0x80 && s[1] < 0x80) {
  265:         int ret;
  266:         switch (state2) {
  267:           case STATE2_NONE:
  268:             goto ilseq;
  269:           case STATE2_DESIGNATED_GB2312:
  270:             ret = gb2312_mbtowc(conv,pwc,s,2); break;
  271:           case STATE2_DESIGNATED_CNS11643_1:
  272:             ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
  273:           case STATE2_DESIGNATED_ISO_IR_165:
  274:             ret = isoir165_mbtowc(conv,pwc,s,2); break;
  275:           default: abort();
  276:         }
  277:         if (ret == RET_ILSEQ)
  278:           goto ilseq;
  279:         if (ret != 2) abort();
  280:         COMBINE_STATE;
  281:         conv->istate = state;
  282:         return count+2;
  283:       } else
  284:         goto ilseq;
  285:     default: abort();
  286:   }
  287: 
  288: none:
  289:   COMBINE_STATE;
  290:   conv->istate = state;
  291:   return RET_TOOFEW(count);
  292: 
  293: ilseq:
  294:   COMBINE_STATE;
  295:   conv->istate = state;
  296:   return RET_SHIFT_ILSEQ(count);
  297: }
  298: 
  299: static int
  300: iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
  301: {
  302:   state_t state = conv->ostate;
  303:   SPLIT_STATE;
  304:   unsigned char buf[3];
  305:   int ret;
  306: 
  307:   /* There is no need to handle Unicode 3.1 tag characters and to look for
  308:      "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
  309: 
  310:   /* Try ASCII. */
  311:   ret = ascii_wctomb(conv,buf,wc,1);
  312:   if (ret != RET_ILUNI) {
  313:     if (ret != 1) abort();
  314:     if (buf[0] < 0x80) {
  315:       int count = (state1 == STATE_ASCII ? 1 : 2);
  316:       if (n < count)
  317:         return RET_TOOSMALL;
  318:       if (state1 != STATE_ASCII) {
  319:         r[0] = SI;
  320:         r += 1;
  321:         state1 = STATE_ASCII;
  322:       }
  323:       r[0] = buf[0];
  324:       if (wc == 0x000a || wc == 0x000d) {
  325:         state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
  326:       }
  327:       COMBINE_STATE;
  328:       conv->ostate = state;
  329:       return count;
  330:     }
  331:   }
  332: 
  333:   /* Try GB 2312-1980. */
  334:   ret = gb2312_wctomb(conv,buf,wc,2);
  335:   if (ret != RET_ILUNI) {
  336:     if (ret != 2) abort();
  337:     if (buf[0] < 0x80 && buf[1] < 0x80) {
  338:       int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  339:       if (n < count)
  340:         return RET_TOOSMALL;
  341:       if (state2 != STATE2_DESIGNATED_GB2312) {
  342:         r[0] = ESC;
  343:         r[1] = '$';
  344:         r[2] = ')';
  345:         r[3] = 'A';
  346:         r += 4;
  347:         state2 = STATE2_DESIGNATED_GB2312;
  348:       }
  349:       if (state1 != STATE_TWOBYTE) {
  350:         r[0] = SO;
  351:         r += 1;
  352:         state1 = STATE_TWOBYTE;
  353:       }
  354:       r[0] = buf[0];
  355:       r[1] = buf[1];
  356:       COMBINE_STATE;
  357:       conv->ostate = state;
  358:       return count;
  359:     }
  360:   }
  361: 
  362:   ret = cns11643_wctomb(conv,buf,wc,3);
  363:   if (ret != RET_ILUNI) {
  364:     if (ret != 3) abort();
  365: 
  366:     /* Try CNS 11643-1992 Plane 1. */
  367:     if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
  368:       int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  369:       if (n < count)
  370:         return RET_TOOSMALL;
  371:       if (state2 != STATE2_DESIGNATED_CNS11643_1) {
  372:         r[0] = ESC;
  373:         r[1] = '$';
  374:         r[2] = ')';
  375:         r[3] = 'G';
  376:         r += 4;
  377:         state2 = STATE2_DESIGNATED_CNS11643_1;
  378:       }
  379:       if (state1 != STATE_TWOBYTE) {
  380:         r[0] = SO;
  381:         r += 1;
  382:         state1 = STATE_TWOBYTE;
  383:       }
  384:       r[0] = buf[1];
  385:       r[1] = buf[2];
  386:       COMBINE_STATE;
  387:       conv->ostate = state;
  388:       return count;
  389:     }
  390: 
  391:     /* Try CNS 11643-1992 Plane 2. */
  392:     if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
  393:       int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
  394:       if (n < count)
  395:         return RET_TOOSMALL;
  396:       if (state3 != STATE3_DESIGNATED_CNS11643_2) {
  397:         r[0] = ESC;
  398:         r[1] = '$';
  399:         r[2] = '*';
  400:         r[3] = 'H';
  401:         r += 4;
  402:         state3 = STATE3_DESIGNATED_CNS11643_2;
  403:       }
  404:       r[0] = ESC;
  405:       r[1] = 'N';
  406:       r[2] = buf[1];
  407:       r[3] = buf[2];
  408:       COMBINE_STATE;
  409:       conv->ostate = state;
  410:       return count;
  411:     }
  412: 
  413:     /* Try CNS 11643-1992 Plane 3. */
  414:     if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
  415:       int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
  416:       if (n < count)
  417:         return RET_TOOSMALL;
  418:       if (state4 != STATE4_DESIGNATED_CNS11643_3) {
  419:         r[0] = ESC;
  420:         r[1] = '$';
  421:         r[2] = '+';
  422:         r[3] = 'I';
  423:         r += 4;
  424:         state4 = STATE4_DESIGNATED_CNS11643_3;
  425:       }
  426:       r[0] = ESC;
  427:       r[1] = 'O';
  428:       r[2] = buf[1];
  429:       r[3] = buf[2];
  430:       COMBINE_STATE;
  431:       conv->ostate = state;
  432:       return count;
  433:     }
  434: 
  435:     /* Try CNS 11643-1992 Plane 4. */
  436:     if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
  437:       int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
  438:       if (n < count)
  439:         return RET_TOOSMALL;
  440:       if (state4 != STATE4_DESIGNATED_CNS11643_4) {
  441:         r[0] = ESC;
  442:         r[1] = '$';
  443:         r[2] = '+';
  444:         r[3] = 'J';
  445:         r += 4;
  446:         state4 = STATE4_DESIGNATED_CNS11643_4;
  447:       }
  448:       r[0] = ESC;
  449:       r[1] = 'O';
  450:       r[2] = buf[1];
  451:       r[3] = buf[2];
  452:       COMBINE_STATE;
  453:       conv->ostate = state;
  454:       return count;
  455:     }
  456: 
  457:     /* Try CNS 11643-1992 Plane 5. */
  458:     if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
  459:       int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
  460:       if (n < count)
  461:         return RET_TOOSMALL;
  462:       if (state4 != STATE4_DESIGNATED_CNS11643_5) {
  463:         r[0] = ESC;
  464:         r[1] = '$';
  465:         r[2] = '+';
  466:         r[3] = 'K';
  467:         r += 4;
  468:         state4 = STATE4_DESIGNATED_CNS11643_5;
  469:       }
  470:       r[0] = ESC;
  471:       r[1] = 'O';
  472:       r[2] = buf[1];
  473:       r[3] = buf[2];
  474:       COMBINE_STATE;
  475:       conv->ostate = state;
  476:       return count;
  477:     }
  478: 
  479:     /* Try CNS 11643-1992 Plane 6. */
  480:     if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
  481:       int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
  482:       if (n < count)
  483:         return RET_TOOSMALL;
  484:       if (state4 != STATE4_DESIGNATED_CNS11643_6) {
  485:         r[0] = ESC;
  486:         r[1] = '$';
  487:         r[2] = '+';
  488:         r[3] = 'L';
  489:         r += 4;
  490:         state4 = STATE4_DESIGNATED_CNS11643_6;
  491:       }
  492:       r[0] = ESC;
  493:       r[1] = 'O';
  494:       r[2] = buf[1];
  495:       r[3] = buf[2];
  496:       COMBINE_STATE;
  497:       conv->ostate = state;
  498:       return count;
  499:     }
  500: 
  501:     /* Try CNS 11643-1992 Plane 7. */
  502:     if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
  503:       int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
  504:       if (n < count)
  505:         return RET_TOOSMALL;
  506:       if (state4 != STATE4_DESIGNATED_CNS11643_7) {
  507:         r[0] = ESC;
  508:         r[1] = '$';
  509:         r[2] = '+';
  510:         r[3] = 'M';
  511:         r += 4;
  512:         state4 = STATE4_DESIGNATED_CNS11643_7;
  513:       }
  514:       r[0] = ESC;
  515:       r[1] = 'O';
  516:       r[2] = buf[1];
  517:       r[3] = buf[2];
  518:       COMBINE_STATE;
  519:       conv->ostate = state;
  520:       return count;
  521:     }
  522: 
  523:   }
  524: 
  525:   /* Try ISO-IR-165. */
  526:   ret = isoir165_wctomb(conv,buf,wc,2);
  527:   if (ret != RET_ILUNI) {
  528:     if (ret != 2) abort();
  529:     if (buf[0] < 0x80 && buf[1] < 0x80) {
  530:       int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  531:       if (n < count)
  532:         return RET_TOOSMALL;
  533:       if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
  534:         r[0] = ESC;
  535:         r[1] = '$';
  536:         r[2] = ')';
  537:         r[3] = 'E';
  538:         r += 4;
  539:         state2 = STATE2_DESIGNATED_ISO_IR_165;
  540:       }
  541:       if (state1 != STATE_TWOBYTE) {
  542:         r[0] = SO;
  543:         r += 1;
  544:         state1 = STATE_TWOBYTE;
  545:       }
  546:       r[0] = buf[0];
  547:       r[1] = buf[1];
  548:       COMBINE_STATE;
  549:       conv->ostate = state;
  550:       return count;
  551:     }
  552:   }
  553: 
  554:   return RET_ILUNI;
  555: }
  556: 
  557: static int
  558: iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n)
  559: {
  560:   state_t state = conv->ostate;
  561:   SPLIT_STATE;
  562:   (void)state2;
  563:   (void)state3;
  564:   (void)state4;
  565:   if (state1 != STATE_ASCII) {
  566:     if (n < 1)
  567:       return RET_TOOSMALL;
  568:     r[0] = SI;
  569:     /* conv->ostate = 0; will be done by the caller */
  570:     return 1;
  571:   } else
  572:     return 0;
  573: }
  574: 
  575: #undef COMBINE_STATE
  576: #undef SPLIT_STATE
  577: #undef STATE4_DESIGNATED_CNS11643_7
  578: #undef STATE4_DESIGNATED_CNS11643_6
  579: #undef STATE4_DESIGNATED_CNS11643_5
  580: #undef STATE4_DESIGNATED_CNS11643_4
  581: #undef STATE4_DESIGNATED_CNS11643_3
  582: #undef STATE4_NONE
  583: #undef STATE3_DESIGNATED_CNS11643_2
  584: #undef STATE3_NONE
  585: #undef STATE2_DESIGNATED_ISO_IR_165
  586: #undef STATE2_DESIGNATED_CNS11643_1
  587: #undef STATE2_DESIGNATED_GB2312
  588: #undef STATE2_NONE
  589: #undef STATE_TWOBYTE
  590: #undef STATE_ASCII

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>