File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iso2022_cnext.h
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 6 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /*
    2:  * Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, see <https://www.gnu.org/licenses/>.
   18:  */
   19: 
   20: /*
   21:  * ISO-2022-CN-EXT
   22:  */
   23: 
   24: /* Specification: RFC 1922 */
   25: 
/* Control bytes recognized and emitted by this converter. */
#define ESC 0x1b
#define SO  0x0e
#define SI  0x0f

/*
 * The state is composed of one of the following values
 * (stored in bits 0..7; switched by the SO and SI control bytes)
 */
#define STATE_ASCII          0
#define STATE_TWOBYTE        1
/*
 * and one of the following values, << 8
 * (bits 8..15; the character set selected by an ESC $ ) x designation)
 */
#define STATE2_NONE                   0
#define STATE2_DESIGNATED_GB2312      1
#define STATE2_DESIGNATED_CNS11643_1  2
#define STATE2_DESIGNATED_ISO_IR_165  3
/*
 * and one of the following values, << 16
 * (bits 16..23; the character set selected by an ESC $ * x designation)
 */
#define STATE3_NONE                   0
#define STATE3_DESIGNATED_CNS11643_2  1
/*
 * and one of the following values, << 24
 * (bits 24 and up; the character set selected by an ESC $ + x designation)
 */
#define STATE4_NONE                   0
#define STATE4_DESIGNATED_CNS11643_3  1
#define STATE4_DESIGNATED_CNS11643_4  2
#define STATE4_DESIGNATED_CNS11643_5  3
#define STATE4_DESIGNATED_CNS11643_6  4
#define STATE4_DESIGNATED_CNS11643_7  5

/*
 * SPLIT_STATE declares the locals state1..state4 and fills them from the
 * four components of a variable named 'state' that must be in scope.
 * It expands to a declaration, so it has to appear where declarations
 * are permitted.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
/*
 * COMBINE_STATE packs state1..state4 back into 'state'. It does not store
 * the result into the conversion descriptor; callers do that themselves.
 */
#define COMBINE_STATE \
  state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
   61: 
   62: static int
   63: iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
   64: {
   65:   state_t state = conv->istate;
   66:   SPLIT_STATE;
   67:   int count = 0;
   68:   unsigned char c;
   69:   for (;;) {
   70:     c = *s;
   71:     if (c == ESC) {
   72:       if (n < count+4)
   73:         goto none;
   74:       if (s[1] == '$') {
   75:         if (s[2] == ')') {
   76:           if (s[3] == 'A') {
   77:             state2 = STATE2_DESIGNATED_GB2312;
   78:             s += 4; count += 4;
   79:             if (n < count+1)
   80:               goto none;
   81:             continue;
   82:           }
   83:           if (s[3] == 'G') {
   84:             state2 = STATE2_DESIGNATED_CNS11643_1;
   85:             s += 4; count += 4;
   86:             if (n < count+1)
   87:               goto none;
   88:             continue;
   89:           }
   90:           if (s[3] == 'E') {
   91:             state2 = STATE2_DESIGNATED_ISO_IR_165;
   92:             s += 4; count += 4;
   93:             if (n < count+1)
   94:               goto none;
   95:             continue;
   96:           }
   97:         }
   98:         if (s[2] == '*') {
   99:           if (s[3] == 'H') {
  100:             state3 = STATE3_DESIGNATED_CNS11643_2;
  101:             s += 4; count += 4;
  102:             if (n < count+1)
  103:               goto none;
  104:             continue;
  105:           }
  106:         }
  107:         if (s[2] == '+') {
  108:           if (s[3] == 'I') {
  109:             state4 = STATE4_DESIGNATED_CNS11643_3;
  110:             s += 4; count += 4;
  111:             if (n < count+1)
  112:               goto none;
  113:             continue;
  114:           }
  115:           if (s[3] == 'J') {
  116:             state4 = STATE4_DESIGNATED_CNS11643_4;
  117:             s += 4; count += 4;
  118:             if (n < count+1)
  119:               goto none;
  120:             continue;
  121:           }
  122:           if (s[3] == 'K') {
  123:             state4 = STATE4_DESIGNATED_CNS11643_5;
  124:             s += 4; count += 4;
  125:             if (n < count+1)
  126:               goto none;
  127:             continue;
  128:           }
  129:           if (s[3] == 'L') {
  130:             state4 = STATE4_DESIGNATED_CNS11643_6;
  131:             s += 4; count += 4;
  132:             if (n < count+1)
  133:               goto none;
  134:             continue;
  135:           }
  136:           if (s[3] == 'M') {
  137:             state4 = STATE4_DESIGNATED_CNS11643_7;
  138:             s += 4; count += 4;
  139:             if (n < count+1)
  140:               goto none;
  141:             continue;
  142:           }
  143:         }
  144:       }
  145:       if (s[1] == 'N') {
  146:         switch (state3) {
  147:           case STATE3_NONE:
  148:             goto ilseq;
  149:           case STATE3_DESIGNATED_CNS11643_2:
  150:             if (s[2] < 0x80 && s[3] < 0x80) {
  151:               int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
  152:               if (ret == RET_ILSEQ)
  153:                 goto ilseq;
  154:               if (ret != 2) abort();
  155:               COMBINE_STATE;
  156:               conv->istate = state;
  157:               return count+4;
  158:             } else
  159:               goto ilseq;
  160:           default: abort();
  161:         }
  162:       }
  163:       if (s[1] == 'O') {
  164:         switch (state4) {
  165:           case STATE4_NONE:
  166:             goto ilseq;
  167:           case STATE4_DESIGNATED_CNS11643_3:
  168:             if (s[2] < 0x80 && s[3] < 0x80) {
  169:               int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
  170:               if (ret == RET_ILSEQ)
  171:                 goto ilseq;
  172:               if (ret != 2) abort();
  173:               COMBINE_STATE;
  174:               conv->istate = state;
  175:               return count+4;
  176:             } else
  177:               goto ilseq;
  178:           case STATE4_DESIGNATED_CNS11643_4:
  179:             if (s[2] < 0x80 && s[3] < 0x80) {
  180:               int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
  181:               if (ret == RET_ILSEQ)
  182:                 goto ilseq;
  183:               if (ret != 2) abort();
  184:               COMBINE_STATE;
  185:               conv->istate = state;
  186:               return count+4;
  187:             } else
  188:               goto ilseq;
  189:           case STATE4_DESIGNATED_CNS11643_5:
  190:             if (s[2] < 0x80 && s[3] < 0x80) {
  191:               int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
  192:               if (ret == RET_ILSEQ)
  193:                 goto ilseq;
  194:               if (ret != 2) abort();
  195:               COMBINE_STATE;
  196:               conv->istate = state;
  197:               return count+4;
  198:             } else
  199:               goto ilseq;
  200:           case STATE4_DESIGNATED_CNS11643_6:
  201:             if (s[2] < 0x80 && s[3] < 0x80) {
  202:               int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
  203:               if (ret == RET_ILSEQ)
  204:                 goto ilseq;
  205:               if (ret != 2) abort();
  206:               COMBINE_STATE;
  207:               conv->istate = state;
  208:               return count+4;
  209:             } else
  210:               goto ilseq;
  211:           case STATE4_DESIGNATED_CNS11643_7:
  212:             if (s[2] < 0x80 && s[3] < 0x80) {
  213:               int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
  214:               if (ret == RET_ILSEQ)
  215:                 goto ilseq;
  216:               if (ret != 2) abort();
  217:               COMBINE_STATE;
  218:               conv->istate = state;
  219:               return count+4;
  220:             } else
  221:               goto ilseq;
  222:           default: abort();
  223:         }
  224:       }
  225:       goto ilseq;
  226:     }
  227:     if (c == SO) {
  228:       if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
  229:         goto ilseq;
  230:       state1 = STATE_TWOBYTE;
  231:       s++; count++;
  232:       if (n < count+1)
  233:         goto none;
  234:       continue;
  235:     }
  236:     if (c == SI) {
  237:       state1 = STATE_ASCII;
  238:       s++; count++;
  239:       if (n < count+1)
  240:         goto none;
  241:       continue;
  242:     }
  243:     break;
  244:   }
  245:   switch (state1) {
  246:     case STATE_ASCII:
  247:       if (c < 0x80) {
  248:         int ret = ascii_mbtowc(conv,pwc,s,1);
  249:         if (ret == RET_ILSEQ)
  250:           goto ilseq;
  251:         if (ret != 1) abort();
  252:         if (*pwc == 0x000a || *pwc == 0x000d) {
  253:           state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
  254:         }
  255:         COMBINE_STATE;
  256:         conv->istate = state;
  257:         return count+1;
  258:       } else
  259:         goto ilseq;
  260:     case STATE_TWOBYTE:
  261:       if (n < count+2)
  262:         goto none;
  263:       if (s[0] < 0x80 && s[1] < 0x80) {
  264:         int ret;
  265:         switch (state2) {
  266:           case STATE2_NONE:
  267:             goto ilseq;
  268:           case STATE2_DESIGNATED_GB2312:
  269:             ret = gb2312_mbtowc(conv,pwc,s,2); break;
  270:           case STATE2_DESIGNATED_CNS11643_1:
  271:             ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
  272:           case STATE2_DESIGNATED_ISO_IR_165:
  273:             ret = isoir165_mbtowc(conv,pwc,s,2); break;
  274:           default: abort();
  275:         }
  276:         if (ret == RET_ILSEQ)
  277:           goto ilseq;
  278:         if (ret != 2) abort();
  279:         COMBINE_STATE;
  280:         conv->istate = state;
  281:         return count+2;
  282:       } else
  283:         goto ilseq;
  284:     default: abort();
  285:   }
  286: 
  287: none:
  288:   COMBINE_STATE;
  289:   conv->istate = state;
  290:   return RET_TOOFEW(count);
  291: 
  292: ilseq:
  293:   COMBINE_STATE;
  294:   conv->istate = state;
  295:   return RET_SHIFT_ILSEQ(count);
  296: }
  297: 
  298: static int
  299: iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
  300: {
  301:   state_t state = conv->ostate;
  302:   SPLIT_STATE;
  303:   unsigned char buf[3];
  304:   int ret;
  305: 
  306:   /* There is no need to handle Unicode 3.1 tag characters and to look for
  307:      "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
  308: 
  309:   /* Try ASCII. */
  310:   ret = ascii_wctomb(conv,buf,wc,1);
  311:   if (ret != RET_ILUNI) {
  312:     if (ret != 1) abort();
  313:     if (buf[0] < 0x80) {
  314:       int count = (state1 == STATE_ASCII ? 1 : 2);
  315:       if (n < count)
  316:         return RET_TOOSMALL;
  317:       if (state1 != STATE_ASCII) {
  318:         r[0] = SI;
  319:         r += 1;
  320:         state1 = STATE_ASCII;
  321:       }
  322:       r[0] = buf[0];
  323:       if (wc == 0x000a || wc == 0x000d) {
  324:         state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
  325:       }
  326:       COMBINE_STATE;
  327:       conv->ostate = state;
  328:       return count;
  329:     }
  330:   }
  331: 
  332:   /* Try GB 2312-1980. */
  333:   ret = gb2312_wctomb(conv,buf,wc,2);
  334:   if (ret != RET_ILUNI) {
  335:     if (ret != 2) abort();
  336:     if (buf[0] < 0x80 && buf[1] < 0x80) {
  337:       int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  338:       if (n < count)
  339:         return RET_TOOSMALL;
  340:       if (state2 != STATE2_DESIGNATED_GB2312) {
  341:         r[0] = ESC;
  342:         r[1] = '$';
  343:         r[2] = ')';
  344:         r[3] = 'A';
  345:         r += 4;
  346:         state2 = STATE2_DESIGNATED_GB2312;
  347:       }
  348:       if (state1 != STATE_TWOBYTE) {
  349:         r[0] = SO;
  350:         r += 1;
  351:         state1 = STATE_TWOBYTE;
  352:       }
  353:       r[0] = buf[0];
  354:       r[1] = buf[1];
  355:       COMBINE_STATE;
  356:       conv->ostate = state;
  357:       return count;
  358:     }
  359:   }
  360: 
  361:   ret = cns11643_wctomb(conv,buf,wc,3);
  362:   if (ret != RET_ILUNI) {
  363:     if (ret != 3) abort();
  364: 
  365:     /* Try CNS 11643-1992 Plane 1. */
  366:     if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
  367:       int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  368:       if (n < count)
  369:         return RET_TOOSMALL;
  370:       if (state2 != STATE2_DESIGNATED_CNS11643_1) {
  371:         r[0] = ESC;
  372:         r[1] = '$';
  373:         r[2] = ')';
  374:         r[3] = 'G';
  375:         r += 4;
  376:         state2 = STATE2_DESIGNATED_CNS11643_1;
  377:       }
  378:       if (state1 != STATE_TWOBYTE) {
  379:         r[0] = SO;
  380:         r += 1;
  381:         state1 = STATE_TWOBYTE;
  382:       }
  383:       r[0] = buf[1];
  384:       r[1] = buf[2];
  385:       COMBINE_STATE;
  386:       conv->ostate = state;
  387:       return count;
  388:     }
  389: 
  390:     /* Try CNS 11643-1992 Plane 2. */
  391:     if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
  392:       int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
  393:       if (n < count)
  394:         return RET_TOOSMALL;
  395:       if (state3 != STATE3_DESIGNATED_CNS11643_2) {
  396:         r[0] = ESC;
  397:         r[1] = '$';
  398:         r[2] = '*';
  399:         r[3] = 'H';
  400:         r += 4;
  401:         state3 = STATE3_DESIGNATED_CNS11643_2;
  402:       }
  403:       r[0] = ESC;
  404:       r[1] = 'N';
  405:       r[2] = buf[1];
  406:       r[3] = buf[2];
  407:       COMBINE_STATE;
  408:       conv->ostate = state;
  409:       return count;
  410:     }
  411: 
  412:     /* Try CNS 11643-1992 Plane 3. */
  413:     if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
  414:       int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
  415:       if (n < count)
  416:         return RET_TOOSMALL;
  417:       if (state4 != STATE4_DESIGNATED_CNS11643_3) {
  418:         r[0] = ESC;
  419:         r[1] = '$';
  420:         r[2] = '+';
  421:         r[3] = 'I';
  422:         r += 4;
  423:         state4 = STATE4_DESIGNATED_CNS11643_3;
  424:       }
  425:       r[0] = ESC;
  426:       r[1] = 'O';
  427:       r[2] = buf[1];
  428:       r[3] = buf[2];
  429:       COMBINE_STATE;
  430:       conv->ostate = state;
  431:       return count;
  432:     }
  433: 
  434:     /* Try CNS 11643-1992 Plane 4. */
  435:     if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
  436:       int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
  437:       if (n < count)
  438:         return RET_TOOSMALL;
  439:       if (state4 != STATE4_DESIGNATED_CNS11643_4) {
  440:         r[0] = ESC;
  441:         r[1] = '$';
  442:         r[2] = '+';
  443:         r[3] = 'J';
  444:         r += 4;
  445:         state4 = STATE4_DESIGNATED_CNS11643_4;
  446:       }
  447:       r[0] = ESC;
  448:       r[1] = 'O';
  449:       r[2] = buf[1];
  450:       r[3] = buf[2];
  451:       COMBINE_STATE;
  452:       conv->ostate = state;
  453:       return count;
  454:     }
  455: 
  456:     /* Try CNS 11643-1992 Plane 5. */
  457:     if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
  458:       int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
  459:       if (n < count)
  460:         return RET_TOOSMALL;
  461:       if (state4 != STATE4_DESIGNATED_CNS11643_5) {
  462:         r[0] = ESC;
  463:         r[1] = '$';
  464:         r[2] = '+';
  465:         r[3] = 'K';
  466:         r += 4;
  467:         state4 = STATE4_DESIGNATED_CNS11643_5;
  468:       }
  469:       r[0] = ESC;
  470:       r[1] = 'O';
  471:       r[2] = buf[1];
  472:       r[3] = buf[2];
  473:       COMBINE_STATE;
  474:       conv->ostate = state;
  475:       return count;
  476:     }
  477: 
  478:     /* Try CNS 11643-1992 Plane 6. */
  479:     if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
  480:       int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
  481:       if (n < count)
  482:         return RET_TOOSMALL;
  483:       if (state4 != STATE4_DESIGNATED_CNS11643_6) {
  484:         r[0] = ESC;
  485:         r[1] = '$';
  486:         r[2] = '+';
  487:         r[3] = 'L';
  488:         r += 4;
  489:         state4 = STATE4_DESIGNATED_CNS11643_6;
  490:       }
  491:       r[0] = ESC;
  492:       r[1] = 'O';
  493:       r[2] = buf[1];
  494:       r[3] = buf[2];
  495:       COMBINE_STATE;
  496:       conv->ostate = state;
  497:       return count;
  498:     }
  499: 
  500:     /* Try CNS 11643-1992 Plane 7. */
  501:     if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
  502:       int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
  503:       if (n < count)
  504:         return RET_TOOSMALL;
  505:       if (state4 != STATE4_DESIGNATED_CNS11643_7) {
  506:         r[0] = ESC;
  507:         r[1] = '$';
  508:         r[2] = '+';
  509:         r[3] = 'M';
  510:         r += 4;
  511:         state4 = STATE4_DESIGNATED_CNS11643_7;
  512:       }
  513:       r[0] = ESC;
  514:       r[1] = 'O';
  515:       r[2] = buf[1];
  516:       r[3] = buf[2];
  517:       COMBINE_STATE;
  518:       conv->ostate = state;
  519:       return count;
  520:     }
  521: 
  522:   }
  523: 
  524:   /* Try ISO-IR-165. */
  525:   ret = isoir165_wctomb(conv,buf,wc,2);
  526:   if (ret != RET_ILUNI) {
  527:     if (ret != 2) abort();
  528:     if (buf[0] < 0x80 && buf[1] < 0x80) {
  529:       int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  530:       if (n < count)
  531:         return RET_TOOSMALL;
  532:       if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
  533:         r[0] = ESC;
  534:         r[1] = '$';
  535:         r[2] = ')';
  536:         r[3] = 'E';
  537:         r += 4;
  538:         state2 = STATE2_DESIGNATED_ISO_IR_165;
  539:       }
  540:       if (state1 != STATE_TWOBYTE) {
  541:         r[0] = SO;
  542:         r += 1;
  543:         state1 = STATE_TWOBYTE;
  544:       }
  545:       r[0] = buf[0];
  546:       r[1] = buf[1];
  547:       COMBINE_STATE;
  548:       conv->ostate = state;
  549:       return count;
  550:     }
  551:   }
  552: 
  553:   return RET_ILUNI;
  554: }
  555: 
  556: static int
  557: iso2022_cn_ext_reset (conv_t conv, unsigned char *r, size_t n)
  558: {
  559:   state_t state = conv->ostate;
  560:   SPLIT_STATE;
  561:   (void)state2;
  562:   (void)state3;
  563:   (void)state4;
  564:   if (state1 != STATE_ASCII) {
  565:     if (n < 1)
  566:       return RET_TOOSMALL;
  567:     r[0] = SI;
  568:     /* conv->ostate = 0; will be done by the caller */
  569:     return 1;
  570:   } else
  571:     return 0;
  572: }
  573: 
/*
 * Drop the state helper macros defined earlier in this file.
 * NOTE(review): presumably this lets other headers included into the same
 * translation unit define macros with the same names - confirm against the
 * including file. ESC, SO and SI are not undefined here.
 */
#undef COMBINE_STATE
#undef SPLIT_STATE
#undef STATE4_DESIGNATED_CNS11643_7
#undef STATE4_DESIGNATED_CNS11643_6
#undef STATE4_DESIGNATED_CNS11643_5
#undef STATE4_DESIGNATED_CNS11643_4
#undef STATE4_DESIGNATED_CNS11643_3
#undef STATE4_NONE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE3_NONE
#undef STATE2_DESIGNATED_ISO_IR_165
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_NONE
#undef STATE_TWOBYTE
#undef STATE_ASCII

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>