File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / converters.h
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /*
    2:  * Copyright (C) 1999-2002, 2004-2011, 2016 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, see <https://www.gnu.org/licenses/>.
   18:  */
   19: 
   20: /* This file defines all the converters. */
   21: 
   22: 
   23: /* Our own notion of wide character, as UCS-4, according to ISO-10646-1. */
   24: typedef unsigned int ucs4_t;
   25: 
   26: /* State used by a conversion. 0 denotes the initial state. */
   27: typedef unsigned int state_t;
   28: 
   29: /* iconv_t is an opaque type. This is the real iconv_t type. */
   30: typedef struct conv_struct * conv_t;
   31: 
   32: /*
   33:  * Data type for conversion multibyte -> unicode
   34:  */
   35: struct mbtowc_funcs {
   36:   int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n);
   37:   /*
   38:    * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n)
   39:    * converts the byte sequence starting at s to a wide character. Up to n bytes
   40:    * are available at s. n is >= 1.
   41:    * Result is number of bytes consumed (if a wide character was read),
   42:    * or -1 if invalid, or -2 if n too small,
   43:    * or RET_SHIFT_ILSEQ(number of bytes consumed) if invalid input after a shift
   44:    * sequence was read,
   45:    * or RET_TOOFEW(number of bytes consumed) if only a shift sequence was read.
   46:    */
   47:   int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc);
   48:   /*
   49:    * int xxx_flushwc (conv_t conv, ucs4_t *pwc)
   50:    * returns to the initial state and stores the pending wide character, if any.
   51:    * Result is 1 (if a wide character was read) or 0 if none was pending.
   52:    */
   53: };
   54: 
   55: /* Return code if invalid input after a shift sequence of n bytes was read.
   56:    (xxx_mbtowc) */
   57: #define RET_SHIFT_ILSEQ(n)  (-1-2*(n))
   58: /* Return code if invalid. (xxx_mbtowc) */
   59: #define RET_ILSEQ           RET_SHIFT_ILSEQ(0)
   60: /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
   61: #define RET_TOOFEW(n)       (-2-2*(n))
   62: /* Retrieve the n from the encoded RET_... value. */
   63: #define DECODE_SHIFT_ILSEQ(r)  ((unsigned int)(RET_SHIFT_ILSEQ(0) - (r)) / 2)
   64: #define DECODE_TOOFEW(r)       ((unsigned int)(RET_TOOFEW(0) - (r)) / 2)
   65: /* Maximum value of n that may be used as argument to RET_SHIFT_ILSEQ or RET_TOOFEW. */
   66: #define RET_COUNT_MAX       ((INT_MAX / 2) - 1)
   67: 
   68: /*
   69:  * Data type for conversion unicode -> multibyte
   70:  */
   71: struct wctomb_funcs {
   72:   int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, size_t n);
   73:   /*
   74:    * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
   75:    * converts the wide character wc to the character set xxx, and stores the
   76:    * result beginning at r. Up to n bytes may be written at r. n is >= 1.
   77:    * Result is number of bytes written, or -1 if invalid, or -2 if n too small.
   78:    */
   79:   int (*xxx_reset) (conv_t conv, unsigned char *r, size_t n);
   80:   /*
   81:    * int xxx_reset (conv_t conv, unsigned char *r, size_t n)
   82:    * stores a shift sequences returning to the initial state beginning at r.
   83:    * Up to n bytes may be written at r. n is >= 0.
   84:    * Result is number of bytes written, or -2 if n too small.
   85:    */
   86: };
   87: 
   88: /* Return code if invalid. (xxx_wctomb) */
   89: #define RET_ILUNI      -1
   90: /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
   91: #define RET_TOOSMALL   -2
   92: 
   93: /*
   94:  * Contents of a conversion descriptor.
   95:  */
   96: struct conv_struct {
   97:   struct loop_funcs lfuncs;
   98:   /* Input (conversion multibyte -> unicode) */
   99:   int iindex;
  100:   struct mbtowc_funcs ifuncs;
  101:   state_t istate;
  102:   /* Output (conversion unicode -> multibyte) */
  103:   int oindex;
  104:   struct wctomb_funcs ofuncs;
  105:   int oflags;
  106:   state_t ostate;
  107:   /* Operation flags */
  108:   int transliterate;
  109:   int discard_ilseq;
  110:   #ifndef LIBICONV_PLUG
  111:   struct iconv_fallbacks fallbacks;
  112:   struct iconv_hooks hooks;
  113:   #endif
  114: };
  115: 
  116: /*
  117:  * Include all the converters.
  118:  */
  119: 
  120: #include "ascii.h"
  121: 
  122: /* General multi-byte encodings */
  123: #include "utf8.h"
  124: #include "ucs2.h"
  125: #include "ucs2be.h"
  126: #include "ucs2le.h"
  127: #include "ucs4.h"
  128: #include "ucs4be.h"
  129: #include "ucs4le.h"
  130: #include "utf16.h"
  131: #include "utf16be.h"
  132: #include "utf16le.h"
  133: #include "utf32.h"
  134: #include "utf32be.h"
  135: #include "utf32le.h"
  136: #include "utf7.h"
  137: #include "ucs2internal.h"
  138: #include "ucs2swapped.h"
  139: #include "ucs4internal.h"
  140: #include "ucs4swapped.h"
  141: #include "c99.h"
  142: #include "java.h"
  143: 
  144: /* 8-bit encodings */
  145: #include "iso8859_1.h"
  146: #include "iso8859_2.h"
  147: #include "iso8859_3.h"
  148: #include "iso8859_4.h"
  149: #include "iso8859_5.h"
  150: #include "iso8859_6.h"
  151: #include "iso8859_7.h"
  152: #include "iso8859_8.h"
  153: #include "iso8859_9.h"
  154: #include "iso8859_10.h"
  155: #include "iso8859_11.h"
  156: #include "iso8859_13.h"
  157: #include "iso8859_14.h"
  158: #include "iso8859_15.h"
  159: #include "iso8859_16.h"
  160: #include "koi8_r.h"
  161: #include "koi8_u.h"
  162: #include "koi8_ru.h"
  163: #include "cp1250.h"
  164: #include "cp1251.h"
  165: #include "cp1252.h"
  166: #include "cp1253.h"
  167: #include "cp1254.h"
  168: #include "cp1255.h"
  169: #include "cp1256.h"
  170: #include "cp1257.h"
  171: #include "cp1258.h"
  172: #include "cp850.h"
  173: #include "cp862.h"
  174: #include "cp866.h"
  175: #include "cp1131.h"
  176: #include "mac_roman.h"
  177: #include "mac_centraleurope.h"
  178: #include "mac_iceland.h"
  179: #include "mac_croatian.h"
  180: #include "mac_romania.h"
  181: #include "mac_cyrillic.h"
  182: #include "mac_ukraine.h"
  183: #include "mac_greek.h"
  184: #include "mac_turkish.h"
  185: #include "mac_hebrew.h"
  186: #include "mac_arabic.h"
  187: #include "mac_thai.h"
  188: #include "hp_roman8.h"
  189: #include "nextstep.h"
  190: #include "armscii_8.h"
  191: #include "georgian_academy.h"
  192: #include "georgian_ps.h"
  193: #include "koi8_t.h"
  194: #include "pt154.h"
  195: #include "rk1048.h"
  196: #include "mulelao.h"
  197: #include "cp1133.h"
  198: #include "tis620.h"
  199: #include "cp874.h"
  200: #include "viscii.h"
  201: #include "tcvn.h"
  202: 
  203: /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
  204: 
  205: typedef struct {
  206:   unsigned short indx; /* index into big table */
  207:   unsigned short used; /* bitmask of used entries */
  208: } Summary16;
  209: 
  210: #include "iso646_jp.h"
  211: #include "jisx0201.h"
  212: #include "jisx0208.h"
  213: #include "jisx0212.h"
  214: 
  215: #include "iso646_cn.h"
  216: #include "gb2312.h"
  217: #include "isoir165.h"
  218: /*#include "gb12345.h"*/
  219: #include "gbk.h"
  220: #include "cns11643.h"
  221: #include "big5.h"
  222: 
  223: #include "ksc5601.h"
  224: #include "johab_hangul.h"
  225: 
  226: /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
  227: 
  228: #include "euc_jp.h"
  229: #include "sjis.h"
  230: #include "cp932.h"
  231: #include "iso2022_jp.h"
  232: #include "iso2022_jp1.h"
  233: #include "iso2022_jp2.h"
  234: #include "iso2022_jpms.h"
  235: 
  236: #include "euc_cn.h"
  237: #include "ces_gbk.h"
  238: #include "cp936.h"
  239: #include "gb18030.h"
  240: #include "iso2022_cn.h"
  241: #include "iso2022_cnext.h"
  242: #include "hz.h"
  243: #include "euc_tw.h"
  244: #include "ces_big5.h"
  245: #include "cp950.h"
  246: #include "big5hkscs1999.h"
  247: #include "big5hkscs2001.h"
  248: #include "big5hkscs2004.h"
  249: #include "big5hkscs2008.h"
  250: 
  251: #include "euc_kr.h"
  252: #include "cp949.h"
  253: #include "johab.h"
  254: #include "iso2022_kr.h"
  255: 
  256: /* Encodings used by system dependent locales. */
  257: 
  258: #ifdef USE_AIX
  259: #include "cp856.h"
  260: #include "cp922.h"
  261: #include "cp943.h"
  262: #include "cp1046.h"
  263: #include "cp1124.h"
  264: #include "cp1129.h"
  265: #include "cp1161.h"
  266: #include "cp1162.h"
  267: #include "cp1163.h"
  268: #endif
  269: 
  270: #ifdef USE_OSF1
  271: #include "dec_kanji.h"
  272: #include "dec_hanyu.h"
  273: #endif
  274: 
  275: #ifdef USE_DOS
  276: #include "cp437.h"
  277: #include "cp737.h"
  278: #include "cp775.h"
  279: #include "cp852.h"
  280: #include "cp853.h"
  281: #include "cp855.h"
  282: #include "cp857.h"
  283: #include "cp858.h"
  284: #include "cp860.h"
  285: #include "cp861.h"
  286: #include "cp863.h"
  287: #include "cp864.h"
  288: #include "cp865.h"
  289: #include "cp869.h"
  290: #include "cp1125.h"
  291: #endif
  292: 
  293: #ifdef USE_EXTRA
  294: #include "euc_jisx0213.h"
  295: #include "shift_jisx0213.h"
  296: #include "iso2022_jp3.h"
  297: #include "big5_2003.h"
  298: #include "tds565.h"
  299: #include "atarist.h"
  300: #include "riscos1.h"
  301: #endif
  302: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>