embedaddon/libiconv/lib/converters.h - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / converters.h
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (4 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD

libiconv 1.16

1: /* 2: * Copyright (C) 1999-2002, 2004-2011, 2016 Free Software Foundation, Inc. 3: * This file is part of the GNU LIBICONV Library. 4: * 5: * The GNU LIBICONV Library is free software; you can redistribute it 6: * and/or modify it under the terms of the GNU Library General Public 7: * License as published by the Free Software Foundation; either version 2 8: * of the License, or (at your option) any later version. 9: * 10: * The GNU LIBICONV Library is distributed in the hope that it will be 11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13: * Library General Public License for more details. 14: * 15: * You should have received a copy of the GNU Library General Public 16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17: * If not, see <https://www.gnu.org/licenses/>. 18: */ 19: 20: /* This file defines all the converters. */ 21: 22: 23: /* Our own notion of wide character, as UCS-4, according to ISO-10646-1. */ 24: typedef unsigned int ucs4_t; 25: 26: /* State used by a conversion. 0 denotes the initial state. */ 27: typedef unsigned int state_t; 28: 29: /* iconv_t is an opaque type. This is the real iconv_t type. */ 30: typedef struct conv_struct * conv_t; 31: 32: /* 33: * Data type for conversion multibyte -> unicode 34: */ 35: struct mbtowc_funcs { 36: int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n); 37: /* 38: * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n) 39: * converts the byte sequence starting at s to a wide character. Up to n bytes 40: * are available at s. n is >= 1. 41: * Result is number of bytes consumed (if a wide character was read), 42: * or -1 if invalid, or -2 if n too small, 43: * or RET_SHIFT_ILSEQ(number of bytes consumed) if invalid input after a shift 44: * sequence was read, 45: * or RET_TOOFEW(number of bytes consumed) if only a shift sequence was read. 46: */ 47: int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc); 48: /* 49: * int xxx_flushwc (conv_t conv, ucs4_t *pwc) 50: * returns to the initial state and stores the pending wide character, if any. 51: * Result is 1 (if a wide character was read) or 0 if none was pending. 52: */ 53: }; 54: 55: /* Return code if invalid input after a shift sequence of n bytes was read. 56: (xxx_mbtowc) */ 57: #define RET_SHIFT_ILSEQ(n) (-1-2*(n)) 58: /* Return code if invalid. (xxx_mbtowc) */ 59: #define RET_ILSEQ RET_SHIFT_ILSEQ(0) 60: /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */ 61: #define RET_TOOFEW(n) (-2-2*(n)) 62: /* Retrieve the n from the encoded RET_... value. */ 63: #define DECODE_SHIFT_ILSEQ(r) ((unsigned int)(RET_SHIFT_ILSEQ(0) - (r)) / 2) 64: #define DECODE_TOOFEW(r) ((unsigned int)(RET_TOOFEW(0) - (r)) / 2) 65: /* Maximum value of n that may be used as argument to RET_SHIFT_ILSEQ or RET_TOOFEW. */ 66: #define RET_COUNT_MAX ((INT_MAX / 2) - 1) 67: 68: /* 69: * Data type for conversion unicode -> multibyte 70: */ 71: struct wctomb_funcs { 72: int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, size_t n); 73: /* 74: * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 75: * converts the wide character wc to the character set xxx, and stores the 76: * result beginning at r. Up to n bytes may be written at r. n is >= 1. 77: * Result is number of bytes written, or -1 if invalid, or -2 if n too small. 78: */ 79: int (*xxx_reset) (conv_t conv, unsigned char *r, size_t n); 80: /* 81: * int xxx_reset (conv_t conv, unsigned char *r, size_t n) 82: * stores a shift sequences returning to the initial state beginning at r. 83: * Up to n bytes may be written at r. n is >= 0. 84: * Result is number of bytes written, or -2 if n too small. 85: */ 86: }; 87: 88: /* Return code if invalid. (xxx_wctomb) */ 89: #define RET_ILUNI -1 90: /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ 91: #define RET_TOOSMALL -2 92: 93: /* 94: * Contents of a conversion descriptor. 95: */ 96: struct conv_struct { 97: struct loop_funcs lfuncs; 98: /* Input (conversion multibyte -> unicode) */ 99: int iindex; 100: struct mbtowc_funcs ifuncs; 101: state_t istate; 102: /* Output (conversion unicode -> multibyte) */ 103: int oindex; 104: struct wctomb_funcs ofuncs; 105: int oflags; 106: state_t ostate; 107: /* Operation flags */ 108: int transliterate; 109: int discard_ilseq; 110: #ifndef LIBICONV_PLUG 111: struct iconv_fallbacks fallbacks; 112: struct iconv_hooks hooks; 113: #endif 114: }; 115: 116: /* 117: * Include all the converters. 118: */ 119: 120: #include "ascii.h" 121: 122: /* General multi-byte encodings */ 123: #include "utf8.h" 124: #include "ucs2.h" 125: #include "ucs2be.h" 126: #include "ucs2le.h" 127: #include "ucs4.h" 128: #include "ucs4be.h" 129: #include "ucs4le.h" 130: #include "utf16.h" 131: #include "utf16be.h" 132: #include "utf16le.h" 133: #include "utf32.h" 134: #include "utf32be.h" 135: #include "utf32le.h" 136: #include "utf7.h" 137: #include "ucs2internal.h" 138: #include "ucs2swapped.h" 139: #include "ucs4internal.h" 140: #include "ucs4swapped.h" 141: #include "c99.h" 142: #include "java.h" 143: 144: /* 8-bit encodings */ 145: #include "iso8859_1.h" 146: #include "iso8859_2.h" 147: #include "iso8859_3.h" 148: #include "iso8859_4.h" 149: #include "iso8859_5.h" 150: #include "iso8859_6.h" 151: #include "iso8859_7.h" 152: #include "iso8859_8.h" 153: #include "iso8859_9.h" 154: #include "iso8859_10.h" 155: #include "iso8859_11.h" 156: #include "iso8859_13.h" 157: #include "iso8859_14.h" 158: #include "iso8859_15.h" 159: #include "iso8859_16.h" 160: #include "koi8_r.h" 161: #include "koi8_u.h" 162: #include "koi8_ru.h" 163: #include "cp1250.h" 164: #include "cp1251.h" 165: #include "cp1252.h" 166: #include "cp1253.h" 167: #include "cp1254.h" 168: #include "cp1255.h" 169: #include "cp1256.h" 170: #include "cp1257.h" 171: #include "cp1258.h" 172: #include "cp850.h" 173: #include "cp862.h" 174: #include "cp866.h" 175: #include "cp1131.h" 176: #include "mac_roman.h" 177: #include "mac_centraleurope.h" 178: #include "mac_iceland.h" 179: #include "mac_croatian.h" 180: #include "mac_romania.h" 181: #include "mac_cyrillic.h" 182: #include "mac_ukraine.h" 183: #include "mac_greek.h" 184: #include "mac_turkish.h" 185: #include "mac_hebrew.h" 186: #include "mac_arabic.h" 187: #include "mac_thai.h" 188: #include "hp_roman8.h" 189: #include "nextstep.h" 190: #include "armscii_8.h" 191: #include "georgian_academy.h" 192: #include "georgian_ps.h" 193: #include "koi8_t.h" 194: #include "pt154.h" 195: #include "rk1048.h" 196: #include "mulelao.h" 197: #include "cp1133.h" 198: #include "tis620.h" 199: #include "cp874.h" 200: #include "viscii.h" 201: #include "tcvn.h" 202: 203: /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */ 204: 205: typedef struct { 206: unsigned short indx; /* index into big table */ 207: unsigned short used; /* bitmask of used entries */ 208: } Summary16; 209: 210: #include "iso646_jp.h" 211: #include "jisx0201.h" 212: #include "jisx0208.h" 213: #include "jisx0212.h" 214: 215: #include "iso646_cn.h" 216: #include "gb2312.h" 217: #include "isoir165.h" 218: /*#include "gb12345.h"*/ 219: #include "gbk.h" 220: #include "cns11643.h" 221: #include "big5.h" 222: 223: #include "ksc5601.h" 224: #include "johab_hangul.h" 225: 226: /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */ 227: 228: #include "euc_jp.h" 229: #include "sjis.h" 230: #include "cp932.h" 231: #include "iso2022_jp.h" 232: #include "iso2022_jp1.h" 233: #include "iso2022_jp2.h" 234: #include "iso2022_jpms.h" 235: 236: #include "euc_cn.h" 237: #include "ces_gbk.h" 238: #include "cp936.h" 239: #include "gb18030.h" 240: #include "iso2022_cn.h" 241: #include "iso2022_cnext.h" 242: #include "hz.h" 243: #include "euc_tw.h" 244: #include "ces_big5.h" 245: #include "cp950.h" 246: #include "big5hkscs1999.h" 247: #include "big5hkscs2001.h" 248: #include "big5hkscs2004.h" 249: #include "big5hkscs2008.h" 250: 251: #include "euc_kr.h" 252: #include "cp949.h" 253: #include "johab.h" 254: #include "iso2022_kr.h" 255: 256: /* Encodings used by system dependent locales. */ 257: 258: #ifdef USE_AIX 259: #include "cp856.h" 260: #include "cp922.h" 261: #include "cp943.h" 262: #include "cp1046.h" 263: #include "cp1124.h" 264: #include "cp1129.h" 265: #include "cp1161.h" 266: #include "cp1162.h" 267: #include "cp1163.h" 268: #endif 269: 270: #ifdef USE_OSF1 271: #include "dec_kanji.h" 272: #include "dec_hanyu.h" 273: #endif 274: 275: #ifdef USE_DOS 276: #include "cp437.h" 277: #include "cp737.h" 278: #include "cp775.h" 279: #include "cp852.h" 280: #include "cp853.h" 281: #include "cp855.h" 282: #include "cp857.h" 283: #include "cp858.h" 284: #include "cp860.h" 285: #include "cp861.h" 286: #include "cp863.h" 287: #include "cp864.h" 288: #include "cp865.h" 289: #include "cp869.h" 290: #include "cp1125.h" 291: #endif 292: 293: #ifdef USE_EXTRA 294: #include "euc_jisx0213.h" 295: #include "shift_jisx0213.h" 296: #include "iso2022_jp3.h" 297: #include "big5_2003.h" 298: #include "tds565.h" 299: #include "atarist.h" 300: #include "riscos1.h" 301: #endif 302: