Annotation of embedaddon/libiconv/lib/converters.h, revision 1.1.1.3
1.1 misho 1: /*
1.1.1.3 ! misho 2: * Copyright (C) 1999-2002, 2004-2011, 2016 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.3 ! misho 17: * If not, see <https://www.gnu.org/licenses/>.
1.1 misho 18: */
19:
20: /* This file defines all the converters. */
21:
22:
/* Our own notion of wide character, as UCS-4, according to ISO-10646-1. */
typedef unsigned int ucs4_t;

/* State used by a conversion. 0 denotes the initial state. */
typedef unsigned int state_t;

/* iconv_t is an opaque type. This is the real iconv_t type:
   a pointer to the conversion descriptor (struct conv_struct). */
typedef struct conv_struct * conv_t;
31:
32: /*
33: * Data type for conversion multibyte -> unicode
34: */
35: struct mbtowc_funcs {
1.1.1.3 ! misho 36: int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n);
1.1 misho 37: /*
1.1.1.3 ! misho 38: * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n)
1.1 misho 39: * converts the byte sequence starting at s to a wide character. Up to n bytes
40: * are available at s. n is >= 1.
41: * Result is number of bytes consumed (if a wide character was read),
1.1.1.3 ! misho 42: * or -1 if invalid, or -2 if n too small,
! 43: * or RET_SHIFT_ILSEQ(number of bytes consumed) if invalid input after a shift
! 44: * sequence was read,
! 45: * or RET_TOOFEW(number of bytes consumed) if only a shift sequence was read.
1.1 misho 46: */
47: int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc);
48: /*
49: * int xxx_flushwc (conv_t conv, ucs4_t *pwc)
50: * returns to the initial state and stores the pending wide character, if any.
51: * Result is 1 (if a wide character was read) or 0 if none was pending.
52: */
53: };
54:
/* Return codes of xxx_mbtowc are encoded as negative ints: odd values mean
   "invalid input", even values mean "too few bytes"; the magnitude carries
   the number n of bytes of shift sequence that were already read. */

/* Return code if invalid input after a shift sequence of n bytes was read.
   (xxx_mbtowc) */
#define RET_SHIFT_ILSEQ(n)  (-1-2*(n))
/* Return code if invalid. (xxx_mbtowc) */
#define RET_ILSEQ           RET_SHIFT_ILSEQ(0)
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
#define RET_TOOFEW(n)       (-2-2*(n))
/* Retrieve the n from the encoded RET_... value. */
#define DECODE_SHIFT_ILSEQ(r)  ((unsigned int)(RET_SHIFT_ILSEQ(0) - (r)) / 2)
#define DECODE_TOOFEW(r)       ((unsigned int)(RET_TOOFEW(0) - (r)) / 2)
/* Maximum value of n that may be used as argument to RET_SHIFT_ILSEQ or
   RET_TOOFEW. Expands to INT_MAX, so <limits.h> must have been included
   wherever this macro is used. */
#define RET_COUNT_MAX       ((INT_MAX / 2) - 1)
1.1 misho 67:
68: /*
69: * Data type for conversion unicode -> multibyte
70: */
71: struct wctomb_funcs {
1.1.1.3 ! misho 72: int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, size_t n);
1.1 misho 73: /*
1.1.1.3 ! misho 74: * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
1.1 misho 75: * converts the wide character wc to the character set xxx, and stores the
76: * result beginning at r. Up to n bytes may be written at r. n is >= 1.
77: * Result is number of bytes written, or -1 if invalid, or -2 if n too small.
78: */
1.1.1.3 ! misho 79: int (*xxx_reset) (conv_t conv, unsigned char *r, size_t n);
1.1 misho 80: /*
1.1.1.3 ! misho 81: * int xxx_reset (conv_t conv, unsigned char *r, size_t n)
1.1 misho 82: * stores a shift sequences returning to the initial state beginning at r.
83: * Up to n bytes may be written at r. n is >= 0.
84: * Result is number of bytes written, or -2 if n too small.
85: */
86: };
87:
/* Return code if invalid. (xxx_wctomb) */
#define RET_ILUNI      (-1)
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
#define RET_TOOSMALL   (-2)
92:
93: /*
94: * Contents of a conversion descriptor.
95: */
96: struct conv_struct {
97: struct loop_funcs lfuncs;
98: /* Input (conversion multibyte -> unicode) */
99: int iindex;
100: struct mbtowc_funcs ifuncs;
101: state_t istate;
102: /* Output (conversion unicode -> multibyte) */
103: int oindex;
104: struct wctomb_funcs ofuncs;
105: int oflags;
106: state_t ostate;
107: /* Operation flags */
108: int transliterate;
109: int discard_ilseq;
110: #ifndef LIBICONV_PLUG
111: struct iconv_fallbacks fallbacks;
112: struct iconv_hooks hooks;
113: #endif
114: };
115:
116: /*
117: * Include all the converters.
118: */
119:
120: #include "ascii.h"
121:
122: /* General multi-byte encodings */
123: #include "utf8.h"
124: #include "ucs2.h"
125: #include "ucs2be.h"
126: #include "ucs2le.h"
127: #include "ucs4.h"
128: #include "ucs4be.h"
129: #include "ucs4le.h"
130: #include "utf16.h"
131: #include "utf16be.h"
132: #include "utf16le.h"
133: #include "utf32.h"
134: #include "utf32be.h"
135: #include "utf32le.h"
136: #include "utf7.h"
137: #include "ucs2internal.h"
138: #include "ucs2swapped.h"
139: #include "ucs4internal.h"
140: #include "ucs4swapped.h"
141: #include "c99.h"
142: #include "java.h"
143:
144: /* 8-bit encodings */
145: #include "iso8859_1.h"
146: #include "iso8859_2.h"
147: #include "iso8859_3.h"
148: #include "iso8859_4.h"
149: #include "iso8859_5.h"
150: #include "iso8859_6.h"
151: #include "iso8859_7.h"
152: #include "iso8859_8.h"
153: #include "iso8859_9.h"
154: #include "iso8859_10.h"
155: #include "iso8859_11.h"
156: #include "iso8859_13.h"
157: #include "iso8859_14.h"
158: #include "iso8859_15.h"
159: #include "iso8859_16.h"
160: #include "koi8_r.h"
161: #include "koi8_u.h"
162: #include "koi8_ru.h"
163: #include "cp1250.h"
164: #include "cp1251.h"
165: #include "cp1252.h"
166: #include "cp1253.h"
167: #include "cp1254.h"
168: #include "cp1255.h"
169: #include "cp1256.h"
170: #include "cp1257.h"
171: #include "cp1258.h"
172: #include "cp850.h"
173: #include "cp862.h"
174: #include "cp866.h"
175: #include "cp1131.h"
176: #include "mac_roman.h"
177: #include "mac_centraleurope.h"
178: #include "mac_iceland.h"
179: #include "mac_croatian.h"
180: #include "mac_romania.h"
181: #include "mac_cyrillic.h"
182: #include "mac_ukraine.h"
183: #include "mac_greek.h"
184: #include "mac_turkish.h"
185: #include "mac_hebrew.h"
186: #include "mac_arabic.h"
187: #include "mac_thai.h"
188: #include "hp_roman8.h"
189: #include "nextstep.h"
190: #include "armscii_8.h"
191: #include "georgian_academy.h"
192: #include "georgian_ps.h"
193: #include "koi8_t.h"
194: #include "pt154.h"
195: #include "rk1048.h"
196: #include "mulelao.h"
197: #include "cp1133.h"
198: #include "tis620.h"
199: #include "cp874.h"
200: #include "viscii.h"
201: #include "tcvn.h"
202:
203: /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
204:
/* Summary record used by the CJK character set tables included below. */
typedef struct {
  unsigned short indx; /* index into big table */
  unsigned short used; /* bitmask of used entries */
} Summary16;
209:
210: #include "iso646_jp.h"
211: #include "jisx0201.h"
212: #include "jisx0208.h"
213: #include "jisx0212.h"
214:
215: #include "iso646_cn.h"
216: #include "gb2312.h"
217: #include "isoir165.h"
218: /*#include "gb12345.h"*/
219: #include "gbk.h"
220: #include "cns11643.h"
221: #include "big5.h"
222:
223: #include "ksc5601.h"
224: #include "johab_hangul.h"
225:
226: /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
227:
228: #include "euc_jp.h"
229: #include "sjis.h"
230: #include "cp932.h"
231: #include "iso2022_jp.h"
232: #include "iso2022_jp1.h"
233: #include "iso2022_jp2.h"
1.1.1.3 ! misho 234: #include "iso2022_jpms.h"
1.1 misho 235:
236: #include "euc_cn.h"
237: #include "ces_gbk.h"
238: #include "cp936.h"
239: #include "gb18030.h"
240: #include "iso2022_cn.h"
241: #include "iso2022_cnext.h"
242: #include "hz.h"
243: #include "euc_tw.h"
244: #include "ces_big5.h"
245: #include "cp950.h"
246: #include "big5hkscs1999.h"
247: #include "big5hkscs2001.h"
248: #include "big5hkscs2004.h"
1.1.1.2 misho 249: #include "big5hkscs2008.h"
1.1 misho 250:
251: #include "euc_kr.h"
252: #include "cp949.h"
253: #include "johab.h"
254: #include "iso2022_kr.h"
255:
256: /* Encodings used by system dependent locales. */
257:
258: #ifdef USE_AIX
259: #include "cp856.h"
260: #include "cp922.h"
261: #include "cp943.h"
262: #include "cp1046.h"
263: #include "cp1124.h"
264: #include "cp1129.h"
265: #include "cp1161.h"
266: #include "cp1162.h"
267: #include "cp1163.h"
268: #endif
269:
270: #ifdef USE_OSF1
271: #include "dec_kanji.h"
272: #include "dec_hanyu.h"
273: #endif
274:
275: #ifdef USE_DOS
276: #include "cp437.h"
277: #include "cp737.h"
278: #include "cp775.h"
279: #include "cp852.h"
280: #include "cp853.h"
281: #include "cp855.h"
282: #include "cp857.h"
283: #include "cp858.h"
284: #include "cp860.h"
285: #include "cp861.h"
286: #include "cp863.h"
287: #include "cp864.h"
288: #include "cp865.h"
289: #include "cp869.h"
290: #include "cp1125.h"
291: #endif
292:
293: #ifdef USE_EXTRA
294: #include "euc_jisx0213.h"
295: #include "shift_jisx0213.h"
296: #include "iso2022_jp3.h"
297: #include "big5_2003.h"
298: #include "tds565.h"
299: #include "atarist.h"
300: #include "riscos1.h"
301: #endif
302:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>