Annotation of embedaddon/libiconv/libcharset/lib/localcharset.c, revision 1.1.1.3

1.1       misho       1: /* Determine a canonical name for the current locale's character encoding.
                      2: 
1.1.1.3 ! misho       3:    Copyright (C) 2000-2006, 2008-2018 Free Software Foundation, Inc.
1.1       misho       4: 
                      5:    This program is free software; you can redistribute it and/or modify it
                      6:    under the terms of the GNU Library General Public License as published
                      7:    by the Free Software Foundation; either version 2, or (at your option)
                      8:    any later version.
                      9: 
                     10:    This program is distributed in the hope that it will be useful,
                     11:    but WITHOUT ANY WARRANTY; without even the implied warranty of
                     12:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     13:    Library General Public License for more details.
                     14: 
1.1.1.3 ! misho      15:    You should have received a copy of the GNU Library General Public License
        !            16:    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
1.1       misho      17: 
                     18: /* Written by Bruno Haible <bruno@clisp.org>.  */
                     19: 
                     20: #include <config.h>
                     21: 
                     22: /* Specification.  */
                     23: #include "localcharset.h"
                     24: 
                     25: #include <stddef.h>
                     26: #include <stdio.h>
                     27: #include <string.h>
                     28: #include <stdlib.h>
                     29: 
                     30: #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
1.1.1.3 ! misho      31: # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
1.1       misho      32: #endif
                     33: 
1.1.1.3 ! misho      34: #if defined _WIN32 && !defined __CYGWIN__
        !            35: # define WINDOWS_NATIVE
        !            36: # include <locale.h>
1.1       misho      37: #endif
                     38: 
                     39: #if defined __EMX__
                     40: /* Assume EMX program runs on OS/2, even if compiled under DOS.  */
                     41: # ifndef OS2
                     42: #  define OS2
                     43: # endif
                     44: #endif
                     45: 
1.1.1.3 ! misho      46: #if !defined WINDOWS_NATIVE
1.1       misho      47: # if HAVE_LANGINFO_CODESET
                     48: #  include <langinfo.h>
                     49: # else
1.1.1.3 ! misho      50: #  if 0 /* see comment regarding use of setlocale(), below */
1.1       misho      51: #   include <locale.h>
                     52: #  endif
                     53: # endif
                     54: # ifdef __CYGWIN__
                     55: #  define WIN32_LEAN_AND_MEAN
                     56: #  include <windows.h>
                     57: # endif
1.1.1.3 ! misho      58: #elif defined WINDOWS_NATIVE
1.1       misho      59: # define WIN32_LEAN_AND_MEAN
                     60: # include <windows.h>
                     61: #endif
                     62: #if defined OS2
                     63: # define INCL_DOS
                     64: # include <os2.h>
                     65: #endif
                     66: 
1.1.1.3 ! misho      67: /* For MB_CUR_MAX_L */
        !            68: #if defined DARWIN7
        !            69: # include <xlocale.h>
1.1       misho      70: #endif
                     71: 
                     72: 
1.1.1.3 ! misho      73: #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
1.1.1.2   misho      74: 
1.1.1.3 ! misho      75: /* On these platforms, we use a mapping from non-canonical encoding name
        !            76:    to GNU canonical encoding name.  */
1.1       misho      77: 
1.1.1.3 ! misho      78: /* With glibc-2.1 or newer, we don't need any canonicalization,
        !            79:    because glibc has iconv and both glibc and libiconv support all
        !            80:    GNU canonical names directly.  */
        !            81: # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
1.1       misho      82: 
1.1.1.3 ! misho      83: struct table_entry  /* One mapping: non-canonical encoding name -> GNU canonical name.  */
1.1       misho      84: {
1.1.1.3 ! misho      85:   const char alias[11+1];      /* Platform-specific encoding name; room for 11 chars plus NUL.  */
        !            86:   const char canonical[11+1];  /* GNU canonical encoding name; room for 11 chars plus NUL.  */
        !            87: };
        !            88: 
        !            89: /* Table of platform-dependent mappings, sorted by alias in ascending byte order.  */
        !            90: static const struct table_entry alias_table[] =  /* At most one platform section below may be active: none ends with a comma.  */
        !            91:   {
        !            92: #  if defined __FreeBSD__                                   /* FreeBSD */
        !            93:   /*{ "ARMSCII-8",  "ARMSCII-8" },*/
        !            94:     { "Big5",       "BIG5" },
        !            95:     { "Big5HKSCS",  "BIG5-HKSCS" },
        !            96:     { "C",          "ASCII" },
        !            97:   /*{ "CP1131",     "CP1131" },*/
        !            98:   /*{ "CP1251",     "CP1251" },*/
        !            99:   /*{ "CP866",      "CP866" },*/
        !           100:   /*{ "GB18030",    "GB18030" },*/
        !           101:   /*{ "GB2312",     "GB2312" },*/
        !           102:   /*{ "GBK",        "GBK" },*/
        !           103:   /*{ "ISCII-DEV",  "?" },*/
        !           104:     { "ISO8859-1",  "ISO-8859-1" },
        !           105:     { "ISO8859-13", "ISO-8859-13" },
        !           106:     { "ISO8859-15", "ISO-8859-15" },
        !           107:     { "ISO8859-2",  "ISO-8859-2" },
        !           108:     { "ISO8859-4",  "ISO-8859-4" },
        !           109:     { "ISO8859-5",  "ISO-8859-5" },
        !           110:     { "ISO8859-7",  "ISO-8859-7" },
        !           111:     { "ISO8859-9",  "ISO-8859-9" },
        !           112:   /*{ "KOI8-R",     "KOI8-R" },*/
        !           113:   /*{ "KOI8-U",     "KOI8-U" },*/
        !           114:     { "SJIS",       "SHIFT_JIS" },
        !           115:     { "US-ASCII",   "ASCII" },
        !           116:     { "eucCN",      "GB2312" },
        !           117:     { "eucJP",      "EUC-JP" },
        !           118:     { "eucKR",      "EUC-KR" }
        !           119: #   define alias_table_defined /* A platform section supplied entries; suppresses the dummy entry at the end.  */
        !           120: #  endif
        !           121: #  if defined __NetBSD__                                    /* NetBSD */
        !           122:     { "646",        "ASCII" },
        !           123:   /*{ "ARMSCII-8",  "ARMSCII-8" },*/
        !           124:   /*{ "BIG5",       "BIG5" },*/
        !           125:     { "Big5-HKSCS", "BIG5-HKSCS" },
        !           126:   /*{ "CP1251",     "CP1251" },*/
        !           127:   /*{ "CP866",      "CP866" },*/
        !           128:   /*{ "GB18030",    "GB18030" },*/
        !           129:   /*{ "GB2312",     "GB2312" },*/
        !           130:     { "ISO8859-1",  "ISO-8859-1" },
        !           131:     { "ISO8859-13", "ISO-8859-13" },
        !           132:     { "ISO8859-15", "ISO-8859-15" },
        !           133:     { "ISO8859-2",  "ISO-8859-2" },
        !           134:     { "ISO8859-4",  "ISO-8859-4" },
        !           135:     { "ISO8859-5",  "ISO-8859-5" },
        !           136:     { "ISO8859-7",  "ISO-8859-7" },
        !           137:   /*{ "KOI8-R",     "KOI8-R" },*/
        !           138:   /*{ "KOI8-U",     "KOI8-U" },*/
        !           139:   /*{ "PT154",      "PT154" },*/
        !           140:     { "SJIS",       "SHIFT_JIS" },
        !           141:     { "eucCN",      "GB2312" },
        !           142:     { "eucJP",      "EUC-JP" },
        !           143:     { "eucKR",      "EUC-KR" },
        !           144:     { "eucTW",      "EUC-TW" }
        !           145: #   define alias_table_defined
        !           146: #  endif
        !           147: #  if defined __OpenBSD__                                   /* OpenBSD */
        !           148:     { "646",        "ASCII" },
        !           149:     { "ISO8859-1",  "ISO-8859-1" },
        !           150:     { "ISO8859-13", "ISO-8859-13" },
        !           151:     { "ISO8859-15", "ISO-8859-15" },
        !           152:     { "ISO8859-2",  "ISO-8859-2" },
        !           153:     { "ISO8859-4",  "ISO-8859-4" },
        !           154:     { "ISO8859-5",  "ISO-8859-5" },
        !           155:     { "ISO8859-7",  "ISO-8859-7" }
        !           156: #   define alias_table_defined
        !           157: #  endif
        !           158: #  if defined __APPLE__ && defined __MACH__                 /* Mac OS X */
        !           159:     /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
        !           160:        useless:
        !           161:        - It returns the empty string when LANG is set to a locale of the
        !           162:          form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
        !           163:          LC_CTYPE file.
        !           164:        - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
        !           165:          the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
        !           166:        - The documentation says:
        !           167:            "... all code that calls BSD system routines should ensure
        !           168:             that the const *char parameters of these routines are in UTF-8
        !           169:             encoding. All BSD system functions expect their string
        !           170:             parameters to be in UTF-8 encoding and nothing else."
        !           171:          It also says
        !           172:            "An additional caveat is that string parameters for files,
        !           173:             paths, and other file-system entities must be in canonical
        !           174:             UTF-8. In a canonical UTF-8 Unicode string, all decomposable
        !           175:             characters are decomposed ..."
        !           176:          but this is not true: You can pass non-decomposed UTF-8 strings
        !           177:          to file system functions, and it is the OS which will convert
        !           178:          them to decomposed UTF-8 before accessing the file system.
        !           179:        - The Apple Terminal application displays UTF-8 by default.
        !           180:        - However, other applications are free to use different encodings:
        !           181:          - xterm uses ISO-8859-1 by default.
        !           182:          - TextEdit uses MacRoman by default.
        !           183:        We prefer UTF-8 over decomposed UTF-8-MAC because one should
        !           184:        minimize the use of decomposed Unicode. Unfortunately, through the
        !           185:        Darwin file system, decomposed UTF-8 strings are leaked into user
        !           186:        space nevertheless.
        !           187:        Then there are also the locales with encodings other than US-ASCII
        !           188:        and UTF-8. These locales can be occasionally useful to users (e.g.
        !           189:        when grepping through ISO-8859-1 encoded text files), when all their
        !           190:        file names are in US-ASCII.
        !           191:      */
        !           192:     { "ARMSCII-8",  "ARMSCII-8" },
        !           193:     { "Big5",       "BIG5" },
        !           194:     { "Big5HKSCS",  "BIG5-HKSCS" },
        !           195:     { "CP1131",     "CP1131" },
        !           196:     { "CP1251",     "CP1251" },
        !           197:     { "CP866",      "CP866" },
        !           198:     { "CP949",      "CP949" },
        !           199:     { "GB18030",    "GB18030" },
        !           200:     { "GB2312",     "GB2312" },
        !           201:     { "GBK",        "GBK" },
        !           202:   /*{ "ISCII-DEV",  "?" },*/
        !           203:     { "ISO8859-1",  "ISO-8859-1" },
        !           204:     { "ISO8859-13", "ISO-8859-13" },
        !           205:     { "ISO8859-15", "ISO-8859-15" },
        !           206:     { "ISO8859-2",  "ISO-8859-2" },
        !           207:     { "ISO8859-4",  "ISO-8859-4" },
        !           208:     { "ISO8859-5",  "ISO-8859-5" },
        !           209:     { "ISO8859-7",  "ISO-8859-7" },
        !           210:     { "ISO8859-9",  "ISO-8859-9" },
        !           211:     { "KOI8-R",     "KOI8-R" },
        !           212:     { "KOI8-U",     "KOI8-U" },
        !           213:     { "PT154",      "PT154" },
        !           214:     { "SJIS",       "SHIFT_JIS" },
        !           215:     { "eucCN",      "GB2312" },
        !           216:     { "eucJP",      "EUC-JP" },
        !           217:     { "eucKR",      "EUC-KR" }
        !           218: #   define alias_table_defined
        !           219: #  endif
        !           220: #  if defined _AIX                                          /* AIX */
        !           221:   /*{ "GBK",        "GBK" },*/
        !           222:     { "IBM-1046",   "CP1046" },
        !           223:     { "IBM-1124",   "CP1124" },
        !           224:     { "IBM-1129",   "CP1129" },
        !           225:     { "IBM-1252",   "CP1252" },
        !           226:     { "IBM-850",    "CP850" },
        !           227:     { "IBM-856",    "CP856" },
        !           228:     { "IBM-921",    "ISO-8859-13" },
        !           229:     { "IBM-922",    "CP922" },
        !           230:     { "IBM-932",    "CP932" },
        !           231:     { "IBM-943",    "CP943" },
        !           232:     { "IBM-eucCN",  "GB2312" },
        !           233:     { "IBM-eucJP",  "EUC-JP" },
        !           234:     { "IBM-eucKR",  "EUC-KR" },
        !           235:     { "IBM-eucTW",  "EUC-TW" },
        !           236:     { "ISO8859-1",  "ISO-8859-1" },
        !           237:     { "ISO8859-15", "ISO-8859-15" },
        !           238:     { "ISO8859-2",  "ISO-8859-2" },
        !           239:     { "ISO8859-5",  "ISO-8859-5" },
        !           240:     { "ISO8859-6",  "ISO-8859-6" },
        !           241:     { "ISO8859-7",  "ISO-8859-7" },
        !           242:     { "ISO8859-8",  "ISO-8859-8" },
        !           243:     { "ISO8859-9",  "ISO-8859-9" },
        !           244:     { "TIS-620",    "TIS-620" },
        !           245:   /*{ "UTF-8",      "UTF-8" },*/
        !           246:     { "big5",       "BIG5" }
        !           247: #   define alias_table_defined
        !           248: #  endif
        !           249: #  if defined __hpux                                        /* HP-UX */
        !           250:     { "SJIS",      "SHIFT_JIS" },
        !           251:     { "arabic8",   "HP-ARABIC8" },
        !           252:     { "big5",      "BIG5" },
        !           253:     { "cp1251",    "CP1251" },
        !           254:     { "eucJP",     "EUC-JP" },
        !           255:     { "eucKR",     "EUC-KR" },
        !           256:     { "eucTW",     "EUC-TW" },
        !           257:     { "gb18030",   "GB18030" },
        !           258:     { "greek8",    "HP-GREEK8" },
        !           259:     { "hebrew8",   "HP-HEBREW8" },
        !           260:     { "hkbig5",    "BIG5-HKSCS" },
        !           261:     { "hp15CN",    "GB2312" },
        !           262:     { "iso88591",  "ISO-8859-1" },
        !           263:     { "iso885913", "ISO-8859-13" },
        !           264:     { "iso885915", "ISO-8859-15" },
        !           265:     { "iso88592",  "ISO-8859-2" },
        !           266:     { "iso88594",  "ISO-8859-4" },
        !           267:     { "iso88595",  "ISO-8859-5" },
        !           268:     { "iso88596",  "ISO-8859-6" },
        !           269:     { "iso88597",  "ISO-8859-7" },
        !           270:     { "iso88598",  "ISO-8859-8" },
        !           271:     { "iso88599",  "ISO-8859-9" },
        !           272:     { "kana8",     "HP-KANA8" },
        !           273:     { "koi8r",     "KOI8-R" },
        !           274:     { "roman8",    "HP-ROMAN8" },
        !           275:     { "tis620",    "TIS-620" },
        !           276:     { "turkish8",  "HP-TURKISH8" },
        !           277:     { "utf8",      "UTF-8" }
        !           278: #   define alias_table_defined
        !           279: #  endif
        !           280: #  if defined __sgi                                         /* IRIX */
        !           281:     { "ISO8859-1",  "ISO-8859-1" },
        !           282:     { "ISO8859-15", "ISO-8859-15" },
        !           283:     { "ISO8859-2",  "ISO-8859-2" },
        !           284:     { "ISO8859-5",  "ISO-8859-5" },
        !           285:     { "ISO8859-7",  "ISO-8859-7" },
        !           286:     { "ISO8859-9",  "ISO-8859-9" },
        !           287:     { "eucCN",      "GB2312" },
        !           288:     { "eucJP",      "EUC-JP" },
        !           289:     { "eucKR",      "EUC-KR" },
        !           290:     { "eucTW",      "EUC-TW" }
        !           291: #   define alias_table_defined
        !           292: #  endif
        !           293: #  if defined __osf__                                       /* OSF/1 */
        !           294:   /*{ "GBK",        "GBK" },*/
        !           295:     { "ISO8859-1",  "ISO-8859-1" },
        !           296:     { "ISO8859-15", "ISO-8859-15" },
        !           297:     { "ISO8859-2",  "ISO-8859-2" },
        !           298:     { "ISO8859-4",  "ISO-8859-4" },
        !           299:     { "ISO8859-5",  "ISO-8859-5" },
        !           300:     { "ISO8859-7",  "ISO-8859-7" },
        !           301:     { "ISO8859-8",  "ISO-8859-8" },
        !           302:     { "ISO8859-9",  "ISO-8859-9" },
        !           303:     { "KSC5601",    "CP949" },
        !           304:     { "SJIS",       "SHIFT_JIS" },
        !           305:     { "TACTIS",     "TIS-620" },
        !           306:   /*{ "UTF-8",      "UTF-8" },*/
        !           307:     { "big5",       "BIG5" },
        !           308:     { "cp850",      "CP850" },
        !           309:     { "dechanyu",   "DEC-HANYU" },
        !           310:     { "dechanzi",   "GB2312" },
        !           311:     { "deckanji",   "DEC-KANJI" },
        !           312:     { "deckorean",  "EUC-KR" },
        !           313:     { "eucJP",      "EUC-JP" },
        !           314:     { "eucKR",      "EUC-KR" },
        !           315:     { "eucTW",      "EUC-TW" },
        !           316:     { "sdeckanji",  "EUC-JP" }
        !           317: #   define alias_table_defined
        !           318: #  endif
        !           319: #  if defined __sun                                         /* Solaris */
        !           320:     { "5601",        "EUC-KR" },
        !           321:     { "646",         "ASCII" },
        !           322:   /*{ "BIG5",        "BIG5" },*/
        !           323:     { "Big5-HKSCS",  "BIG5-HKSCS" },
        !           324:     { "GB18030",     "GB18030" },
        !           325:   /*{ "GBK",         "GBK" },*/
        !           326:     { "ISO8859-1",   "ISO-8859-1" },
        !           327:     { "ISO8859-11",  "TIS-620" },
        !           328:     { "ISO8859-13",  "ISO-8859-13" },
        !           329:     { "ISO8859-15",  "ISO-8859-15" },
        !           330:     { "ISO8859-2",   "ISO-8859-2" },
        !           331:     { "ISO8859-3",   "ISO-8859-3" },
        !           332:     { "ISO8859-4",   "ISO-8859-4" },
        !           333:     { "ISO8859-5",   "ISO-8859-5" },
        !           334:     { "ISO8859-6",   "ISO-8859-6" },
        !           335:     { "ISO8859-7",   "ISO-8859-7" },
        !           336:     { "ISO8859-8",   "ISO-8859-8" },
        !           337:     { "ISO8859-9",   "ISO-8859-9" },
        !           338:     { "PCK",         "SHIFT_JIS" },
        !           339:     { "TIS620.2533", "TIS-620" },
        !           340:   /*{ "UTF-8",       "UTF-8" },*/
        !           341:     { "ansi-1251",   "CP1251" },
        !           342:     { "cns11643",    "EUC-TW" },
        !           343:     { "eucJP",       "EUC-JP" },
        !           344:     { "gb2312",      "GB2312" },
        !           345:     { "koi8-r",      "KOI8-R" }
        !           346: #   define alias_table_defined
        !           347: #  endif
        !           348: #  if defined __minix                                       /* Minix */
        !           349:     { "646", "ASCII" }
        !           350: #   define alias_table_defined
        !           351: #  endif
        !           352: #  if defined WINDOWS_NATIVE || defined __CYGWIN__          /* Windows */
        !           353:     { "CP1361",  "JOHAB" },
        !           354:     { "CP20127", "ASCII" },
        !           355:     { "CP20866", "KOI8-R" },
        !           356:     { "CP20936", "GB2312" },
        !           357:     { "CP21866", "KOI8-RU" },
        !           358:     { "CP28591", "ISO-8859-1" },
        !           359:     { "CP28592", "ISO-8859-2" },
        !           360:     { "CP28593", "ISO-8859-3" },
        !           361:     { "CP28594", "ISO-8859-4" },
        !           362:     { "CP28595", "ISO-8859-5" },
        !           363:     { "CP28596", "ISO-8859-6" },
        !           364:     { "CP28597", "ISO-8859-7" },
        !           365:     { "CP28598", "ISO-8859-8" },
        !           366:     { "CP28599", "ISO-8859-9" },
        !           367:     { "CP28605", "ISO-8859-15" },
        !           368:     { "CP38598", "ISO-8859-8" },
        !           369:     { "CP51932", "EUC-JP" },
        !           370:     { "CP51936", "GB2312" },
        !           371:     { "CP51949", "EUC-KR" },
        !           372:     { "CP51950", "EUC-TW" },
        !           373:     { "CP54936", "GB18030" },
        !           374:     { "CP65001", "UTF-8" },
        !           375:     { "CP936",   "GBK" }
        !           376: #   define alias_table_defined
        !           377: #  endif
        !           378: #  if defined OS2                                           /* OS/2 */
        !           379:     /* The list of encodings is taken from "List of OS/2 Codepages"
        !           380:        by Alex Taylor:
        !           381:        <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
        !           382:        See also "IBM Globalization - Code page identifiers":
        !           383:        <https://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>.  */
        !           384:     { "CP1089", "ISO-8859-6" },
        !           385:     { "CP1208", "UTF-8" },
        !           386:     { "CP1381", "GB2312" },
        !           387:     { "CP1386", "GBK" },
        !           388:     { "CP3372", "EUC-JP" },
        !           389:     { "CP813",  "ISO-8859-7" },
        !           390:     { "CP819",  "ISO-8859-1" },
        !           391:     { "CP878",  "KOI8-R" },
        !           392:     { "CP912",  "ISO-8859-2" },
        !           393:     { "CP913",  "ISO-8859-3" },
        !           394:     { "CP914",  "ISO-8859-4" },
        !           395:     { "CP915",  "ISO-8859-5" },
        !           396:     { "CP916",  "ISO-8859-8" },
        !           397:     { "CP920",  "ISO-8859-9" },
        !           398:     { "CP921",  "ISO-8859-13" },
        !           399:     { "CP923",  "ISO-8859-15" },
        !           400:     { "CP954",  "EUC-JP" },
        !           401:     { "CP964",  "EUC-TW" },
        !           402:     { "CP970",  "EUC-KR" }
        !           403: #   define alias_table_defined
        !           404: #  endif
        !           405: #  if defined VMS                                           /* OpenVMS */
        !           406:     /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
        !           407:        "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
        !           408:        section 10.7 "Handling Different Character Sets".  */
        !           409:     { "DECHANYU",  "DEC-HANYU" },
        !           410:     { "DECHANZI",  "GB2312" },
        !           411:     { "DECKANJI",  "DEC-KANJI" },
        !           412:     { "DECKOREAN", "EUC-KR" },
        !           413:     { "ISO8859-1", "ISO-8859-1" },
        !           414:     { "ISO8859-2", "ISO-8859-2" },
        !           415:     { "ISO8859-5", "ISO-8859-5" },
        !           416:     { "ISO8859-7", "ISO-8859-7" },
        !           417:     { "ISO8859-8", "ISO-8859-8" },
        !           418:     { "ISO8859-9", "ISO-8859-9" },
        !           419:     { "SDECKANJI", "EUC-JP" },
        !           420:     { "SJIS",      "SHIFT_JIS" },
        !           421:     { "eucJP",     "EUC-JP" },
        !           422:     { "eucTW",     "EUC-TW" }
        !           423: #   define alias_table_defined
        !           424: #  endif
        !           425: #  ifndef alias_table_defined
        !           426:     /* Just a dummy entry, to avoid a C syntax error.  */
        !           427:     { "", "" }
        !           428: #  endif
        !           429:   };
1.1.1.2   misho     430: 
1.1.1.3 ! misho     431: # endif
1.1       misho     432: 
                    433: #else
                    434: 
1.1.1.3 ! misho     435: /* On these platforms, we use a mapping from locale name to GNU canonical
        !           436:    encoding name.  */
        !           437: 
        !           438: struct table_entry  /* One mapping: locale name -> GNU canonical encoding name.  */
        !           439: {
        !           440:   const char locale[17+1];     /* Locale name; room for 17 chars plus NUL.  */
        !           441:   const char canonical[11+1];  /* GNU canonical encoding name; room for 11 chars plus NUL.  */
        !           442: };
        !           443: 
        !           444: /* Table of platform-dependent mappings, sorted by locale name in ascending byte order.  */
        !           445: static const struct table_entry locale_table[] =
        !           446:   {
        !           447: # if defined __FreeBSD__                                    /* FreeBSD 4.2 */
        !           448:     { "cs_CZ.ISO_8859-2",  "ISO-8859-2" },
        !           449:     { "da_DK.DIS_8859-15", "ISO-8859-15" },
        !           450:     { "da_DK.ISO_8859-1",  "ISO-8859-1" },
        !           451:     { "de_AT.DIS_8859-15", "ISO-8859-15" },
        !           452:     { "de_AT.ISO_8859-1",  "ISO-8859-1" },
        !           453:     { "de_CH.DIS_8859-15", "ISO-8859-15" },
        !           454:     { "de_CH.ISO_8859-1",  "ISO-8859-1" },
        !           455:     { "de_DE.DIS_8859-15", "ISO-8859-15" },
        !           456:     { "de_DE.ISO_8859-1",  "ISO-8859-1" },
        !           457:     { "en_AU.DIS_8859-15", "ISO-8859-15" },
        !           458:     { "en_AU.ISO_8859-1",  "ISO-8859-1" },
        !           459:     { "en_CA.DIS_8859-15", "ISO-8859-15" },
        !           460:     { "en_CA.ISO_8859-1",  "ISO-8859-1" },
        !           461:     { "en_GB.DIS_8859-15", "ISO-8859-15" },
        !           462:     { "en_GB.ISO_8859-1",  "ISO-8859-1" },
        !           463:     { "en_US.DIS_8859-15", "ISO-8859-15" },
        !           464:     { "en_US.ISO_8859-1",  "ISO-8859-1" },
        !           465:     { "es_ES.DIS_8859-15", "ISO-8859-15" },
        !           466:     { "es_ES.ISO_8859-1",  "ISO-8859-1" },
        !           467:     { "fi_FI.DIS_8859-15", "ISO-8859-15" },
        !           468:     { "fi_FI.ISO_8859-1",  "ISO-8859-1" },
        !           469:     { "fr_BE.DIS_8859-15", "ISO-8859-15" },
        !           470:     { "fr_BE.ISO_8859-1",  "ISO-8859-1" },
        !           471:     { "fr_CA.DIS_8859-15", "ISO-8859-15" },
        !           472:     { "fr_CA.ISO_8859-1",  "ISO-8859-1" },
        !           473:     { "fr_CH.DIS_8859-15", "ISO-8859-15" },
        !           474:     { "fr_CH.ISO_8859-1",  "ISO-8859-1" },
        !           475:     { "fr_FR.DIS_8859-15", "ISO-8859-15" },
        !           476:     { "fr_FR.ISO_8859-1",  "ISO-8859-1" },
        !           477:     { "hr_HR.ISO_8859-2",  "ISO-8859-2" },
        !           478:     { "hu_HU.ISO_8859-2",  "ISO-8859-2" },
        !           479:     { "is_IS.DIS_8859-15", "ISO-8859-15" },
        !           480:     { "is_IS.ISO_8859-1",  "ISO-8859-1" },
        !           481:     { "it_CH.DIS_8859-15", "ISO-8859-15" },
        !           482:     { "it_CH.ISO_8859-1",  "ISO-8859-1" },
        !           483:     { "it_IT.DIS_8859-15", "ISO-8859-15" },
        !           484:     { "it_IT.ISO_8859-1",  "ISO-8859-1" },
        !           485:     { "ja_JP.EUC",         "EUC-JP" },
        !           486:     { "ja_JP.SJIS",        "SHIFT_JIS" },
        !           487:     { "ja_JP.Shift_JIS",   "SHIFT_JIS" },
        !           488:     { "ko_KR.EUC",         "EUC-KR" },
        !           489:     { "la_LN.ASCII",       "ASCII" },
        !           490:     { "la_LN.DIS_8859-15", "ISO-8859-15" },
        !           491:     { "la_LN.ISO_8859-1",  "ISO-8859-1" },
        !           492:     { "la_LN.ISO_8859-2",  "ISO-8859-2" },
        !           493:     { "la_LN.ISO_8859-4",  "ISO-8859-4" },
        !           494:     { "lt_LN.ASCII",       "ASCII" },
        !           495:     { "lt_LN.DIS_8859-15", "ISO-8859-15" },
        !           496:     { "lt_LN.ISO_8859-1",  "ISO-8859-1" },
        !           497:     { "lt_LN.ISO_8859-2",  "ISO-8859-2" },
        !           498:     { "lt_LT.ISO_8859-4",  "ISO-8859-4" },
        !           499:     { "nl_BE.DIS_8859-15", "ISO-8859-15" },
        !           500:     { "nl_BE.ISO_8859-1",  "ISO-8859-1" },
        !           501:     { "nl_NL.DIS_8859-15", "ISO-8859-15" },
        !           502:     { "nl_NL.ISO_8859-1",  "ISO-8859-1" },
        !           503:     { "no_NO.DIS_8859-15", "ISO-8859-15" },
        !           504:     { "no_NO.ISO_8859-1",  "ISO-8859-1" },
        !           505:     { "pl_PL.ISO_8859-2",  "ISO-8859-2" },
        !           506:     { "pt_PT.DIS_8859-15", "ISO-8859-15" },
        !           507:     { "pt_PT.ISO_8859-1",  "ISO-8859-1" },
        !           508:     { "ru_RU.CP866",       "CP866" },
        !           509:     { "ru_RU.ISO_8859-5",  "ISO-8859-5" },
        !           510:     { "ru_RU.KOI8-R",      "KOI8-R" },
        !           511:     { "ru_SU.CP866",       "CP866" },
        !           512:     { "ru_SU.ISO_8859-5",  "ISO-8859-5" },
        !           513:     { "ru_SU.KOI8-R",      "KOI8-R" },
        !           514:     { "sl_SI.ISO_8859-2",  "ISO-8859-2" },
        !           515:     { "sv_SE.DIS_8859-15", "ISO-8859-15" },
        !           516:     { "sv_SE.ISO_8859-1",  "ISO-8859-1" },
        !           517:     { "uk_UA.KOI8-U",      "KOI8-U" },
        !           518:     { "zh_CN.EUC",         "GB2312" },
        !           519:     { "zh_TW.BIG5",        "BIG5" },
        !           520:     { "zh_TW.Big5",        "BIG5" }
        !           521: #  define locale_table_defined
1.1       misho     522: # endif
1.1.1.3 ! misho     523: # if defined __DJGPP__                                      /* DOS / DJGPP 2.03 */
        !           524:     /* The encodings given here may not all be correct.
        !           525:        If you find that the encoding given for your language and
        !           526:        country is not the one your DOS machine actually uses, just
        !           527:        correct it in this file, and send a mail to
        !           528:        Juan Manuel Guerrero <juan.guerrero@gmx.de>
        !           529:        and <bug-gnulib@gnu.org>.  */
        !           530:     { "C",     "ASCII" },
        !           531:     { "ar",    "CP864" },
        !           532:     { "ar_AE", "CP864" },
        !           533:     { "ar_DZ", "CP864" },
        !           534:     { "ar_EG", "CP864" },
        !           535:     { "ar_IQ", "CP864" },
        !           536:     { "ar_IR", "CP864" },
        !           537:     { "ar_JO", "CP864" },
        !           538:     { "ar_KW", "CP864" },
        !           539:     { "ar_MA", "CP864" },
        !           540:     { "ar_OM", "CP864" },
        !           541:     { "ar_QA", "CP864" },
        !           542:     { "ar_SA", "CP864" },
        !           543:     { "ar_SY", "CP864" },
        !           544:     { "be",    "CP866" },
        !           545:     { "be_BE", "CP866" },
        !           546:     { "bg",    "CP866" }, /* not CP855 ?? */
        !           547:     { "bg_BG", "CP866" }, /* not CP855 ?? */
        !           548:     { "ca",    "CP850" },
        !           549:     { "ca_ES", "CP850" },
        !           550:     { "cs",    "CP852" },
        !           551:     { "cs_CZ", "CP852" },
        !           552:     { "da",    "CP865" }, /* not CP850 ?? */
        !           553:     { "da_DK", "CP865" }, /* not CP850 ?? */
        !           554:     { "de",    "CP850" },
        !           555:     { "de_AT", "CP850" },
        !           556:     { "de_CH", "CP850" },
        !           557:     { "de_DE", "CP850" },
        !           558:     { "el",    "CP869" },
        !           559:     { "el_GR", "CP869" },
        !           560:     { "en",    "CP850" },
        !           561:     { "en_AU", "CP850" }, /* not CP437 ?? */
        !           562:     { "en_CA", "CP850" },
        !           563:     { "en_GB", "CP850" },
        !           564:     { "en_NZ", "CP437" },
        !           565:     { "en_US", "CP437" },
        !           566:     { "en_ZA", "CP850" }, /* not CP437 ?? */
        !           567:     { "eo",    "CP850" },
        !           568:     { "eo_EO", "CP850" },
        !           569:     { "es",    "CP850" },
        !           570:     { "es_AR", "CP850" },
        !           571:     { "es_BO", "CP850" },
        !           572:     { "es_CL", "CP850" },
        !           573:     { "es_CO", "CP850" },
        !           574:     { "es_CR", "CP850" },
        !           575:     { "es_CU", "CP850" },
        !           576:     { "es_DO", "CP850" },
        !           577:     { "es_EC", "CP850" },
        !           578:     { "es_ES", "CP850" },
        !           579:     { "es_GT", "CP850" },
        !           580:     { "es_HN", "CP850" },
        !           581:     { "es_MX", "CP850" },
        !           582:     { "es_NI", "CP850" },
        !           583:     { "es_PA", "CP850" },
        !           584:     { "es_PE", "CP850" },
        !           585:     { "es_PY", "CP850" },
        !           586:     { "es_SV", "CP850" },
        !           587:     { "es_UY", "CP850" },
        !           588:     { "es_VE", "CP850" },
        !           589:     { "et",    "CP850" },
        !           590:     { "et_EE", "CP850" },
        !           591:     { "eu",    "CP850" },
        !           592:     { "eu_ES", "CP850" },
        !           593:     { "fi",    "CP850" },
        !           594:     { "fi_FI", "CP850" },
        !           595:     { "fr",    "CP850" },
        !           596:     { "fr_BE", "CP850" },
        !           597:     { "fr_CA", "CP850" },
        !           598:     { "fr_CH", "CP850" },
        !           599:     { "fr_FR", "CP850" },
        !           600:     { "ga",    "CP850" },
        !           601:     { "ga_IE", "CP850" },
        !           602:     { "gd",    "CP850" },
        !           603:     { "gd_GB", "CP850" },
        !           604:     { "gl",    "CP850" },
        !           605:     { "gl_ES", "CP850" },
        !           606:     { "he",    "CP862" },
        !           607:     { "he_IL", "CP862" },
        !           608:     { "hr",    "CP852" },
        !           609:     { "hr_HR", "CP852" },
        !           610:     { "hu",    "CP852" },
        !           611:     { "hu_HU", "CP852" },
        !           612:     { "id",    "CP850" }, /* not CP437 ?? */
        !           613:     { "id_ID", "CP850" }, /* not CP437 ?? */
        !           614:     { "is",    "CP861" }, /* not CP850 ?? */
        !           615:     { "is_IS", "CP861" }, /* not CP850 ?? */
        !           616:     { "it",    "CP850" },
        !           617:     { "it_CH", "CP850" },
        !           618:     { "it_IT", "CP850" },
        !           619:     { "ja",    "CP932" },
        !           620:     { "ja_JP", "CP932" },
        !           621:     { "kr",    "CP949" }, /* not CP934 ?? */
        !           622:     { "kr_KR", "CP949" }, /* not CP934 ?? */
        !           623:     { "lt",    "CP775" },
        !           624:     { "lt_LT", "CP775" },
        !           625:     { "lv",    "CP775" },
        !           626:     { "lv_LV", "CP775" },
        !           627:     { "mk",    "CP866" }, /* not CP855 ?? */
        !           628:     { "mk_MK", "CP866" }, /* not CP855 ?? */
        !           629:     { "mt",    "CP850" },
        !           630:     { "mt_MT", "CP850" },
        !           631:     { "nb",    "CP865" }, /* not CP850 ?? */
        !           632:     { "nb_NO", "CP865" }, /* not CP850 ?? */
        !           633:     { "nl",    "CP850" },
        !           634:     { "nl_BE", "CP850" },
        !           635:     { "nl_NL", "CP850" },
        !           636:     { "nn",    "CP865" }, /* not CP850 ?? */
        !           637:     { "nn_NO", "CP865" }, /* not CP850 ?? */
        !           638:     { "no",    "CP865" }, /* not CP850 ?? */
        !           639:     { "no_NO", "CP865" }, /* not CP850 ?? */
        !           640:     { "pl",    "CP852" },
        !           641:     { "pl_PL", "CP852" },
        !           642:     { "pt",    "CP850" },
        !           643:     { "pt_BR", "CP850" },
        !           644:     { "pt_PT", "CP850" },
        !           645:     { "ro",    "CP852" },
        !           646:     { "ro_RO", "CP852" },
        !           647:     { "ru",    "CP866" },
        !           648:     { "ru_RU", "CP866" },
        !           649:     { "sk",    "CP852" },
        !           650:     { "sk_SK", "CP852" },
        !           651:     { "sl",    "CP852" },
        !           652:     { "sl_SI", "CP852" },
        !           653:     { "sq",    "CP852" },
        !           654:     { "sq_AL", "CP852" },
        !           655:     { "sr",    "CP852" }, /* CP852 or CP866 or CP855 ?? */
        !           656:     { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
        !           657:     { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
        !           658:     { "sv",    "CP850" },
        !           659:     { "sv_SE", "CP850" },
        !           660:     { "th",    "CP874" },
        !           661:     { "th_TH", "CP874" },
        !           662:     { "tr",    "CP857" },
        !           663:     { "tr_TR", "CP857" },
        !           664:     { "uk",    "CP1125" },
        !           665:     { "uk_UA", "CP1125" },
        !           666:     { "zh_CN", "GBK" },
        !           667:     { "zh_TW", "CP950" } /* not CP938 ?? */
        !           668: #  define locale_table_defined
        !           669: # endif
        !           670: # ifndef locale_table_defined
        !           671:     /* Just a dummy entry, to avoid a C syntax error.  */
        !           672:     { "", "" }
        !           673: # endif
        !           674:   };
1.1       misho     675: 
1.1.1.3 ! misho     676: #endif


/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in localcharset.h.

   The result must not be freed or modified by the caller.  Depending on
   the platform branch taken, it points to a string literal, to an entry of
   one of the lookup tables, to the string returned by nl_langinfo, to a
   static buffer inside this function (which a later call may overwrite),
   or - on Cygwin and OS/2, when the locale environment variable carries an
   explicit ".ENCODING" suffix without "@modifier" - into the environment
   string itself.

   If the canonical name cannot be determined, the result is a
   non-canonical name.  Except for the early returns of an environment
   supplied encoding on Cygwin and OS/2, the result is never the empty
   string.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;

#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      /* Room for "CP", up to 10 decimal digits, and the terminating NUL.  */
      static char buf[2 + 10 + 1];

      /* Usual precedence: LC_ALL, then LC_CTYPE, then LANG.  */
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                /* No trailer: the rest of the environment string is the
                   encoding name.  */
                return dot;
              {
                /* modifier was found by searching forward from dot, so the
                   difference is non-negative.  */
                size_t len = (size_t) (modifier - dot);

                if (len < sizeof (buf))
                  {
                    memcpy (buf, dot, len);
                    buf[len] = '\0';
                    return buf;
                  }
                /* Otherwise the name does not fit; fall back to the
                   codepage number below.  */
              }
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

# elif defined WINDOWS_NATIVE

  /* Room for "CP", up to 10 characters of codepage, and the NUL.  */
  static char buf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_ALL, NULL);
  char *pdot;

  /* If they set different locales for different categories,
     'setlocale' will return a semi-colon separated list of locale
     values.  To make sure we use the correct one, we choose LC_CTYPE.  */
  if (current_locale != NULL && strchr (current_locale, ';'))
    current_locale = setlocale (LC_CTYPE, NULL);

  /* Never hand a null pointer to strrchr; without a locale string we
     simply take the GetACP() path.  */
  pdot = (current_locale != NULL ? strrchr (current_locale, '.') : NULL);
  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().
         When the output goes to a console window, it needs to be provided in
         GetOEMCP() encoding if the console is using a raster font, or in
         GetConsoleOutputCP() encoding if it is using a TrueType font.
         But in GUI programs and for output sent to files and pipes, GetACP()
         encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  codeset = buf;

# elif defined OS2

  const char *locale;
  /* Room for "CP", up to 10 decimal digits, and the terminating NUL.  */
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            /* No trailer: the rest of the environment string is the
               encoding name.  */
            return dot;
          {
            /* modifier was found by searching forward from dot, so the
               difference is non-negative.  */
            size_t len = (size_t) (modifier - dot);

            if (len < sizeof (buf))
              {
                memcpy (buf, dot, len);
                buf[len] = '\0';
                return buf;
              }
          }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.
         A nonzero return value is treated as failure.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* ULONG is declared by the OS/2 headers (presumably as
             'unsigned long'); convert explicitly so that the argument
             matches the "%u" conversion and the result is guaranteed to
             fit in buf.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

# else

#  error "Add code for other platforms here."

# endif

  /* Resolve alias.  */
  {
# ifdef alias_table_defined
    /* On some platforms, UTF-8 locales are the most frequently used ones.
       Speed up the common case and slow down the less common cases by
       testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    if (strcmp (codeset, "UTF-8") == 0)
      goto done_table_lookup;
    else
#  endif
      {
        const struct table_entry * const table = alias_table;
        size_t const table_size =
          sizeof (alias_table) / sizeof (struct table_entry);
        /* The table is sorted.  Perform a binary search.  */
        size_t hi = table_size;
        size_t lo = 0;
        while (lo < hi)
          {
            /* Invariant:
               for i < lo, strcmp (table[i].alias, codeset) < 0,
               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
            int cmp = strcmp (table[mid].alias, codeset);
            if (cmp < 0)
              lo = mid + 1;
            else if (cmp > 0)
              hi = mid;
            else
              {
                /* Found an i with
                     strcmp (table[i].alias, codeset) == 0.  */
                codeset = table[mid].canonical;
                goto done_table_lookup;
              }
          }
      }
    /* The label sits in the never-taken 'if (0)' arm so that jumping to it
       skips the "not found" handling in the 'else' arm, while falling out
       of the search loop executes that handling.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        if (codeset[0] == '\0')
          codeset = "ASCII";
# endif
      }
  }

#else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like DJGPP) have only the C locale.  Therefore we don't use setlocale
     here; it would return "C" when it doesn't support the locale name the
     user has set.  */
# if 0
  locale = setlocale (LC_CTYPE, NULL);
# endif
  if (locale == NULL || locale[0] == '\0')
    {
      /* Usual precedence: LC_ALL, then LC_CTYPE, then LANG.  */
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
          /* None of the variables may be set at all; the table lookup
             below needs a valid string.  */
          if (locale == NULL)
            locale = "";
        }
    }

  /* Map locale name to canonical encoding name.  */
  {
# ifdef locale_table_defined
    const struct table_entry * const table = locale_table;
    size_t const table_size =
      sizeof (locale_table) / sizeof (struct table_entry);
    /* The table is sorted.  Perform a binary search.  */
    size_t hi = table_size;
    size_t lo = 0;
    while (lo < hi)
      {
        /* Invariant:
           for i < lo, strcmp (table[i].locale, locale) < 0,
           for i >= hi, strcmp (table[i].locale, locale) > 0.  */
        size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
        int cmp = strcmp (table[mid].locale, locale);
        if (cmp < 0)
          lo = mid + 1;
        else if (cmp > 0)
          hi = mid;
        else
          {
            /* Found an i with
                 strcmp (table[i].locale, locale) == 0.  */
            codeset = table[mid].canonical;
            goto done_table_lookup;
          }
      }
    /* Same 'if (0) label: ; else' construct as in the alias lookup: a
       successful search jumps past the fallback in the 'else' arm.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* The canonical name cannot be determined.  */
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        codeset = "ASCII";
# endif
      }
  }

#endif

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>