Annotation of embedaddon/libiconv/libcharset/lib/localcharset.c, revision 1.1

1.1     ! misho       1: /* Determine a canonical name for the current locale's character encoding.
        !             2: 
        !             3:    Copyright (C) 2000-2006, 2008-2009 Free Software Foundation, Inc.
        !             4: 
        !             5:    This program is free software; you can redistribute it and/or modify it
        !             6:    under the terms of the GNU Library General Public License as published
        !             7:    by the Free Software Foundation; either version 2, or (at your option)
        !             8:    any later version.
        !             9: 
        !            10:    This program is distributed in the hope that it will be useful,
        !            11:    but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            12:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            13:    Library General Public License for more details.
        !            14: 
        !            15:    You should have received a copy of the GNU Library General Public
        !            16:    License along with this program; if not, write to the Free Software
        !            17:    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
        !            18:    USA.  */
        !            19: 
        !            20: /* Written by Bruno Haible <bruno@clisp.org>.  */
        !            21: 
        !            22: #include <config.h>
        !            23: 
        !            24: /* Specification.  */
        !            25: #include "localcharset.h"
        !            26: 
        !            27: #include <stddef.h>
        !            28: #include <stdio.h>
        !            29: #include <string.h>
        !            30: #include <stdlib.h>
        !            31: 
        !            32: #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
        !            33: # define DARWIN7 /* Darwin 7 or newer, i.e. MacOS X 10.3 or newer */
        !            34: #endif
        !            35: 
        !            36: #if defined _WIN32 || defined __WIN32__
        !            37: # define WIN32_NATIVE
        !            38: #endif
        !            39: 
        !            40: #if defined __EMX__
        !            41: /* Assume EMX program runs on OS/2, even if compiled under DOS.  */
        !            42: # ifndef OS2
        !            43: #  define OS2
        !            44: # endif
        !            45: #endif
        !            46: 
        !            47: #if !defined WIN32_NATIVE
        !            48: # if HAVE_LANGINFO_CODESET
        !            49: #  include <langinfo.h>
        !            50: # else
        !            51: #  if 0 /* see comment below */
        !            52: #   include <locale.h>
        !            53: #  endif
        !            54: # endif
        !            55: # ifdef __CYGWIN__
        !            56: #  define WIN32_LEAN_AND_MEAN
        !            57: #  include <windows.h>
        !            58: # endif
        !            59: #elif defined WIN32_NATIVE
        !            60: # define WIN32_LEAN_AND_MEAN
        !            61: # include <windows.h>
        !            62: #endif
        !            63: #if defined OS2
        !            64: # define INCL_DOS
        !            65: # include <os2.h>
        !            66: #endif
        !            67: 
        !            68: #if ENABLE_RELOCATABLE
        !            69: # include "relocatable.h"
        !            70: #else
        !            71: # define relocate(pathname) (pathname)
        !            72: #endif
        !            73: 
        !            74: /* Get LIBDIR.  */
        !            75: #ifndef LIBDIR
        !            76: # include "configmake.h"
        !            77: #endif
        !            78: 
        !            79: #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
        !            80:   /* Win32, Cygwin, OS/2, DOS */
        !            81: # define ISSLASH(C) ((C) == '/' || (C) == '\\')
        !            82: #endif
        !            83: 
        !            84: #ifndef DIRECTORY_SEPARATOR
        !            85: # define DIRECTORY_SEPARATOR '/'
        !            86: #endif
        !            87: 
        !            88: #ifndef ISSLASH
        !            89: # define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
        !            90: #endif
        !            91: 
        !            92: #if HAVE_DECL_GETC_UNLOCKED
        !            93: # undef getc
        !            94: # define getc getc_unlocked
        !            95: #endif
        !            96: 
/* The following static variable is declared 'volatile' to avoid a
   possible multithread problem in the function get_charset_aliases. If we
   are running in a threaded environment, and if two threads initialize
   'charset_aliases' simultaneously, both will produce the same value,
   and everything will be ok if the two assignments to 'charset_aliases'
   are atomic. But I don't know what will happen if the two assignments mix.
   NOTE(review): no lock or atomic operation protects this variable in this
   file; whether concurrent first calls are safe depends on the platform --
   confirm before relying on it.  */
/* When the compiler does not announce itself as standard C (__STDC__ is not
   1), 'volatile' is defined away for the rest of this file, so the
   qualifier on the declaration below disappears.  */
#if __STDC__ != 1
# define volatile /* empty */
#endif
/* Pointer to the contents of the charset.alias file, if it has already been
   read, else NULL.  Its format is:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'
   It is read and assigned only by get_charset_aliases, which stores either
   a heap-allocated table, a string literal table, or "" (empty table).  */
static const char * volatile charset_aliases;
        !           110: 
        !           111: /* Return a pointer to the contents of the charset.alias file.  */
        !           112: static const char *
        !           113: get_charset_aliases (void)
        !           114: {
        !           115:   const char *cp;
        !           116: 
        !           117:   cp = charset_aliases;
        !           118:   if (cp == NULL)
        !           119:     {
        !           120: #if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
        !           121:       FILE *fp;
        !           122:       const char *dir;
        !           123:       const char *base = "charset.alias";
        !           124:       char *file_name;
        !           125: 
        !           126:       /* Make it possible to override the charset.alias location.  This is
        !           127:         necessary for running the testsuite before "make install".  */
        !           128:       dir = getenv ("CHARSETALIASDIR");
        !           129:       if (dir == NULL || dir[0] == '\0')
        !           130:        dir = relocate (LIBDIR);
        !           131: 
        !           132:       /* Concatenate dir and base into freshly allocated file_name.  */
        !           133:       {
        !           134:        size_t dir_len = strlen (dir);
        !           135:        size_t base_len = strlen (base);
        !           136:        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
        !           137:        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
        !           138:        if (file_name != NULL)
        !           139:          {
        !           140:            memcpy (file_name, dir, dir_len);
        !           141:            if (add_slash)
        !           142:              file_name[dir_len] = DIRECTORY_SEPARATOR;
        !           143:            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
        !           144:          }
        !           145:       }
        !           146: 
        !           147:       if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
        !           148:        /* Out of memory or file not found, treat it as empty.  */
        !           149:        cp = "";
        !           150:       else
        !           151:        {
        !           152:          /* Parse the file's contents.  */
        !           153:          char *res_ptr = NULL;
        !           154:          size_t res_size = 0;
        !           155: 
        !           156:          for (;;)
        !           157:            {
        !           158:              int c;
        !           159:              char buf1[50+1];
        !           160:              char buf2[50+1];
        !           161:              size_t l1, l2;
        !           162:              char *old_res_ptr;
        !           163: 
        !           164:              c = getc (fp);
        !           165:              if (c == EOF)
        !           166:                break;
        !           167:              if (c == '\n' || c == ' ' || c == '\t')
        !           168:                continue;
        !           169:              if (c == '#')
        !           170:                {
        !           171:                  /* Skip comment, to end of line.  */
        !           172:                  do
        !           173:                    c = getc (fp);
        !           174:                  while (!(c == EOF || c == '\n'));
        !           175:                  if (c == EOF)
        !           176:                    break;
        !           177:                  continue;
        !           178:                }
        !           179:              ungetc (c, fp);
        !           180:              if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
        !           181:                break;
        !           182:              l1 = strlen (buf1);
        !           183:              l2 = strlen (buf2);
        !           184:              old_res_ptr = res_ptr;
        !           185:              if (res_size == 0)
        !           186:                {
        !           187:                  res_size = l1 + 1 + l2 + 1;
        !           188:                  res_ptr = (char *) malloc (res_size + 1);
        !           189:                }
        !           190:              else
        !           191:                {
        !           192:                  res_size += l1 + 1 + l2 + 1;
        !           193:                  res_ptr = (char *) realloc (res_ptr, res_size + 1);
        !           194:                }
        !           195:              if (res_ptr == NULL)
        !           196:                {
        !           197:                  /* Out of memory. */
        !           198:                  res_size = 0;
        !           199:                  if (old_res_ptr != NULL)
        !           200:                    free (old_res_ptr);
        !           201:                  break;
        !           202:                }
        !           203:              strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
        !           204:              strcpy (res_ptr + res_size - (l2 + 1), buf2);
        !           205:            }
        !           206:          fclose (fp);
        !           207:          if (res_size == 0)
        !           208:            cp = "";
        !           209:          else
        !           210:            {
        !           211:              *(res_ptr + res_size) = '\0';
        !           212:              cp = res_ptr;
        !           213:            }
        !           214:        }
        !           215: 
        !           216:       if (file_name != NULL)
        !           217:        free (file_name);
        !           218: 
        !           219: #else
        !           220: 
        !           221: # if defined DARWIN7
        !           222:       /* To avoid the trouble of installing a file that is shared by many
        !           223:         GNU packages -- many packaging systems have problems with this --,
        !           224:         simply inline the aliases here.  */
        !           225:       cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
        !           226:           "ISO8859-2" "\0" "ISO-8859-2" "\0"
        !           227:           "ISO8859-4" "\0" "ISO-8859-4" "\0"
        !           228:           "ISO8859-5" "\0" "ISO-8859-5" "\0"
        !           229:           "ISO8859-7" "\0" "ISO-8859-7" "\0"
        !           230:           "ISO8859-9" "\0" "ISO-8859-9" "\0"
        !           231:           "ISO8859-13" "\0" "ISO-8859-13" "\0"
        !           232:           "ISO8859-15" "\0" "ISO-8859-15" "\0"
        !           233:           "KOI8-R" "\0" "KOI8-R" "\0"
        !           234:           "KOI8-U" "\0" "KOI8-U" "\0"
        !           235:           "CP866" "\0" "CP866" "\0"
        !           236:           "CP949" "\0" "CP949" "\0"
        !           237:           "CP1131" "\0" "CP1131" "\0"
        !           238:           "CP1251" "\0" "CP1251" "\0"
        !           239:           "eucCN" "\0" "GB2312" "\0"
        !           240:           "GB2312" "\0" "GB2312" "\0"
        !           241:           "eucJP" "\0" "EUC-JP" "\0"
        !           242:           "eucKR" "\0" "EUC-KR" "\0"
        !           243:           "Big5" "\0" "BIG5" "\0"
        !           244:           "Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
        !           245:           "GBK" "\0" "GBK" "\0"
        !           246:           "GB18030" "\0" "GB18030" "\0"
        !           247:           "SJIS" "\0" "SHIFT_JIS" "\0"
        !           248:           "ARMSCII-8" "\0" "ARMSCII-8" "\0"
        !           249:           "PT154" "\0" "PT154" "\0"
        !           250:         /*"ISCII-DEV" "\0" "?" "\0"*/
        !           251:           "*" "\0" "UTF-8" "\0";
        !           252: # endif
        !           253: 
        !           254: # if defined VMS
        !           255:       /* To avoid the troubles of an extra file charset.alias_vms in the
        !           256:         sources of many GNU packages, simply inline the aliases here.  */
        !           257:       /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
        !           258:         "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
        !           259:         section 10.7 "Handling Different Character Sets".  */
        !           260:       cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
        !           261:           "ISO8859-2" "\0" "ISO-8859-2" "\0"
        !           262:           "ISO8859-5" "\0" "ISO-8859-5" "\0"
        !           263:           "ISO8859-7" "\0" "ISO-8859-7" "\0"
        !           264:           "ISO8859-8" "\0" "ISO-8859-8" "\0"
        !           265:           "ISO8859-9" "\0" "ISO-8859-9" "\0"
        !           266:           /* Japanese */
        !           267:           "eucJP" "\0" "EUC-JP" "\0"
        !           268:           "SJIS" "\0" "SHIFT_JIS" "\0"
        !           269:           "DECKANJI" "\0" "DEC-KANJI" "\0"
        !           270:           "SDECKANJI" "\0" "EUC-JP" "\0"
        !           271:           /* Chinese */
        !           272:           "eucTW" "\0" "EUC-TW" "\0"
        !           273:           "DECHANYU" "\0" "DEC-HANYU" "\0"
        !           274:           "DECHANZI" "\0" "GB2312" "\0"
        !           275:           /* Korean */
        !           276:           "DECKOREAN" "\0" "EUC-KR" "\0";
        !           277: # endif
        !           278: 
        !           279: # if defined WIN32_NATIVE || defined __CYGWIN__
        !           280:       /* To avoid the troubles of installing a separate file in the same
        !           281:         directory as the DLL and of retrieving the DLL's directory at
        !           282:         runtime, simply inline the aliases here.  */
        !           283: 
        !           284:       cp = "CP936" "\0" "GBK" "\0"
        !           285:           "CP1361" "\0" "JOHAB" "\0"
        !           286:           "CP20127" "\0" "ASCII" "\0"
        !           287:           "CP20866" "\0" "KOI8-R" "\0"
        !           288:           "CP20936" "\0" "GB2312" "\0"
        !           289:           "CP21866" "\0" "KOI8-RU" "\0"
        !           290:           "CP28591" "\0" "ISO-8859-1" "\0"
        !           291:           "CP28592" "\0" "ISO-8859-2" "\0"
        !           292:           "CP28593" "\0" "ISO-8859-3" "\0"
        !           293:           "CP28594" "\0" "ISO-8859-4" "\0"
        !           294:           "CP28595" "\0" "ISO-8859-5" "\0"
        !           295:           "CP28596" "\0" "ISO-8859-6" "\0"
        !           296:           "CP28597" "\0" "ISO-8859-7" "\0"
        !           297:           "CP28598" "\0" "ISO-8859-8" "\0"
        !           298:           "CP28599" "\0" "ISO-8859-9" "\0"
        !           299:           "CP28605" "\0" "ISO-8859-15" "\0"
        !           300:           "CP38598" "\0" "ISO-8859-8" "\0"
        !           301:           "CP51932" "\0" "EUC-JP" "\0"
        !           302:           "CP51936" "\0" "GB2312" "\0"
        !           303:           "CP51949" "\0" "EUC-KR" "\0"
        !           304:           "CP51950" "\0" "EUC-TW" "\0"
        !           305:           "CP54936" "\0" "GB18030" "\0"
        !           306:           "CP65001" "\0" "UTF-8" "\0";
        !           307: # endif
        !           308: #endif
        !           309: 
        !           310:       charset_aliases = cp;
        !           311:     }
        !           312: 
        !           313:   return cp;
        !           314: }
        !           315: 
        !           316: /* Determine the current locale's character encoding, and canonicalize it
        !           317:    into one of the canonical names listed in config.charset.
        !           318:    The result must not be freed; it is statically allocated.
        !           319:    If the canonical name cannot be determined, the result is a non-canonical
        !           320:    name.  */
        !           321: 
        !           322: #ifdef STATIC
        !           323: STATIC
        !           324: #endif
        !           325: const char *
        !           326: locale_charset (void)
        !           327: {
        !           328:   const char *codeset;
        !           329:   const char *aliases;
        !           330: 
        !           331: #if !(defined WIN32_NATIVE || defined OS2)
        !           332: 
        !           333: # if HAVE_LANGINFO_CODESET
        !           334: 
        !           335:   /* Most systems support nl_langinfo (CODESET) nowadays.  */
        !           336:   codeset = nl_langinfo (CODESET);
        !           337: 
        !           338: #  ifdef __CYGWIN__
        !           339:   /* Cygwin 2006 does not have locales.  nl_langinfo (CODESET) always
        !           340:      returns "US-ASCII".  As long as this is not fixed, return the suffix
        !           341:      of the locale name from the environment variables (if present) or
        !           342:      the codepage as a number.  */
        !           343:   if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
        !           344:     {
        !           345:       const char *locale;
        !           346:       static char buf[2 + 10 + 1];
        !           347: 
        !           348:       locale = getenv ("LC_ALL");
        !           349:       if (locale == NULL || locale[0] == '\0')
        !           350:        {
        !           351:          locale = getenv ("LC_CTYPE");
        !           352:          if (locale == NULL || locale[0] == '\0')
        !           353:            locale = getenv ("LANG");
        !           354:        }
        !           355:       if (locale != NULL && locale[0] != '\0')
        !           356:        {
        !           357:          /* If the locale name contains an encoding after the dot, return
        !           358:             it.  */
        !           359:          const char *dot = strchr (locale, '.');
        !           360: 
        !           361:          if (dot != NULL)
        !           362:            {
        !           363:              const char *modifier;
        !           364: 
        !           365:              dot++;
        !           366:              /* Look for the possible @... trailer and remove it, if any.  */
        !           367:              modifier = strchr (dot, '@');
        !           368:              if (modifier == NULL)
        !           369:                return dot;
        !           370:              if (modifier - dot < sizeof (buf))
        !           371:                {
        !           372:                  memcpy (buf, dot, modifier - dot);
        !           373:                  buf [modifier - dot] = '\0';
        !           374:                  return buf;
        !           375:                }
        !           376:            }
        !           377:        }
        !           378: 
        !           379:       /* Woe32 has a function returning the locale's codepage as a number.  */
        !           380:       sprintf (buf, "CP%u", GetACP ());
        !           381:       codeset = buf;
        !           382:     }
        !           383: #  endif
        !           384: 
        !           385: # else
        !           386: 
        !           387:   /* On old systems which lack it, use setlocale or getenv.  */
        !           388:   const char *locale = NULL;
        !           389: 
        !           390:   /* But most old systems don't have a complete set of locales.  Some
        !           391:      (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
        !           392:      use setlocale here; it would return "C" when it doesn't support the
        !           393:      locale name the user has set.  */
        !           394: #  if 0
        !           395:   locale = setlocale (LC_CTYPE, NULL);
        !           396: #  endif
        !           397:   if (locale == NULL || locale[0] == '\0')
        !           398:     {
        !           399:       locale = getenv ("LC_ALL");
        !           400:       if (locale == NULL || locale[0] == '\0')
        !           401:        {
        !           402:          locale = getenv ("LC_CTYPE");
        !           403:          if (locale == NULL || locale[0] == '\0')
        !           404:            locale = getenv ("LANG");
        !           405:        }
        !           406:     }
        !           407: 
        !           408:   /* On some old systems, one used to set locale = "iso8859_1". On others,
        !           409:      you set it to "language_COUNTRY.charset". In any case, we resolve it
        !           410:      through the charset.alias file.  */
        !           411:   codeset = locale;
        !           412: 
        !           413: # endif
        !           414: 
        !           415: #elif defined WIN32_NATIVE
        !           416: 
        !           417:   static char buf[2 + 10 + 1];
        !           418: 
        !           419:   /* Woe32 has a function returning the locale's codepage as a number.  */
        !           420:   sprintf (buf, "CP%u", GetACP ());
        !           421:   codeset = buf;
        !           422: 
        !           423: #elif defined OS2
        !           424: 
        !           425:   const char *locale;
        !           426:   static char buf[2 + 10 + 1];
        !           427:   ULONG cp[3];
        !           428:   ULONG cplen;
        !           429: 
        !           430:   /* Allow user to override the codeset, as set in the operating system,
        !           431:      with standard language environment variables.  */
        !           432:   locale = getenv ("LC_ALL");
        !           433:   if (locale == NULL || locale[0] == '\0')
        !           434:     {
        !           435:       locale = getenv ("LC_CTYPE");
        !           436:       if (locale == NULL || locale[0] == '\0')
        !           437:        locale = getenv ("LANG");
        !           438:     }
        !           439:   if (locale != NULL && locale[0] != '\0')
        !           440:     {
        !           441:       /* If the locale name contains an encoding after the dot, return it.  */
        !           442:       const char *dot = strchr (locale, '.');
        !           443: 
        !           444:       if (dot != NULL)
        !           445:        {
        !           446:          const char *modifier;
        !           447: 
        !           448:          dot++;
        !           449:          /* Look for the possible @... trailer and remove it, if any.  */
        !           450:          modifier = strchr (dot, '@');
        !           451:          if (modifier == NULL)
        !           452:            return dot;
        !           453:          if (modifier - dot < sizeof (buf))
        !           454:            {
        !           455:              memcpy (buf, dot, modifier - dot);
        !           456:              buf [modifier - dot] = '\0';
        !           457:              return buf;
        !           458:            }
        !           459:        }
        !           460: 
        !           461:       /* Resolve through the charset.alias file.  */
        !           462:       codeset = locale;
        !           463:     }
        !           464:   else
        !           465:     {
        !           466:       /* OS/2 has a function returning the locale's codepage as a number.  */
        !           467:       if (DosQueryCp (sizeof (cp), cp, &cplen))
        !           468:        codeset = "";
        !           469:       else
        !           470:        {
        !           471:          sprintf (buf, "CP%u", cp[0]);
        !           472:          codeset = buf;
        !           473:        }
        !           474:     }
        !           475: 
        !           476: #endif
        !           477: 
        !           478:   if (codeset == NULL)
        !           479:     /* The canonical name cannot be determined.  */
        !           480:     codeset = "";
        !           481: 
        !           482:   /* Resolve alias. */
        !           483:   for (aliases = get_charset_aliases ();
        !           484:        *aliases != '\0';
        !           485:        aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
        !           486:     if (strcmp (codeset, aliases) == 0
        !           487:        || (aliases[0] == '*' && aliases[1] == '\0'))
        !           488:       {
        !           489:        codeset = aliases + strlen (aliases) + 1;
        !           490:        break;
        !           491:       }
        !           492: 
        !           493:   /* Don't return an empty string.  GNU libc and GNU libiconv interpret
        !           494:      the empty string as denoting "the locale's character encoding",
        !           495:      thus GNU libiconv would call this function a second time.  */
        !           496:   if (codeset[0] == '\0')
        !           497:     codeset = "ASCII";
        !           498: 
        !           499:   return codeset;
        !           500: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>