Annotation of embedaddon/readline/mbutil.c, revision 1.1

1.1     ! misho       1: /* mbutil.c -- readline multibyte character utility functions */
        !             2: 
        !             3: /* Copyright (C) 2001-2009 Free Software Foundation, Inc.
        !             4: 
        !             5:    This file is part of the GNU Readline Library (Readline), a library
        !             6:    for reading lines of text with interactive input and history editing.      
        !             7: 
        !             8:    Readline is free software: you can redistribute it and/or modify
        !             9:    it under the terms of the GNU General Public License as published by
        !            10:    the Free Software Foundation, either version 3 of the License, or
        !            11:    (at your option) any later version.
        !            12: 
        !            13:    Readline is distributed in the hope that it will be useful,
        !            14:    but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            15:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            16:    GNU General Public License for more details.
        !            17: 
        !            18:    You should have received a copy of the GNU General Public License
        !            19:    along with Readline.  If not, see <http://www.gnu.org/licenses/>.
        !            20: */
        !            21: 
        !            22: #define READLINE_LIBRARY
        !            23: 
        !            24: #if defined (HAVE_CONFIG_H)
        !            25: #  include <config.h>
        !            26: #endif
        !            27: 
        !            28: #include <sys/types.h>
        !            29: #include <fcntl.h>
        !            30: #include "posixjmp.h"
        !            31: 
        !            32: #if defined (HAVE_UNISTD_H)
        !            33: #  include <unistd.h>     /* for _POSIX_VERSION */
        !            34: #endif /* HAVE_UNISTD_H */
        !            35: 
        !            36: #if defined (HAVE_STDLIB_H)
        !            37: #  include <stdlib.h>
        !            38: #else
        !            39: #  include "ansi_stdlib.h"
        !            40: #endif /* HAVE_STDLIB_H */
        !            41: 
        !            42: #include <stdio.h>
        !            43: #include <ctype.h>
        !            44: 
        !            45: /* System-specific feature definitions and include files. */
        !            46: #include "rldefs.h"
        !            47: #include "rlmbutil.h"
        !            48: 
        !            49: #if defined (TIOCSTAT_IN_SYS_IOCTL)
        !            50: #  include <sys/ioctl.h>
        !            51: #endif /* TIOCSTAT_IN_SYS_IOCTL */
        !            52: 
        !            53: /* Some standard library routines. */
        !            54: #include "readline.h"
        !            55: 
        !            56: #include "rlprivate.h"
        !            57: #include "xmalloc.h"
        !            58: 
        !            59: /* Declared here so it can be shared between the readline and history
        !            60:    libraries. */
        !            61: #if defined (HANDLE_MULTIBYTE)
        !            62: int rl_byte_oriented = 0;
        !            63: #else
        !            64: int rl_byte_oriented = 1;
        !            65: #endif
        !            66: 
        !            67: /* Ditto */
        !            68: int _rl_utf8locale = 0;
        !            69: 
        !            70: /* **************************************************************** */
        !            71: /*                                                                 */
        !            72: /*             Multibyte Character Utility Functions               */
        !            73: /*                                                                 */
        !            74: /* **************************************************************** */
        !            75: 
        !            76: #if defined(HANDLE_MULTIBYTE)
        !            77: 
        !            78: static int
        !            79: _rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
        !            80:      char *string;
        !            81:      int seed, count, find_non_zero;
        !            82: {
        !            83:   size_t tmp, len;
        !            84:   mbstate_t ps;
        !            85:   int point;
        !            86:   wchar_t wc;
        !            87: 
        !            88:   tmp = 0;
        !            89: 
        !            90:   memset(&ps, 0, sizeof (mbstate_t));
        !            91:   if (seed < 0)
        !            92:     seed = 0;
        !            93:   if (count <= 0)
        !            94:     return seed;
        !            95: 
        !            96:   point = seed + _rl_adjust_point (string, seed, &ps);
        !            97:   /* if this is true, means that seed was not pointing to a byte indicating
        !            98:      the beginning of a multibyte character.  Correct the point and consume
        !            99:      one char. */
        !           100:   if (seed < point)
        !           101:     count--;
        !           102: 
        !           103:   while (count > 0)  
        !           104:     {
        !           105:       len = strlen (string + point);
        !           106:       if (len == 0)
        !           107:        break;
        !           108:       tmp = mbrtowc (&wc, string+point, len, &ps);
        !           109:       if (MB_INVALIDCH ((size_t)tmp))
        !           110:        {
        !           111:          /* invalid bytes. assume a byte represents a character */
        !           112:          point++;
        !           113:          count--;
        !           114:          /* reset states. */
        !           115:          memset(&ps, 0, sizeof(mbstate_t));
        !           116:        }
        !           117:       else if (MB_NULLWCH (tmp))
        !           118:        break;                  /* found wide '\0' */
        !           119:       else
        !           120:        {
        !           121:          /* valid bytes */
        !           122:          point += tmp;
        !           123:          if (find_non_zero)
        !           124:            {
        !           125:              if (WCWIDTH (wc) == 0)
        !           126:                continue;
        !           127:              else
        !           128:                count--;
        !           129:            }
        !           130:          else
        !           131:            count--;
        !           132:        }
        !           133:     }
        !           134: 
        !           135:   if (find_non_zero)
        !           136:     {
        !           137:       tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
        !           138:       while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && WCWIDTH (wc) == 0)
        !           139:        {
        !           140:          point += tmp;
        !           141:          tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
        !           142:        }
        !           143:     }
        !           144: 
        !           145:   return point;
        !           146: }
        !           147: 
        !           148: static int
        !           149: _rl_find_prev_mbchar_internal (string, seed, find_non_zero)
        !           150:      char *string;
        !           151:      int seed, find_non_zero;
        !           152: {
        !           153:   mbstate_t ps;
        !           154:   int prev, non_zero_prev, point, length;
        !           155:   size_t tmp;
        !           156:   wchar_t wc;
        !           157: 
        !           158:   memset(&ps, 0, sizeof(mbstate_t));
        !           159:   length = strlen(string);
        !           160:   
        !           161:   if (seed < 0)
        !           162:     return 0;
        !           163:   else if (length < seed)
        !           164:     return length;
        !           165: 
        !           166:   prev = non_zero_prev = point = 0;
        !           167:   while (point < seed)
        !           168:     {
        !           169:       tmp = mbrtowc (&wc, string + point, length - point, &ps);
        !           170:       if (MB_INVALIDCH ((size_t)tmp))
        !           171:        {
        !           172:          /* in this case, bytes are invalid or shorted to compose
        !           173:             multibyte char, so assume that the first byte represents
        !           174:             a single character anyway. */
        !           175:          tmp = 1;
        !           176:          /* clear the state of the byte sequence, because
        !           177:             in this case effect of mbstate is undefined  */
        !           178:          memset(&ps, 0, sizeof (mbstate_t));
        !           179: 
        !           180:          /* Since we're assuming that this byte represents a single
        !           181:             non-zero-width character, don't forget about it. */
        !           182:          prev = point;
        !           183:        }
        !           184:       else if (MB_NULLWCH (tmp))
        !           185:        break;                  /* Found '\0' char.  Can this happen? */
        !           186:       else
        !           187:        {
        !           188:          if (find_non_zero)
        !           189:            {
        !           190:              if (WCWIDTH (wc) != 0)
        !           191:                prev = point;
        !           192:            }
        !           193:          else
        !           194:            prev = point;  
        !           195:        }
        !           196: 
        !           197:       point += tmp;
        !           198:     }
        !           199: 
        !           200:   return prev;
        !           201: }
        !           202: 
        !           203: /* return the number of bytes parsed from the multibyte sequence starting
        !           204:    at src, if a non-L'\0' wide character was recognized. It returns 0, 
        !           205:    if a L'\0' wide character was recognized. It  returns (size_t)(-1), 
        !           206:    if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 
        !           207:    if it couldn't parse a complete  multibyte character.  */
        !           208: int
        !           209: _rl_get_char_len (src, ps)
        !           210:      char *src;
        !           211:      mbstate_t *ps;
        !           212: {
        !           213:   size_t tmp;
        !           214: 
        !           215:   tmp = mbrlen((const char *)src, (size_t)strlen (src), ps);
        !           216:   if (tmp == (size_t)(-2))
        !           217:     {
        !           218:       /* shorted to compose multibyte char */
        !           219:       if (ps)
        !           220:        memset (ps, 0, sizeof(mbstate_t));
        !           221:       return -2;
        !           222:     }
        !           223:   else if (tmp == (size_t)(-1))
        !           224:     {
        !           225:       /* invalid to compose multibyte char */
        !           226:       /* initialize the conversion state */
        !           227:       if (ps)
        !           228:        memset (ps, 0, sizeof(mbstate_t));
        !           229:       return -1;
        !           230:     }
        !           231:   else if (tmp == (size_t)0)
        !           232:     return 0;
        !           233:   else
        !           234:     return (int)tmp;
        !           235: }
        !           236: 
        !           237: /* compare the specified two characters. If the characters matched,
        !           238:    return 1. Otherwise return 0. */
        !           239: int
        !           240: _rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
        !           241:      char *buf1;
        !           242:      int pos1;
        !           243:      mbstate_t *ps1;
        !           244:      char *buf2;
        !           245:      int pos2;
        !           246:      mbstate_t *ps2;
        !           247: {
        !           248:   int i, w1, w2;
        !           249: 
        !           250:   if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 
        !           251:        (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 ||
        !           252:        (w1 != w2) ||
        !           253:        (buf1[pos1] != buf2[pos2]))
        !           254:     return 0;
        !           255: 
        !           256:   for (i = 1; i < w1; i++)
        !           257:     if (buf1[pos1+i] != buf2[pos2+i])
        !           258:       return 0;
        !           259: 
        !           260:   return 1;
        !           261: }
        !           262: 
        !           263: /* adjust pointed byte and find mbstate of the point of string.
        !           264:    adjusted point will be point <= adjusted_point, and returns
        !           265:    differences of the byte(adjusted_point - point).
        !           266:    if point is invalied (point < 0 || more than string length),
        !           267:    it returns -1 */
        !           268: int
        !           269: _rl_adjust_point (string, point, ps)
        !           270:      char *string;
        !           271:      int point;
        !           272:      mbstate_t *ps;
        !           273: {
        !           274:   size_t tmp = 0;
        !           275:   int length;
        !           276:   int pos = 0;
        !           277: 
        !           278:   length = strlen(string);
        !           279:   if (point < 0)
        !           280:     return -1;
        !           281:   if (length < point)
        !           282:     return -1;
        !           283:   
        !           284:   while (pos < point)
        !           285:     {
        !           286:       tmp = mbrlen (string + pos, length - pos, ps);
        !           287:       if (MB_INVALIDCH ((size_t)tmp))
        !           288:        {
        !           289:          /* in this case, bytes are invalid or shorted to compose
        !           290:             multibyte char, so assume that the first byte represents
        !           291:             a single character anyway. */
        !           292:          pos++;
        !           293:          /* clear the state of the byte sequence, because
        !           294:             in this case effect of mbstate is undefined  */
        !           295:          if (ps)
        !           296:            memset (ps, 0, sizeof (mbstate_t));
        !           297:        }
        !           298:       else if (MB_NULLWCH (tmp))
        !           299:        pos++;
        !           300:       else
        !           301:        pos += tmp;
        !           302:     }
        !           303: 
        !           304:   return (pos - point);
        !           305: }
        !           306: 
        !           307: int
        !           308: _rl_is_mbchar_matched (string, seed, end, mbchar, length)
        !           309:      char *string;
        !           310:      int seed, end;
        !           311:      char *mbchar;
        !           312:      int length;
        !           313: {
        !           314:   int i;
        !           315: 
        !           316:   if ((end - seed) < length)
        !           317:     return 0;
        !           318: 
        !           319:   for (i = 0; i < length; i++)
        !           320:     if (string[seed + i] != mbchar[i])
        !           321:       return 0;
        !           322:   return 1;
        !           323: }
        !           324: 
        !           325: wchar_t
        !           326: _rl_char_value (buf, ind)
        !           327:      char *buf;
        !           328:      int ind;
        !           329: {
        !           330:   size_t tmp;
        !           331:   wchar_t wc;
        !           332:   mbstate_t ps;
        !           333:   int l;
        !           334: 
        !           335:   if (MB_LEN_MAX == 1 || rl_byte_oriented)
        !           336:     return ((wchar_t) buf[ind]);
        !           337:   l = strlen (buf);
        !           338:   if (ind >= l - 1)
        !           339:     return ((wchar_t) buf[ind]);
        !           340:   memset (&ps, 0, sizeof (mbstate_t));
        !           341:   tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
        !           342:   if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))  
        !           343:     return ((wchar_t) buf[ind]);
        !           344:   return wc;
        !           345: }
        !           346: #endif /* HANDLE_MULTIBYTE */
        !           347: 
        !           348: /* Find next `count' characters started byte point of the specified seed.
        !           349:    If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte
        !           350:    characters. */
        !           351: #undef _rl_find_next_mbchar
        !           352: int
        !           353: _rl_find_next_mbchar (string, seed, count, flags)
        !           354:      char *string;
        !           355:      int seed, count, flags;
        !           356: {
        !           357: #if defined (HANDLE_MULTIBYTE)
        !           358:   return _rl_find_next_mbchar_internal (string, seed, count, flags);
        !           359: #else
        !           360:   return (seed + count);
        !           361: #endif
        !           362: }
        !           363: 
        !           364: /* Find previous character started byte point of the specified seed.
        !           365:    Returned point will be point <= seed.  If flags is MB_FIND_NONZERO,
        !           366:    we look for non-zero-width multibyte characters. */
        !           367: #undef _rl_find_prev_mbchar
        !           368: int
        !           369: _rl_find_prev_mbchar (string, seed, flags)
        !           370:      char *string;
        !           371:      int seed, flags;
        !           372: {
        !           373: #if defined (HANDLE_MULTIBYTE)
        !           374:   return _rl_find_prev_mbchar_internal (string, seed, flags);
        !           375: #else
        !           376:   return ((seed == 0) ? seed : seed - 1);
        !           377: #endif
        !           378: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>