Annotation of embedaddon/readline/mbutil.c, revision 1.1.1.1

1.1       misho       1: /* mbutil.c -- readline multibyte character utility functions */
                      2: 
                      3: /* Copyright (C) 2001-2009 Free Software Foundation, Inc.
                      4: 
                      5:    This file is part of the GNU Readline Library (Readline), a library
                      6:    for reading lines of text with interactive input and history editing.      
                      7: 
                      8:    Readline is free software: you can redistribute it and/or modify
                      9:    it under the terms of the GNU General Public License as published by
                     10:    the Free Software Foundation, either version 3 of the License, or
                     11:    (at your option) any later version.
                     12: 
                     13:    Readline is distributed in the hope that it will be useful,
                     14:    but WITHOUT ANY WARRANTY; without even the implied warranty of
                     15:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     16:    GNU General Public License for more details.
                     17: 
                     18:    You should have received a copy of the GNU General Public License
                     19:    along with Readline.  If not, see <http://www.gnu.org/licenses/>.
                     20: */
                     21: 
                     22: #define READLINE_LIBRARY
                     23: 
                     24: #if defined (HAVE_CONFIG_H)
                     25: #  include <config.h>
                     26: #endif
                     27: 
                     28: #include <sys/types.h>
                     29: #include <fcntl.h>
                     30: #include "posixjmp.h"
                     31: 
                     32: #if defined (HAVE_UNISTD_H)
                     33: #  include <unistd.h>     /* for _POSIX_VERSION */
                     34: #endif /* HAVE_UNISTD_H */
                     35: 
                     36: #if defined (HAVE_STDLIB_H)
                     37: #  include <stdlib.h>
                     38: #else
                     39: #  include "ansi_stdlib.h"
                     40: #endif /* HAVE_STDLIB_H */
                     41: 
                     42: #include <stdio.h>
                     43: #include <ctype.h>
                     44: 
                     45: /* System-specific feature definitions and include files. */
                     46: #include "rldefs.h"
                     47: #include "rlmbutil.h"
                     48: 
                     49: #if defined (TIOCSTAT_IN_SYS_IOCTL)
                     50: #  include <sys/ioctl.h>
                     51: #endif /* TIOCSTAT_IN_SYS_IOCTL */
                     52: 
                     53: /* Some standard library routines. */
                     54: #include "readline.h"
                     55: 
                     56: #include "rlprivate.h"
                     57: #include "xmalloc.h"
                     58: 
                     59: /* Declared here so it can be shared between the readline and history
                     60:    libraries. */
                     61: #if defined (HANDLE_MULTIBYTE)
                     62: int rl_byte_oriented = 0;
                     63: #else
                     64: int rl_byte_oriented = 1;
                     65: #endif
                     66: 
                     67: /* Ditto */
                     68: int _rl_utf8locale = 0;
                     69: 
                     70: /* **************************************************************** */
                     71: /*                                                                 */
                     72: /*             Multibyte Character Utility Functions               */
                     73: /*                                                                 */
                     74: /* **************************************************************** */
                     75: 
                     76: #if defined(HANDLE_MULTIBYTE)
                     77: 
                     78: static int
                     79: _rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
                     80:      char *string;
                     81:      int seed, count, find_non_zero;
                     82: {
                     83:   size_t tmp, len;
                     84:   mbstate_t ps;
                     85:   int point;
                     86:   wchar_t wc;
                     87: 
                     88:   tmp = 0;
                     89: 
                     90:   memset(&ps, 0, sizeof (mbstate_t));
                     91:   if (seed < 0)
                     92:     seed = 0;
                     93:   if (count <= 0)
                     94:     return seed;
                     95: 
                     96:   point = seed + _rl_adjust_point (string, seed, &ps);
                     97:   /* if this is true, means that seed was not pointing to a byte indicating
                     98:      the beginning of a multibyte character.  Correct the point and consume
                     99:      one char. */
                    100:   if (seed < point)
                    101:     count--;
                    102: 
                    103:   while (count > 0)  
                    104:     {
                    105:       len = strlen (string + point);
                    106:       if (len == 0)
                    107:        break;
                    108:       tmp = mbrtowc (&wc, string+point, len, &ps);
                    109:       if (MB_INVALIDCH ((size_t)tmp))
                    110:        {
                    111:          /* invalid bytes. assume a byte represents a character */
                    112:          point++;
                    113:          count--;
                    114:          /* reset states. */
                    115:          memset(&ps, 0, sizeof(mbstate_t));
                    116:        }
                    117:       else if (MB_NULLWCH (tmp))
                    118:        break;                  /* found wide '\0' */
                    119:       else
                    120:        {
                    121:          /* valid bytes */
                    122:          point += tmp;
                    123:          if (find_non_zero)
                    124:            {
                    125:              if (WCWIDTH (wc) == 0)
                    126:                continue;
                    127:              else
                    128:                count--;
                    129:            }
                    130:          else
                    131:            count--;
                    132:        }
                    133:     }
                    134: 
                    135:   if (find_non_zero)
                    136:     {
                    137:       tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
                    138:       while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && WCWIDTH (wc) == 0)
                    139:        {
                    140:          point += tmp;
                    141:          tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
                    142:        }
                    143:     }
                    144: 
                    145:   return point;
                    146: }
                    147: 
                    148: static int
                    149: _rl_find_prev_mbchar_internal (string, seed, find_non_zero)
                    150:      char *string;
                    151:      int seed, find_non_zero;
                    152: {
                    153:   mbstate_t ps;
                    154:   int prev, non_zero_prev, point, length;
                    155:   size_t tmp;
                    156:   wchar_t wc;
                    157: 
                    158:   memset(&ps, 0, sizeof(mbstate_t));
                    159:   length = strlen(string);
                    160:   
                    161:   if (seed < 0)
                    162:     return 0;
                    163:   else if (length < seed)
                    164:     return length;
                    165: 
                    166:   prev = non_zero_prev = point = 0;
                    167:   while (point < seed)
                    168:     {
                    169:       tmp = mbrtowc (&wc, string + point, length - point, &ps);
                    170:       if (MB_INVALIDCH ((size_t)tmp))
                    171:        {
                    172:          /* in this case, bytes are invalid or shorted to compose
                    173:             multibyte char, so assume that the first byte represents
                    174:             a single character anyway. */
                    175:          tmp = 1;
                    176:          /* clear the state of the byte sequence, because
                    177:             in this case effect of mbstate is undefined  */
                    178:          memset(&ps, 0, sizeof (mbstate_t));
                    179: 
                    180:          /* Since we're assuming that this byte represents a single
                    181:             non-zero-width character, don't forget about it. */
                    182:          prev = point;
                    183:        }
                    184:       else if (MB_NULLWCH (tmp))
                    185:        break;                  /* Found '\0' char.  Can this happen? */
                    186:       else
                    187:        {
                    188:          if (find_non_zero)
                    189:            {
                    190:              if (WCWIDTH (wc) != 0)
                    191:                prev = point;
                    192:            }
                    193:          else
                    194:            prev = point;  
                    195:        }
                    196: 
                    197:       point += tmp;
                    198:     }
                    199: 
                    200:   return prev;
                    201: }
                    202: 
                    203: /* return the number of bytes parsed from the multibyte sequence starting
                    204:    at src, if a non-L'\0' wide character was recognized. It returns 0, 
                    205:    if a L'\0' wide character was recognized. It  returns (size_t)(-1), 
                    206:    if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 
                    207:    if it couldn't parse a complete  multibyte character.  */
                    208: int
                    209: _rl_get_char_len (src, ps)
                    210:      char *src;
                    211:      mbstate_t *ps;
                    212: {
                    213:   size_t tmp;
                    214: 
                    215:   tmp = mbrlen((const char *)src, (size_t)strlen (src), ps);
                    216:   if (tmp == (size_t)(-2))
                    217:     {
                    218:       /* shorted to compose multibyte char */
                    219:       if (ps)
                    220:        memset (ps, 0, sizeof(mbstate_t));
                    221:       return -2;
                    222:     }
                    223:   else if (tmp == (size_t)(-1))
                    224:     {
                    225:       /* invalid to compose multibyte char */
                    226:       /* initialize the conversion state */
                    227:       if (ps)
                    228:        memset (ps, 0, sizeof(mbstate_t));
                    229:       return -1;
                    230:     }
                    231:   else if (tmp == (size_t)0)
                    232:     return 0;
                    233:   else
                    234:     return (int)tmp;
                    235: }
                    236: 
                    237: /* compare the specified two characters. If the characters matched,
                    238:    return 1. Otherwise return 0. */
                    239: int
                    240: _rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
                    241:      char *buf1;
                    242:      int pos1;
                    243:      mbstate_t *ps1;
                    244:      char *buf2;
                    245:      int pos2;
                    246:      mbstate_t *ps2;
                    247: {
                    248:   int i, w1, w2;
                    249: 
                    250:   if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 
                    251:        (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 ||
                    252:        (w1 != w2) ||
                    253:        (buf1[pos1] != buf2[pos2]))
                    254:     return 0;
                    255: 
                    256:   for (i = 1; i < w1; i++)
                    257:     if (buf1[pos1+i] != buf2[pos2+i])
                    258:       return 0;
                    259: 
                    260:   return 1;
                    261: }
                    262: 
                    263: /* adjust pointed byte and find mbstate of the point of string.
                    264:    adjusted point will be point <= adjusted_point, and returns
                    265:    differences of the byte(adjusted_point - point).
                    266:    if point is invalied (point < 0 || more than string length),
                    267:    it returns -1 */
                    268: int
                    269: _rl_adjust_point (string, point, ps)
                    270:      char *string;
                    271:      int point;
                    272:      mbstate_t *ps;
                    273: {
                    274:   size_t tmp = 0;
                    275:   int length;
                    276:   int pos = 0;
                    277: 
                    278:   length = strlen(string);
                    279:   if (point < 0)
                    280:     return -1;
                    281:   if (length < point)
                    282:     return -1;
                    283:   
                    284:   while (pos < point)
                    285:     {
                    286:       tmp = mbrlen (string + pos, length - pos, ps);
                    287:       if (MB_INVALIDCH ((size_t)tmp))
                    288:        {
                    289:          /* in this case, bytes are invalid or shorted to compose
                    290:             multibyte char, so assume that the first byte represents
                    291:             a single character anyway. */
                    292:          pos++;
                    293:          /* clear the state of the byte sequence, because
                    294:             in this case effect of mbstate is undefined  */
                    295:          if (ps)
                    296:            memset (ps, 0, sizeof (mbstate_t));
                    297:        }
                    298:       else if (MB_NULLWCH (tmp))
                    299:        pos++;
                    300:       else
                    301:        pos += tmp;
                    302:     }
                    303: 
                    304:   return (pos - point);
                    305: }
                    306: 
                    307: int
                    308: _rl_is_mbchar_matched (string, seed, end, mbchar, length)
                    309:      char *string;
                    310:      int seed, end;
                    311:      char *mbchar;
                    312:      int length;
                    313: {
                    314:   int i;
                    315: 
                    316:   if ((end - seed) < length)
                    317:     return 0;
                    318: 
                    319:   for (i = 0; i < length; i++)
                    320:     if (string[seed + i] != mbchar[i])
                    321:       return 0;
                    322:   return 1;
                    323: }
                    324: 
                    325: wchar_t
                    326: _rl_char_value (buf, ind)
                    327:      char *buf;
                    328:      int ind;
                    329: {
                    330:   size_t tmp;
                    331:   wchar_t wc;
                    332:   mbstate_t ps;
                    333:   int l;
                    334: 
                    335:   if (MB_LEN_MAX == 1 || rl_byte_oriented)
                    336:     return ((wchar_t) buf[ind]);
                    337:   l = strlen (buf);
                    338:   if (ind >= l - 1)
                    339:     return ((wchar_t) buf[ind]);
                    340:   memset (&ps, 0, sizeof (mbstate_t));
                    341:   tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
                    342:   if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))  
                    343:     return ((wchar_t) buf[ind]);
                    344:   return wc;
                    345: }
                    346: #endif /* HANDLE_MULTIBYTE */
                    347: 
                    348: /* Find next `count' characters started byte point of the specified seed.
                    349:    If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte
                    350:    characters. */
                    351: #undef _rl_find_next_mbchar
                    352: int
                    353: _rl_find_next_mbchar (string, seed, count, flags)
                    354:      char *string;
                    355:      int seed, count, flags;
                    356: {
                    357: #if defined (HANDLE_MULTIBYTE)
                    358:   return _rl_find_next_mbchar_internal (string, seed, count, flags);
                    359: #else
                    360:   return (seed + count);
                    361: #endif
                    362: }
                    363: 
                    364: /* Find previous character started byte point of the specified seed.
                    365:    Returned point will be point <= seed.  If flags is MB_FIND_NONZERO,
                    366:    we look for non-zero-width multibyte characters. */
                    367: #undef _rl_find_prev_mbchar
                    368: int
                    369: _rl_find_prev_mbchar (string, seed, flags)
                    370:      char *string;
                    371:      int seed, flags;
                    372: {
                    373: #if defined (HANDLE_MULTIBYTE)
                    374:   return _rl_find_prev_mbchar_internal (string, seed, flags);
                    375: #else
                    376:   return ((seed == 0) ? seed : seed - 1);
                    377: #endif
                    378: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>