File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / readline / mbutil.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Jul 30 08:16:45 2014 UTC (9 years, 11 months ago) by misho
Branches: readline, MAIN
CVS tags: v6_3p10_cross, v6_3p10, v6_3, p6, HEAD
readline 6.3

    1: /* mbutil.c -- readline multibyte character utility functions */
    2: 
    3: /* Copyright (C) 2001-2009 Free Software Foundation, Inc.
    4: 
    5:    This file is part of the GNU Readline Library (Readline), a library
    6:    for reading lines of text with interactive input and history editing.      
    7: 
    8:    Readline is free software: you can redistribute it and/or modify
    9:    it under the terms of the GNU General Public License as published by
   10:    the Free Software Foundation, either version 3 of the License, or
   11:    (at your option) any later version.
   12: 
   13:    Readline is distributed in the hope that it will be useful,
   14:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   15:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   16:    GNU General Public License for more details.
   17: 
   18:    You should have received a copy of the GNU General Public License
   19:    along with Readline.  If not, see <http://www.gnu.org/licenses/>.
   20: */
   21: 
   22: #define READLINE_LIBRARY
   23: 
   24: #if defined (HAVE_CONFIG_H)
   25: #  include <config.h>
   26: #endif
   27: 
   28: #include <sys/types.h>
   29: #include <fcntl.h>
   30: #include "posixjmp.h"
   31: 
   32: #if defined (HAVE_UNISTD_H)
   33: #  include <unistd.h>	   /* for _POSIX_VERSION */
   34: #endif /* HAVE_UNISTD_H */
   35: 
   36: #if defined (HAVE_STDLIB_H)
   37: #  include <stdlib.h>
   38: #else
   39: #  include "ansi_stdlib.h"
   40: #endif /* HAVE_STDLIB_H */
   41: 
   42: #include <stdio.h>
   43: #include <ctype.h>
   44: 
   45: /* System-specific feature definitions and include files. */
   46: #include "rldefs.h"
   47: #include "rlmbutil.h"
   48: 
   49: #if defined (TIOCSTAT_IN_SYS_IOCTL)
   50: #  include <sys/ioctl.h>
   51: #endif /* TIOCSTAT_IN_SYS_IOCTL */
   52: 
   53: /* Some standard library routines. */
   54: #include "readline.h"
   55: 
   56: #include "rlprivate.h"
   57: #include "xmalloc.h"
   58: 
   59: /* Declared here so it can be shared between the readline and history
   60:    libraries. */
   61: #if defined (HANDLE_MULTIBYTE)
   62: int rl_byte_oriented = 0;
   63: #else
   64: int rl_byte_oriented = 1;
   65: #endif
   66: 
   67: /* Ditto */
   68: int _rl_utf8locale = 0;
   69: 
   70: /* **************************************************************** */
   71: /*								    */
   72: /*		Multibyte Character Utility Functions		    */
   73: /*								    */
   74: /* **************************************************************** */
   75: 
   76: #if defined(HANDLE_MULTIBYTE)
   77: 
   78: static int
   79: _rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
   80:      char *string;
   81:      int seed, count, find_non_zero;
   82: {
   83:   size_t tmp, len;
   84:   mbstate_t ps;
   85:   int point;
   86:   wchar_t wc;
   87: 
   88:   tmp = 0;
   89: 
   90:   memset(&ps, 0, sizeof (mbstate_t));
   91:   if (seed < 0)
   92:     seed = 0;
   93:   if (count <= 0)
   94:     return seed;
   95: 
   96:   point = seed + _rl_adjust_point (string, seed, &ps);
   97:   /* if this is true, means that seed was not pointing to a byte indicating
   98:      the beginning of a multibyte character.  Correct the point and consume
   99:      one char. */
  100:   if (seed < point)
  101:     count--;
  102: 
  103:   while (count > 0)  
  104:     {
  105:       len = strlen (string + point);
  106:       if (len == 0)
  107: 	break;
  108:       tmp = mbrtowc (&wc, string+point, len, &ps);
  109:       if (MB_INVALIDCH ((size_t)tmp))
  110: 	{
  111: 	  /* invalid bytes. assume a byte represents a character */
  112: 	  point++;
  113: 	  count--;
  114: 	  /* reset states. */
  115: 	  memset(&ps, 0, sizeof(mbstate_t));
  116: 	}
  117:       else if (MB_NULLWCH (tmp))
  118: 	break;			/* found wide '\0' */
  119:       else
  120: 	{
  121: 	  /* valid bytes */
  122: 	  point += tmp;
  123: 	  if (find_non_zero)
  124: 	    {
  125: 	      if (WCWIDTH (wc) == 0)
  126: 		continue;
  127: 	      else
  128: 		count--;
  129: 	    }
  130: 	  else
  131: 	    count--;
  132: 	}
  133:     }
  134: 
  135:   if (find_non_zero)
  136:     {
  137:       tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
  138:       while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && WCWIDTH (wc) == 0)
  139: 	{
  140: 	  point += tmp;
  141: 	  tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
  142: 	}
  143:     }
  144: 
  145:   return point;
  146: }
  147: 
  148: static int
  149: _rl_find_prev_mbchar_internal (string, seed, find_non_zero)
  150:      char *string;
  151:      int seed, find_non_zero;
  152: {
  153:   mbstate_t ps;
  154:   int prev, non_zero_prev, point, length;
  155:   size_t tmp;
  156:   wchar_t wc;
  157: 
  158:   memset(&ps, 0, sizeof(mbstate_t));
  159:   length = strlen(string);
  160:   
  161:   if (seed < 0)
  162:     return 0;
  163:   else if (length < seed)
  164:     return length;
  165: 
  166:   prev = non_zero_prev = point = 0;
  167:   while (point < seed)
  168:     {
  169:       tmp = mbrtowc (&wc, string + point, length - point, &ps);
  170:       if (MB_INVALIDCH ((size_t)tmp))
  171: 	{
  172: 	  /* in this case, bytes are invalid or shorted to compose
  173: 	     multibyte char, so assume that the first byte represents
  174: 	     a single character anyway. */
  175: 	  tmp = 1;
  176: 	  /* clear the state of the byte sequence, because
  177: 	     in this case effect of mbstate is undefined  */
  178: 	  memset(&ps, 0, sizeof (mbstate_t));
  179: 
  180: 	  /* Since we're assuming that this byte represents a single
  181: 	     non-zero-width character, don't forget about it. */
  182: 	  prev = point;
  183: 	}
  184:       else if (MB_NULLWCH (tmp))
  185: 	break;			/* Found '\0' char.  Can this happen? */
  186:       else
  187: 	{
  188: 	  if (find_non_zero)
  189: 	    {
  190: 	      if (WCWIDTH (wc) != 0)
  191: 		prev = point;
  192: 	    }
  193: 	  else
  194: 	    prev = point;  
  195: 	}
  196: 
  197:       point += tmp;
  198:     }
  199: 
  200:   return prev;
  201: }
  202: 
  203: /* return the number of bytes parsed from the multibyte sequence starting
  204:    at src, if a non-L'\0' wide character was recognized. It returns 0, 
  205:    if a L'\0' wide character was recognized. It  returns (size_t)(-1), 
  206:    if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 
  207:    if it couldn't parse a complete  multibyte character.  */
  208: int
  209: _rl_get_char_len (src, ps)
  210:      char *src;
  211:      mbstate_t *ps;
  212: {
  213:   size_t tmp;
  214: 
  215:   tmp = mbrlen((const char *)src, (size_t)strlen (src), ps);
  216:   if (tmp == (size_t)(-2))
  217:     {
  218:       /* shorted to compose multibyte char */
  219:       if (ps)
  220: 	memset (ps, 0, sizeof(mbstate_t));
  221:       return -2;
  222:     }
  223:   else if (tmp == (size_t)(-1))
  224:     {
  225:       /* invalid to compose multibyte char */
  226:       /* initialize the conversion state */
  227:       if (ps)
  228: 	memset (ps, 0, sizeof(mbstate_t));
  229:       return -1;
  230:     }
  231:   else if (tmp == (size_t)0)
  232:     return 0;
  233:   else
  234:     return (int)tmp;
  235: }
  236: 
  237: /* compare the specified two characters. If the characters matched,
  238:    return 1. Otherwise return 0. */
  239: int
  240: _rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
  241:      char *buf1;
  242:      int pos1;
  243:      mbstate_t *ps1;
  244:      char *buf2;
  245:      int pos2;
  246:      mbstate_t *ps2;
  247: {
  248:   int i, w1, w2;
  249: 
  250:   if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 
  251: 	(w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 ||
  252: 	(w1 != w2) ||
  253: 	(buf1[pos1] != buf2[pos2]))
  254:     return 0;
  255: 
  256:   for (i = 1; i < w1; i++)
  257:     if (buf1[pos1+i] != buf2[pos2+i])
  258:       return 0;
  259: 
  260:   return 1;
  261: }
  262: 
  263: /* adjust pointed byte and find mbstate of the point of string.
  264:    adjusted point will be point <= adjusted_point, and returns
  265:    differences of the byte(adjusted_point - point).
  266:    if point is invalied (point < 0 || more than string length),
  267:    it returns -1 */
  268: int
  269: _rl_adjust_point (string, point, ps)
  270:      char *string;
  271:      int point;
  272:      mbstate_t *ps;
  273: {
  274:   size_t tmp = 0;
  275:   int length;
  276:   int pos = 0;
  277: 
  278:   length = strlen(string);
  279:   if (point < 0)
  280:     return -1;
  281:   if (length < point)
  282:     return -1;
  283:   
  284:   while (pos < point)
  285:     {
  286:       tmp = mbrlen (string + pos, length - pos, ps);
  287:       if (MB_INVALIDCH ((size_t)tmp))
  288: 	{
  289: 	  /* in this case, bytes are invalid or shorted to compose
  290: 	     multibyte char, so assume that the first byte represents
  291: 	     a single character anyway. */
  292: 	  pos++;
  293: 	  /* clear the state of the byte sequence, because
  294: 	     in this case effect of mbstate is undefined  */
  295: 	  if (ps)
  296: 	    memset (ps, 0, sizeof (mbstate_t));
  297: 	}
  298:       else if (MB_NULLWCH (tmp))
  299: 	pos++;
  300:       else
  301: 	pos += tmp;
  302:     }
  303: 
  304:   return (pos - point);
  305: }
  306: 
  307: int
  308: _rl_is_mbchar_matched (string, seed, end, mbchar, length)
  309:      char *string;
  310:      int seed, end;
  311:      char *mbchar;
  312:      int length;
  313: {
  314:   int i;
  315: 
  316:   if ((end - seed) < length)
  317:     return 0;
  318: 
  319:   for (i = 0; i < length; i++)
  320:     if (string[seed + i] != mbchar[i])
  321:       return 0;
  322:   return 1;
  323: }
  324: 
  325: wchar_t
  326: _rl_char_value (buf, ind)
  327:      char *buf;
  328:      int ind;
  329: {
  330:   size_t tmp;
  331:   wchar_t wc;
  332:   mbstate_t ps;
  333:   int l;
  334: 
  335:   if (MB_LEN_MAX == 1 || rl_byte_oriented)
  336:     return ((wchar_t) buf[ind]);
  337:   l = strlen (buf);
  338:   if (ind >= l - 1)
  339:     return ((wchar_t) buf[ind]);
  340:   memset (&ps, 0, sizeof (mbstate_t));
  341:   tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
  342:   if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))  
  343:     return ((wchar_t) buf[ind]);
  344:   return wc;
  345: }
  346: #endif /* HANDLE_MULTIBYTE */
  347: 
  348: /* Find next `count' characters started byte point of the specified seed.
  349:    If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte
  350:    characters. */
  351: #undef _rl_find_next_mbchar
  352: int
  353: _rl_find_next_mbchar (string, seed, count, flags)
  354:      char *string;
  355:      int seed, count, flags;
  356: {
  357: #if defined (HANDLE_MULTIBYTE)
  358:   return _rl_find_next_mbchar_internal (string, seed, count, flags);
  359: #else
  360:   return (seed + count);
  361: #endif
  362: }
  363: 
  364: /* Find previous character started byte point of the specified seed.
  365:    Returned point will be point <= seed.  If flags is MB_FIND_NONZERO,
  366:    we look for non-zero-width multibyte characters. */
  367: #undef _rl_find_prev_mbchar
  368: int
  369: _rl_find_prev_mbchar (string, seed, flags)
  370:      char *string;
  371:      int seed, flags;
  372: {
  373: #if defined (HANDLE_MULTIBYTE)
  374:   return _rl_find_prev_mbchar_internal (string, seed, flags);
  375: #else
  376:   return ((seed == 0) ? seed : seed - 1);
  377: #endif
  378: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>