embedaddon/readline/mbutil.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / readline / mbutil.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Jul 30 08:16:45 2014 UTC (9 years, 11 months ago) by misho
Branches: readline, MAIN
CVS tags: v6_3p10_cross, v6_3p10, v6_3, p6, HEAD

readline 6.3

1: /* mbutil.c -- readline multibyte character utility functions */ 2: 3: /* Copyright (C) 2001-2009 Free Software Foundation, Inc. 4: 5: This file is part of the GNU Readline Library (Readline), a library 6: for reading lines of text with interactive input and history editing. 7: 8: Readline is free software: you can redistribute it and/or modify 9: it under the terms of the GNU General Public License as published by 10: the Free Software Foundation, either version 3 of the License, or 11: (at your option) any later version. 12: 13: Readline is distributed in the hope that it will be useful, 14: but WITHOUT ANY WARRANTY; without even the implied warranty of 15: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16: GNU General Public License for more details. 17: 18: You should have received a copy of the GNU General Public License 19: along with Readline. If not, see <http://www.gnu.org/licenses/>. 20: */ 21: 22: #define READLINE_LIBRARY 23: 24: #if defined (HAVE_CONFIG_H) 25: # include <config.h> 26: #endif 27: 28: #include <sys/types.h> 29: #include <fcntl.h> 30: #include "posixjmp.h" 31: 32: #if defined (HAVE_UNISTD_H) 33: # include <unistd.h> /* for _POSIX_VERSION */ 34: #endif /* HAVE_UNISTD_H */ 35: 36: #if defined (HAVE_STDLIB_H) 37: # include <stdlib.h> 38: #else 39: # include "ansi_stdlib.h" 40: #endif /* HAVE_STDLIB_H */ 41: 42: #include <stdio.h> 43: #include <ctype.h> 44: 45: /* System-specific feature definitions and include files. */ 46: #include "rldefs.h" 47: #include "rlmbutil.h" 48: 49: #if defined (TIOCSTAT_IN_SYS_IOCTL) 50: # include <sys/ioctl.h> 51: #endif /* TIOCSTAT_IN_SYS_IOCTL */ 52: 53: /* Some standard library routines. */ 54: #include "readline.h" 55: 56: #include "rlprivate.h" 57: #include "xmalloc.h" 58: 59: /* Declared here so it can be shared between the readline and history 60: libraries. */ 61: #if defined (HANDLE_MULTIBYTE) 62: int rl_byte_oriented = 0; 63: #else 64: int rl_byte_oriented = 1; 65: #endif 66: 67: /* Ditto */ 68: int _rl_utf8locale = 0; 69: 70: /* **************************************************************** */ 71: /* */ 72: /* Multibyte Character Utility Functions */ 73: /* */ 74: /* **************************************************************** */ 75: 76: #if defined(HANDLE_MULTIBYTE) 77: 78: static int 79: _rl_find_next_mbchar_internal (string, seed, count, find_non_zero) 80: char *string; 81: int seed, count, find_non_zero; 82: { 83: size_t tmp, len; 84: mbstate_t ps; 85: int point; 86: wchar_t wc; 87: 88: tmp = 0; 89: 90: memset(&ps, 0, sizeof (mbstate_t)); 91: if (seed < 0) 92: seed = 0; 93: if (count <= 0) 94: return seed; 95: 96: point = seed + _rl_adjust_point (string, seed, &ps); 97: /* if this is true, means that seed was not pointing to a byte indicating 98: the beginning of a multibyte character. Correct the point and consume 99: one char. */ 100: if (seed < point) 101: count--; 102: 103: while (count > 0) 104: { 105: len = strlen (string + point); 106: if (len == 0) 107: break; 108: tmp = mbrtowc (&wc, string+point, len, &ps); 109: if (MB_INVALIDCH ((size_t)tmp)) 110: { 111: /* invalid bytes. assume a byte represents a character */ 112: point++; 113: count--; 114: /* reset states. */ 115: memset(&ps, 0, sizeof(mbstate_t)); 116: } 117: else if (MB_NULLWCH (tmp)) 118: break; /* found wide '\0' */ 119: else 120: { 121: /* valid bytes */ 122: point += tmp; 123: if (find_non_zero) 124: { 125: if (WCWIDTH (wc) == 0) 126: continue; 127: else 128: count--; 129: } 130: else 131: count--; 132: } 133: } 134: 135: if (find_non_zero) 136: { 137: tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 138: while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && WCWIDTH (wc) == 0) 139: { 140: point += tmp; 141: tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 142: } 143: } 144: 145: return point; 146: } 147: 148: static int 149: _rl_find_prev_mbchar_internal (string, seed, find_non_zero) 150: char *string; 151: int seed, find_non_zero; 152: { 153: mbstate_t ps; 154: int prev, non_zero_prev, point, length; 155: size_t tmp; 156: wchar_t wc; 157: 158: memset(&ps, 0, sizeof(mbstate_t)); 159: length = strlen(string); 160: 161: if (seed < 0) 162: return 0; 163: else if (length < seed) 164: return length; 165: 166: prev = non_zero_prev = point = 0; 167: while (point < seed) 168: { 169: tmp = mbrtowc (&wc, string + point, length - point, &ps); 170: if (MB_INVALIDCH ((size_t)tmp)) 171: { 172: /* in this case, bytes are invalid or shorted to compose 173: multibyte char, so assume that the first byte represents 174: a single character anyway. */ 175: tmp = 1; 176: /* clear the state of the byte sequence, because 177: in this case effect of mbstate is undefined */ 178: memset(&ps, 0, sizeof (mbstate_t)); 179: 180: /* Since we're assuming that this byte represents a single 181: non-zero-width character, don't forget about it. */ 182: prev = point; 183: } 184: else if (MB_NULLWCH (tmp)) 185: break; /* Found '\0' char. Can this happen? */ 186: else 187: { 188: if (find_non_zero) 189: { 190: if (WCWIDTH (wc) != 0) 191: prev = point; 192: } 193: else 194: prev = point; 195: } 196: 197: point += tmp; 198: } 199: 200: return prev; 201: } 202: 203: /* return the number of bytes parsed from the multibyte sequence starting 204: at src, if a non-L'\0' wide character was recognized. It returns 0, 205: if a L'\0' wide character was recognized. It returns (size_t)(-1), 206: if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 207: if it couldn't parse a complete multibyte character. */ 208: int 209: _rl_get_char_len (src, ps) 210: char *src; 211: mbstate_t *ps; 212: { 213: size_t tmp; 214: 215: tmp = mbrlen((const char *)src, (size_t)strlen (src), ps); 216: if (tmp == (size_t)(-2)) 217: { 218: /* shorted to compose multibyte char */ 219: if (ps) 220: memset (ps, 0, sizeof(mbstate_t)); 221: return -2; 222: } 223: else if (tmp == (size_t)(-1)) 224: { 225: /* invalid to compose multibyte char */ 226: /* initialize the conversion state */ 227: if (ps) 228: memset (ps, 0, sizeof(mbstate_t)); 229: return -1; 230: } 231: else if (tmp == (size_t)0) 232: return 0; 233: else 234: return (int)tmp; 235: } 236: 237: /* compare the specified two characters. If the characters matched, 238: return 1. Otherwise return 0. */ 239: int 240: _rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2) 241: char *buf1; 242: int pos1; 243: mbstate_t *ps1; 244: char *buf2; 245: int pos2; 246: mbstate_t *ps2; 247: { 248: int i, w1, w2; 249: 250: if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 251: (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 || 252: (w1 != w2) || 253: (buf1[pos1] != buf2[pos2])) 254: return 0; 255: 256: for (i = 1; i < w1; i++) 257: if (buf1[pos1+i] != buf2[pos2+i]) 258: return 0; 259: 260: return 1; 261: } 262: 263: /* adjust pointed byte and find mbstate of the point of string. 264: adjusted point will be point <= adjusted_point, and returns 265: differences of the byte(adjusted_point - point). 266: if point is invalied (point < 0 || more than string length), 267: it returns -1 */ 268: int 269: _rl_adjust_point (string, point, ps) 270: char *string; 271: int point; 272: mbstate_t *ps; 273: { 274: size_t tmp = 0; 275: int length; 276: int pos = 0; 277: 278: length = strlen(string); 279: if (point < 0) 280: return -1; 281: if (length < point) 282: return -1; 283: 284: while (pos < point) 285: { 286: tmp = mbrlen (string + pos, length - pos, ps); 287: if (MB_INVALIDCH ((size_t)tmp)) 288: { 289: /* in this case, bytes are invalid or shorted to compose 290: multibyte char, so assume that the first byte represents 291: a single character anyway. */ 292: pos++; 293: /* clear the state of the byte sequence, because 294: in this case effect of mbstate is undefined */ 295: if (ps) 296: memset (ps, 0, sizeof (mbstate_t)); 297: } 298: else if (MB_NULLWCH (tmp)) 299: pos++; 300: else 301: pos += tmp; 302: } 303: 304: return (pos - point); 305: } 306: 307: int 308: _rl_is_mbchar_matched (string, seed, end, mbchar, length) 309: char *string; 310: int seed, end; 311: char *mbchar; 312: int length; 313: { 314: int i; 315: 316: if ((end - seed) < length) 317: return 0; 318: 319: for (i = 0; i < length; i++) 320: if (string[seed + i] != mbchar[i]) 321: return 0; 322: return 1; 323: } 324: 325: wchar_t 326: _rl_char_value (buf, ind) 327: char *buf; 328: int ind; 329: { 330: size_t tmp; 331: wchar_t wc; 332: mbstate_t ps; 333: int l; 334: 335: if (MB_LEN_MAX == 1 || rl_byte_oriented) 336: return ((wchar_t) buf[ind]); 337: l = strlen (buf); 338: if (ind >= l - 1) 339: return ((wchar_t) buf[ind]); 340: memset (&ps, 0, sizeof (mbstate_t)); 341: tmp = mbrtowc (&wc, buf + ind, l - ind, &ps); 342: if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp)) 343: return ((wchar_t) buf[ind]); 344: return wc; 345: } 346: #endif /* HANDLE_MULTIBYTE */ 347: 348: /* Find next `count' characters started byte point of the specified seed. 349: If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte 350: characters. */ 351: #undef _rl_find_next_mbchar 352: int 353: _rl_find_next_mbchar (string, seed, count, flags) 354: char *string; 355: int seed, count, flags; 356: { 357: #if defined (HANDLE_MULTIBYTE) 358: return _rl_find_next_mbchar_internal (string, seed, count, flags); 359: #else 360: return (seed + count); 361: #endif 362: } 363: 364: /* Find previous character started byte point of the specified seed. 365: Returned point will be point <= seed. If flags is MB_FIND_NONZERO, 366: we look for non-zero-width multibyte characters. */ 367: #undef _rl_find_prev_mbchar 368: int 369: _rl_find_prev_mbchar (string, seed, flags) 370: char *string; 371: int seed, flags; 372: { 373: #if defined (HANDLE_MULTIBYTE) 374: return _rl_find_prev_mbchar_internal (string, seed, flags); 375: #else 376: return ((seed == 0) ? seed : seed - 1); 377: #endif 378: }