Annotation of embedaddon/quagga/lib/regex.c, revision 1.1
1.1 ! misho 1: /* Extended regular expression matching and search library,
! 2: version 0.12.
! 3: (Implements POSIX draft P1003.2/D11.2, except for some of the
! 4: internationalization features.)
! 5: Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
! 6:
! 7: The GNU C Library is free software; you can redistribute it and/or
! 8: modify it under the terms of the GNU Library General Public License as
! 9: published by the Free Software Foundation; either version 2 of the
! 10: License, or (at your option) any later version.
! 11:
! 12: The GNU C Library is distributed in the hope that it will be useful,
! 13: but WITHOUT ANY WARRANTY; without even the implied warranty of
! 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 15: Library General Public License for more details.
! 16:
! 17: You should have received a copy of the GNU Library General Public
! 18: License along with the GNU C Library; see the file COPYING.LIB. If not,
! 19: write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
! 20: Boston, MA 02111-1307, USA. */
! 21:
! 22: /* AIX requires this to be the first thing in the file. */
! 23: #if defined _AIX && !defined REGEX_MALLOC
! 24: #pragma alloca
! 25: #endif
! 26:
! 27: #undef _GNU_SOURCE
! 28: #define _GNU_SOURCE
! 29:
! 30: #ifdef HAVE_CONFIG_H
! 31: # include <config.h>
! 32: #endif
! 33: #ifdef _WIN32
! 34: /* Windows does not provide unistd.h, which is required for abort() */
! 35: #include <process.h>
! 36: #endif /* _WIN32 */
! 37:
! 38: #ifndef PARAMS
! 39: # if defined __GNUC__ || (defined __STDC__ && __STDC__)
! 40: # define PARAMS(args) args
! 41: # else
! 42: # define PARAMS(args) ()
! 43: # endif /* GCC. */
! 44: #endif /* Not PARAMS. */
! 45:
! 46: #if defined STDC_HEADERS && !defined emacs
! 47: # include <stddef.h>
! 48: #else
! 49: /* We need this for `regex.h', and perhaps for the Emacs include files. */
! 50: # include <sys/types.h>
! 51: #endif
! 52:
! 53: #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
! 54:
! 55: /* For platforms which support the ISO C Amendment 1 functionality we
! 56: support user-defined character classes. */
! 57: #if defined _LIBC || WIDE_CHAR_SUPPORT
! 58: /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
! 59: # include <wchar.h>
! 60: # include <wctype.h>
! 61: #endif
! 62:
! 63: #ifdef _LIBC
! 64: /* We have to keep the namespace clean. */
! 65: # define regfree(preg) __regfree (preg)
! 66: # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
! 67: # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
! 68: # define regerror(errcode, preg, errbuf, errbuf_size) \
! 69: __regerror(errcode, preg, errbuf, errbuf_size)
! 70: # define re_set_registers(bu, re, nu, st, en) \
! 71: __re_set_registers (bu, re, nu, st, en)
! 72: # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
! 73: __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
! 74: # define re_match(bufp, string, size, pos, regs) \
! 75: __re_match (bufp, string, size, pos, regs)
! 76: # define re_search(bufp, string, size, startpos, range, regs) \
! 77: __re_search (bufp, string, size, startpos, range, regs)
! 78: # define re_compile_pattern(pattern, length, bufp) \
! 79: __re_compile_pattern (pattern, length, bufp)
! 80: # define re_set_syntax(syntax) __re_set_syntax (syntax)
! 81: # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
! 82: __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
! 83: # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
! 84:
! 85: #define btowc __btowc
! 86: #endif
! 87:
! 88: /* This is for other GNU distributions with internationalized messages. */
! 89: #if HAVE_LIBINTL_H || defined _LIBC
! 90: # include <libintl.h>
! 91: #else
! 92: # define gettext(msgid) (msgid)
! 93: #endif
! 94:
! 95: #ifndef gettext_noop
! 96: /* This define is so xgettext can find the internationalizable
! 97: strings. */
! 98: # define gettext_noop(String) String
! 99: #endif
! 100:
! 101: /* The `emacs' switch turns on certain matching commands
! 102: that make sense only in Emacs. */
! 103: #ifdef emacs
! 104:
! 105: # include "lisp.h"
! 106: # include "buffer.h"
! 107: # include "syntax.h"
! 108:
! 109: #else /* not emacs */
! 110:
! 111: /* If we are not linking with Emacs proper,
! 112: we can't use the relocating allocator
! 113: even if config.h says that we can. */
! 114: # undef REL_ALLOC
! 115:
! 116: # if defined STDC_HEADERS || defined _LIBC
! 117: # include <stdlib.h>
! 118: # else
! 119: char *malloc ();
! 120: char *realloc ();
! 121: # endif
! 122:
! 123: /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
! 124: If nothing else has been done, use the method below. */
! 125: # ifdef INHIBIT_STRING_HEADER
! 126: # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
! 127: # if !defined bzero && !defined bcopy
! 128: # undef INHIBIT_STRING_HEADER
! 129: # endif
! 130: # endif
! 131: # endif
! 132:
! 133: /* This is the normal way of making sure we have a bcopy and a bzero.
! 134: This is used in most programs--a few other programs avoid this
! 135: by defining INHIBIT_STRING_HEADER. */
! 136: # ifndef INHIBIT_STRING_HEADER
! 137: # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
! 138: # include <string.h>
! 139: # ifndef bzero
! 140: # ifndef _LIBC
! 141: # define bzero(s, n) (memset (s, '\0', n), (s))
! 142: # else
! 143: # define bzero(s, n) __bzero (s, n)
! 144: # endif
! 145: # endif
! 146: # else
! 147: # include <strings.h>
! 148: # ifndef memcmp
! 149: # define memcmp(s1, s2, n) bcmp (s1, s2, n)
! 150: # endif
! 151: # ifndef memcpy
! 152: # define memcpy(d, s, n) (bcopy (s, d, n), (d))
! 153: # endif
! 154: # endif
! 155: # endif
! 156:
! 157: /* Define the syntax stuff for \<, \>, etc. */
! 158:
! 159: /* This must be nonzero for the wordchar and notwordchar pattern
! 160: commands in re_match_2. */
! 161: # ifndef Sword
! 162: # define Sword 1
! 163: # endif
! 164:
! 165: # ifdef SWITCH_ENUM_BUG
! 166: # define SWITCH_ENUM_CAST(x) ((int)(x))
! 167: # else
! 168: # define SWITCH_ENUM_CAST(x) (x)
! 169: # endif
! 170:
! 171: /* How many characters in the character set. */
! 172: # define CHAR_SET_SIZE 256
! 173:
! 174: # ifdef SYNTAX_TABLE
! 175:
! 176: extern char *re_syntax_table;
! 177:
! 178: # else /* not SYNTAX_TABLE */
! 179:
! 180: static char re_syntax_table[CHAR_SET_SIZE];
! 181:
! 182: static void
! 183: init_syntax_once ()
! 184: {
! 185: register int c;
! 186: static int done;
! 187:
! 188: if (done)
! 189: return;
! 190:
! 191: memset (re_syntax_table, 0, sizeof re_syntax_table);
! 192:
! 193: for (c = 'a'; c <= 'z'; c++)
! 194: re_syntax_table[c] = Sword;
! 195:
! 196: for (c = 'A'; c <= 'Z'; c++)
! 197: re_syntax_table[c] = Sword;
! 198:
! 199: for (c = '0'; c <= '9'; c++)
! 200: re_syntax_table[c] = Sword;
! 201:
! 202: re_syntax_table['_'] = Sword;
! 203:
! 204: done = 1;
! 205: }
! 206:
! 207: # endif /* not SYNTAX_TABLE */
! 208:
! 209: # define SYNTAX(c) re_syntax_table[c]
! 210:
! 211: #endif /* not emacs */
! 212:
! 213: /* Get the interface, including the syntax bits. */
! 214: #include <regex-gnu.h>
! 215:
! 216: /* isalpha etc. are used for the character classes. */
! 217: #include <ctype.h>
! 218:
! 219: /* Jim Meyering writes:
! 220:
! 221: "... Some ctype macros are valid only for character codes that
! 222: isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
! 223: using /bin/cc or gcc but without giving an ansi option). So, all
! 224: ctype uses should be through macros like ISPRINT... If
! 225: STDC_HEADERS is defined, then autoconf has verified that the ctype
! 226: macros don't need to be guarded with references to isascii. ...
! 227: Defining isascii to 1 should let any compiler worth its salt
! 228: eliminate the && through constant folding."
! 229: Solaris defines some of these symbols so we must undefine them first. */
! 230:
! 231: #undef ISASCII
! 232: #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
! 233: # define ISASCII(c) 1
! 234: #else
! 235: # define ISASCII(c) isascii(c)
! 236: #endif
! 237:
! 238: #ifdef isblank
! 239: # define ISBLANK(c) (ISASCII (c) && isblank (c))
! 240: #else
! 241: # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
! 242: #endif
! 243: #ifdef isgraph
! 244: # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
! 245: #else
! 246: # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
! 247: #endif
! 248:
! 249: #undef ISPRINT
! 250: #define ISPRINT(c) (ISASCII (c) && isprint (c))
! 251: #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
! 252: #define ISALNUM(c) (ISASCII (c) && isalnum (c))
! 253: #define ISALPHA(c) (ISASCII (c) && isalpha (c))
! 254: #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
! 255: #define ISLOWER(c) (ISASCII (c) && islower (c))
! 256: #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
! 257: #define ISSPACE(c) (ISASCII (c) && isspace (c))
! 258: #define ISUPPER(c) (ISASCII (c) && isupper (c))
! 259: #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
! 260:
! 261: #ifdef _tolower
! 262: # define TOLOWER(c) _tolower(c)
! 263: #else
! 264: # define TOLOWER(c) tolower(c)
! 265: #endif
! 266:
! 267: #ifndef NULL
! 268: # define NULL (void *)0
! 269: #endif
! 270:
! 271: /* We remove any previous definition of `SIGN_EXTEND_CHAR',
! 272: since ours (we hope) works properly with all combinations of
! 273: machines, compilers, `char' and `unsigned char' argument types.
! 274: (Per Bothner suggested the basic approach.) */
! 275: #undef SIGN_EXTEND_CHAR
! 276: #if __STDC__
! 277: # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
! 278: #else /* not __STDC__ */
! 279: /* As in Harbison and Steele. */
! 280: # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
! 281: #endif
! 282:
! 283: /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
! 284: use `alloca' instead of `malloc'. This is because using malloc in
! 285: re_search* or re_match* could cause memory leaks when C-g is used in
! 286: Emacs; also, malloc is slower and causes storage fragmentation. On
! 287: the other hand, malloc is more portable, and easier to debug.
! 288:
! 289: Because we sometimes use alloca, some routines have to be macros,
! 290: not functions -- `alloca'-allocated space disappears at the end of the
! 291: function it is called in. */
! 292:
! 293: #ifdef REGEX_MALLOC
! 294:
! 295: # define REGEX_ALLOCATE malloc
! 296: # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
! 297: # define REGEX_FREE free
! 298:
! 299: #else /* not REGEX_MALLOC */
! 300:
! 301: /* Emacs already defines alloca, sometimes. */
! 302: # ifndef alloca
! 303:
! 304: /* Make alloca work the best possible way. */
! 305: # ifdef __GNUC__
! 306: # define alloca __builtin_alloca
! 307: # else /* not __GNUC__ */
! 308: # if HAVE_ALLOCA_H
! 309: # include <alloca.h>
! 310: # endif /* HAVE_ALLOCA_H */
! 311: # endif /* not __GNUC__ */
! 312:
! 313: # endif /* not alloca */
! 314:
! 315: # define REGEX_ALLOCATE alloca
! 316:
! 317: /* Assumes a `char *destination' variable. */
! 318: # define REGEX_REALLOCATE(source, osize, nsize) \
! 319: (destination = (char *) alloca (nsize), \
! 320: memcpy (destination, source, osize))
! 321:
! 322: /* No need to do anything to free, after alloca. */
! 323: # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
! 324:
! 325: #endif /* not REGEX_MALLOC */
! 326:
! 327: /* Define how to allocate the failure stack. */
! 328:
! 329: #if defined REL_ALLOC && defined REGEX_MALLOC
! 330:
! 331: # define REGEX_ALLOCATE_STACK(size) \
! 332: r_alloc (&failure_stack_ptr, (size))
! 333: # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
! 334: r_re_alloc (&failure_stack_ptr, (nsize))
! 335: # define REGEX_FREE_STACK(ptr) \
! 336: r_alloc_free (&failure_stack_ptr)
! 337:
! 338: #else /* not using relocating allocator */
! 339:
! 340: # ifdef REGEX_MALLOC
! 341:
! 342: # define REGEX_ALLOCATE_STACK malloc
! 343: # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
! 344: # define REGEX_FREE_STACK free
! 345:
! 346: # else /* not REGEX_MALLOC */
! 347:
! 348: # define REGEX_ALLOCATE_STACK alloca
! 349:
! 350: # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
! 351: REGEX_REALLOCATE (source, osize, nsize)
! 352: /* No need to explicitly free anything. */
! 353: # define REGEX_FREE_STACK(arg)
! 354:
! 355: # endif /* not REGEX_MALLOC */
! 356: #endif /* not using relocating allocator */
! 357:
! 358:
! 359: /* True if `size1' is nonzero and PTR is pointing anywhere inside
! 360: `string1' or just past its end. This works if PTR is NULL, which is
! 361: a good thing. */
! 362: #define FIRST_STRING_P(ptr) \
! 363: (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
! 364:
/* (Re)Allocate N items of type T using malloc/realloc.  TALLOC yields,
   and RETALLOC stores into ADDR, whatever the allocator returned, so
   the caller is responsible for checking the result for NULL.  */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))

/* Resize ADDR if it is already allocated, otherwise allocate it afresh.
   The body is wrapped in do/while (0) so that the macro is a single
   statement: a bare `if ... else ...' expansion cannot be used as the
   unbraced body of an enclosing if/else.  Callers still write the
   trailing semicolon themselves.  */
#define RETALLOC_IF(addr, n, t)                                         \
  do                                                                    \
    {                                                                   \
      if (addr)                                                         \
        RETALLOC ((addr), (n), t);                                      \
      else                                                              \
        (addr) = TALLOC ((n), t);                                       \
    }                                                                   \
  while (0)

/* Like TALLOC, but through REGEX_ALLOCATE (malloc or alloca).  */
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
! 371:
! 372: #define BYTEWIDTH 8 /* In bits. */
! 373:
! 374: #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
! 375:
! 376: #undef MAX
! 377: #undef MIN
! 378: #define MAX(a, b) ((a) > (b) ? (a) : (b))
! 379: #define MIN(a, b) ((a) < (b) ? (a) : (b))
! 380:
! 381: typedef char boolean;
! 382: #define false 0
! 383: #define true 1
! 384:
! 385: static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
! 386: const char *string1, int size1,
! 387: const char *string2, int size2,
! 388: int pos,
! 389: struct re_registers *regs,
! 390: int stop));
! 391:
! 392: /* These are the command codes that appear in compiled regular
! 393: expressions. Some opcodes are followed by argument bytes. A
! 394: command code can specify any interpretation whatsoever for its
! 395: arguments. Zero bytes may appear in the compiled regular expression. */
! 396:
typedef enum
{
  /* Does nothing.  Must stay zero.  */
  no_op = 0,

  /* Succeed immediately, with no further backtracking.  */
  succeed,

  /* One byte N follows, then N literal bytes to match.  */
  exactn,

  /* Match (more or less) any single character.  */
  anychar,

  /* Match one character that belongs to a set.  The next byte is the
     number of bitmap bytes; the bitmap itself follows, low bit first
     within each byte.  A character is a member when its bit is 1; a
     character too large to have a bit is never a member.  */
  charset,

  /* Takes the same arguments as charset, but matches a character that
     is NOT in the set.  */
  charset_not,

  /* Begin recording matched text for a register.  One byte follows
     with the register number (0 .. re_nsub - 1 of the pattern
     buffer), then one byte with the number of groups nested inside
     this one.  (That second byte lives here only because the
     on_failure_jump handling in re_match_2 needs it.)  */
  start_memory,

  /* Finish recording matched text and store it in a register.  As with
     start_memory, one byte with the register number and one byte with
     the count of inner groups follow.  (The count is repeated here
     because there is no easy way to get back to the matching
     start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a copy of previously remembered text.  One byte follows with
     the register number.  */
  duplicate,

  /* Fail unless positioned at the beginning of a line.  */
  begline,

  /* Fail unless positioned at the end of a line.  */
  endline,

  /* Succeed at the beginning of the buffer (emacs) or of the string
     being matched (otherwise).  */
  begbuf,

  /* The same, for the end of the buffer/string.  */
  endbuf,

  /* A two-byte relative address to jump to follows.  */
  jump,

  /* Like jump, but also marks the end of an alternative.  */
  jump_past_alt,

  /* A two-byte relative address follows: the place to resume at if
     matching fails later on.  */
  on_failure_jump,

  /* As on_failure_jump, except that a placeholder is pushed in place
     of the current string position.  */
  on_failure_keep_string_jump,

  /* Discard the most recent failure point, then jump to the two-byte
     relative address that follows.  */
  pop_failure_jump,

  /* Jumps back to the start of a repeat.  Rewritten to
     pop_failure_jump if we know we will not have to backtrack in order
     to match, and to plain jump otherwise: when what follows this jump
     clearly cannot match what the repeat matches, backtracking out of
     repetitions already matched is pointless, so the failure point
     can be dropped.  A two-byte address follows.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy failure
     point, which is thrown away if an attempt is made to use it for a
     failure.  A `+' construct emits this before the first repeat; it
     also serves as an intermediate kind of jump while compiling an
     alternative.  */
  dummy_failure_jump,

  /* Push a dummy failure point and carry on.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* A two-byte relative address and a two-byte number N follow.  After
     matching N times, jump to the address upon failure.  */
  succeed_n,

  /* A two-byte relative address and a two-byte number N follow.  Jump
     to the address N times, then fail.  */
  jump_n,

  /* Store the two-byte number that comes second at the two-byte
     relative address that comes first.  The address *includes* the two
     bytes of the number.  */
  set_number_at,

  /* Match any word-constituent character.  */
  wordchar,

  /* Match any character that is not a word constituent.  */
  notwordchar,

  /* Succeed at the beginning of a word.  */
  wordbeg,

  /* Succeed at the end of a word.  */
  wordend,

  /* Succeed at a word boundary.  */
  wordbound,

  /* Succeed anywhere that is not a word boundary.  */
  notwordbound

#ifdef emacs
  /* Succeed before point.  */
  ,before_dot,

  /* Succeed at point.  */
  at_dot,

  /* Succeed after point.  */
  after_dot,

  /* Match any character with the given syntax.  One byte follows
     holding a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Match any character whose syntax is not the one given.  */
  notsyntaxspec
#endif /* emacs */
} re_opcode_t;
! 530:
! 531: /* Common operations on the compiled pattern. */
! 532:
! 533: /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
! 534:
! 535: #define STORE_NUMBER(destination, number) \
! 536: do { \
! 537: (destination)[0] = (number) & 0377; \
! 538: (destination)[1] = (number) >> 8; \
! 539: } while (0)
! 540:
! 541: /* Same as STORE_NUMBER, except increment DESTINATION to
! 542: the byte after where the number is stored. Therefore, DESTINATION
! 543: must be an lvalue. */
! 544:
! 545: #define STORE_NUMBER_AND_INCR(destination, number) \
! 546: do { \
! 547: STORE_NUMBER (destination, number); \
! 548: (destination) += 2; \
! 549: } while (0)
! 550:
/* Put into DESTINATION the number stored in the two contiguous bytes
   starting at SOURCE: the first byte supplies the low-order eight bits
   and the second, sign-extended, the high-order part.

   The high byte is scaled with `* 256' rather than `<< 8': it is
   negative whenever its top bit is set, and left-shifting a negative
   value is undefined behavior in ISO C.  */

#define EXTRACT_NUMBER(destination, source)                             \
  do {                                                                  \
    (destination) = *(source) & 0377;                                   \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;          \
  } while (0)
! 559:
! 560: #ifdef DEBUG
! 561: static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
! 562: static void
! 563: extract_number (dest, source)
! 564: int *dest;
! 565: unsigned char *source;
! 566: {
! 567: int temp = SIGN_EXTEND_CHAR (*(source + 1));
! 568: *dest = *source & 0377;
! 569: *dest += temp << 8;
! 570: }
! 571:
! 572: # ifndef EXTRACT_MACROS /* To debug the macros. */
! 573: # undef EXTRACT_NUMBER
! 574: # define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
! 575: # endif /* not EXTRACT_MACROS */
! 576:
! 577: #endif /* DEBUG */
! 578:
! 579: /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
! 580: SOURCE must be an lvalue. */
! 581:
! 582: #define EXTRACT_NUMBER_AND_INCR(destination, source) \
! 583: do { \
! 584: EXTRACT_NUMBER (destination, source); \
! 585: (source) += 2; \
! 586: } while (0)
! 587:
! 588: #ifdef DEBUG
! 589: static void extract_number_and_incr _RE_ARGS ((int *destination,
! 590: unsigned char **source));
! 591: static void
! 592: extract_number_and_incr (destination, source)
! 593: int *destination;
! 594: unsigned char **source;
! 595: {
! 596: extract_number (destination, *source);
! 597: *source += 2;
! 598: }
! 599:
! 600: # ifndef EXTRACT_MACROS
! 601: # undef EXTRACT_NUMBER_AND_INCR
! 602: # define EXTRACT_NUMBER_AND_INCR(dest, src) \
! 603: extract_number_and_incr (&dest, &src)
! 604: # endif /* not EXTRACT_MACROS */
! 605:
! 606: #endif /* DEBUG */
! 607:
! 608: /* If DEBUG is defined, Regex prints many voluminous messages about what
! 609: it is doing (if the variable `debug' is nonzero). If linked with the
! 610: main program in `iregex.c', you can enter patterns and strings
! 611: interactively. And if linked with the main program in `main.c' and
! 612: the other test files, you can run the already-written tests. */
! 613:
! 614: #ifdef DEBUG
! 615:
! 616: /* We use standard I/O for debugging. */
! 617: # include <stdio.h>
! 618:
! 619: /* It is useful to test things that ``must'' be true when debugging. */
! 620: # include "zassert.h"
! 621:
! 622: static int debug;
! 623:
! 624: # define DEBUG_STATEMENT(e) e
! 625: # define DEBUG_PRINT1(x) if (debug) printf (x)
! 626: # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
! 627: # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
! 628: # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
! 629: # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
! 630: if (debug) print_partial_compiled_pattern (s, e)
! 631: # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
! 632: if (debug) print_double_string (w, s1, sz1, s2, sz2)
! 633:
! 634:
! 635: /* Print the fastmap in human-readable form. */
! 636:
! 637: void
! 638: print_fastmap (fastmap)
! 639: char *fastmap;
! 640: {
! 641: unsigned was_a_range = 0;
! 642: unsigned i = 0;
! 643:
! 644: while (i < (1 << BYTEWIDTH))
! 645: {
! 646: if (fastmap[i++])
! 647: {
! 648: was_a_range = 0;
! 649: putchar (i - 1);
! 650: while (i < (1 << BYTEWIDTH) && fastmap[i])
! 651: {
! 652: was_a_range = 1;
! 653: i++;
! 654: }
! 655: if (was_a_range)
! 656: {
! 657: printf ("-");
! 658: putchar (i - 1);
! 659: }
! 660: }
! 661: }
! 662: putchar ('\n');
! 663: }
! 664:
! 665:
! 666: /* Print a compiled pattern string in human-readable form, starting at
! 667: the START pointer into it and ending just before the pointer END. */
! 668:
! 669: void
! 670: print_partial_compiled_pattern (start, end)
! 671: unsigned char *start;
! 672: unsigned char *end;
! 673: {
! 674: int mcnt, mcnt2;
! 675: unsigned char *p1;
! 676: unsigned char *p = start;
! 677: unsigned char *pend = end;
! 678:
! 679: if (start == NULL)
! 680: {
! 681: printf ("(null)\n");
! 682: return;
! 683: }
! 684:
! 685: /* Loop over pattern commands. */
! 686: while (p < pend)
! 687: {
! 688: printf ("%d:\t", p - start);
! 689:
! 690: switch ((re_opcode_t) *p++)
! 691: {
! 692: case no_op:
! 693: printf ("/no_op");
! 694: break;
! 695:
! 696: case exactn:
! 697: mcnt = *p++;
! 698: printf ("/exactn/%d", mcnt);
! 699: do
! 700: {
! 701: putchar ('/');
! 702: putchar (*p++);
! 703: }
! 704: while (--mcnt);
! 705: break;
! 706:
! 707: case start_memory:
! 708: mcnt = *p++;
! 709: printf ("/start_memory/%d/%d", mcnt, *p++);
! 710: break;
! 711:
! 712: case stop_memory:
! 713: mcnt = *p++;
! 714: printf ("/stop_memory/%d/%d", mcnt, *p++);
! 715: break;
! 716:
! 717: case duplicate:
! 718: printf ("/duplicate/%d", *p++);
! 719: break;
! 720:
! 721: case anychar:
! 722: printf ("/anychar");
! 723: break;
! 724:
! 725: case charset:
! 726: case charset_not:
! 727: {
! 728: register int c, last = -100;
! 729: register int in_range = 0;
! 730:
! 731: printf ("/charset [%s",
! 732: (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
! 733:
! 734: assert (p + *p < pend);
! 735:
! 736: for (c = 0; c < 256; c++)
! 737: if (c / 8 < *p
! 738: && (p[1 + (c/8)] & (1 << (c % 8))))
! 739: {
! 740: /* Are we starting a range? */
! 741: if (last + 1 == c && ! in_range)
! 742: {
! 743: putchar ('-');
! 744: in_range = 1;
! 745: }
! 746: /* Have we broken a range? */
! 747: else if (last + 1 != c && in_range)
! 748: {
! 749: putchar (last);
! 750: in_range = 0;
! 751: }
! 752:
! 753: if (! in_range)
! 754: putchar (c);
! 755:
! 756: last = c;
! 757: }
! 758:
! 759: if (in_range)
! 760: putchar (last);
! 761:
! 762: putchar (']');
! 763:
! 764: p += 1 + *p;
! 765: }
! 766: break;
! 767:
! 768: case begline:
! 769: printf ("/begline");
! 770: break;
! 771:
! 772: case endline:
! 773: printf ("/endline");
! 774: break;
! 775:
! 776: case on_failure_jump:
! 777: extract_number_and_incr (&mcnt, &p);
! 778: printf ("/on_failure_jump to %d", p + mcnt - start);
! 779: break;
! 780:
! 781: case on_failure_keep_string_jump:
! 782: extract_number_and_incr (&mcnt, &p);
! 783: printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
! 784: break;
! 785:
! 786: case dummy_failure_jump:
! 787: extract_number_and_incr (&mcnt, &p);
! 788: printf ("/dummy_failure_jump to %d", p + mcnt - start);
! 789: break;
! 790:
! 791: case push_dummy_failure:
! 792: printf ("/push_dummy_failure");
! 793: break;
! 794:
! 795: case maybe_pop_jump:
! 796: extract_number_and_incr (&mcnt, &p);
! 797: printf ("/maybe_pop_jump to %d", p + mcnt - start);
! 798: break;
! 799:
! 800: case pop_failure_jump:
! 801: extract_number_and_incr (&mcnt, &p);
! 802: printf ("/pop_failure_jump to %d", p + mcnt - start);
! 803: break;
! 804:
! 805: case jump_past_alt:
! 806: extract_number_and_incr (&mcnt, &p);
! 807: printf ("/jump_past_alt to %d", p + mcnt - start);
! 808: break;
! 809:
! 810: case jump:
! 811: extract_number_and_incr (&mcnt, &p);
! 812: printf ("/jump to %d", p + mcnt - start);
! 813: break;
! 814:
! 815: case succeed_n:
! 816: extract_number_and_incr (&mcnt, &p);
! 817: p1 = p + mcnt;
! 818: extract_number_and_incr (&mcnt2, &p);
! 819: printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
! 820: break;
! 821:
! 822: case jump_n:
! 823: extract_number_and_incr (&mcnt, &p);
! 824: p1 = p + mcnt;
! 825: extract_number_and_incr (&mcnt2, &p);
! 826: printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
! 827: break;
! 828:
! 829: case set_number_at:
! 830: extract_number_and_incr (&mcnt, &p);
! 831: p1 = p + mcnt;
! 832: extract_number_and_incr (&mcnt2, &p);
! 833: printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
! 834: break;
! 835:
! 836: case wordbound:
! 837: printf ("/wordbound");
! 838: break;
! 839:
! 840: case notwordbound:
! 841: printf ("/notwordbound");
! 842: break;
! 843:
! 844: case wordbeg:
! 845: printf ("/wordbeg");
! 846: break;
! 847:
! 848: case wordend:
! 849: printf ("/wordend");
! 850:
! 851: # ifdef emacs
! 852: case before_dot:
! 853: printf ("/before_dot");
! 854: break;
! 855:
! 856: case at_dot:
! 857: printf ("/at_dot");
! 858: break;
! 859:
! 860: case after_dot:
! 861: printf ("/after_dot");
! 862: break;
! 863:
! 864: case syntaxspec:
! 865: printf ("/syntaxspec");
! 866: mcnt = *p++;
! 867: printf ("/%d", mcnt);
! 868: break;
! 869:
! 870: case notsyntaxspec:
! 871: printf ("/notsyntaxspec");
! 872: mcnt = *p++;
! 873: printf ("/%d", mcnt);
! 874: break;
! 875: # endif /* emacs */
! 876:
! 877: case wordchar:
! 878: printf ("/wordchar");
! 879: break;
! 880:
! 881: case notwordchar:
! 882: printf ("/notwordchar");
! 883: break;
! 884:
! 885: case begbuf:
! 886: printf ("/begbuf");
! 887: break;
! 888:
! 889: case endbuf:
! 890: printf ("/endbuf");
! 891: break;
! 892:
! 893: default:
! 894: printf ("?%d", *(p-1));
! 895: }
! 896:
! 897: putchar ('\n');
! 898: }
! 899:
! 900: printf ("%d:\tend of pattern.\n", p - start);
! 901: }
! 902:
! 903:
! 904: void
! 905: print_compiled_pattern (bufp)
! 906: struct re_pattern_buffer *bufp;
! 907: {
! 908: unsigned char *buffer = bufp->buffer;
! 909:
! 910: print_partial_compiled_pattern (buffer, buffer + bufp->used);
! 911: printf ("%ld bytes used/%ld bytes allocated.\n",
! 912: bufp->used, bufp->allocated);
! 913:
! 914: if (bufp->fastmap_accurate && bufp->fastmap)
! 915: {
! 916: printf ("fastmap: ");
! 917: print_fastmap (bufp->fastmap);
! 918: }
! 919:
! 920: printf ("re_nsub: %d\t", bufp->re_nsub);
! 921: printf ("regs_alloc: %d\t", bufp->regs_allocated);
! 922: printf ("can_be_null: %d\t", bufp->can_be_null);
! 923: printf ("newline_anchor: %d\n", bufp->newline_anchor);
! 924: printf ("no_sub: %d\t", bufp->no_sub);
! 925: printf ("not_bol: %d\t", bufp->not_bol);
! 926: printf ("not_eol: %d\t", bufp->not_eol);
! 927: printf ("syntax: %lx\n", bufp->syntax);
! 928: /* Perhaps we should print the translate table? */
! 929: }
! 930:
! 931:
/* Print on stdout the text that starts at WHERE in the virtual
   concatenation of STRING1 (SIZE1 bytes) and STRING2 (SIZE2 bytes).
   When WHERE lies in STRING1 (as decided by FIRST_STRING_P), the rest
   of STRING1 is printed followed by all of STRING2; otherwise WHERE is
   taken as a position in STRING2 and the rest of STRING2 is printed.
   Prints "(null)" when WHERE is NULL.  */
void
print_double_string (where, string1, size1, string2, size2)
    const char *where;
    const char *string1;
    int size1;
    const char *string2;
    int size2;
{
  int idx;

  if (where == NULL)
    {
      printf ("(null)");
      return;
    }

  if (FIRST_STRING_P (where))
    {
      idx = where - string1;
      while (idx < size1)
        putchar (string1[idx++]);

      /* Carry on from the very start of the second string.  */
      where = string2;
    }

  idx = where - string2;
  while (idx < size2)
    putchar (string2[idx++]);
}
! 958:
/* Write the single character C to stderr.  */
void
printchar (c)
    int c;
{
  fputc (c, stderr);
}
! 965:
! 966: #else /* not DEBUG */
! 967:
! 968: # undef assert
! 969: # define assert(e)
! 970:
! 971: # define DEBUG_STATEMENT(e)
! 972: # define DEBUG_PRINT1(x)
! 973: # define DEBUG_PRINT2(x1, x2)
! 974: # define DEBUG_PRINT3(x1, x2, x3)
! 975: # define DEBUG_PRINT4(x1, x2, x3, x4)
! 976: # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
! 977: # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
! 978:
! 979: #endif /* not DEBUG */
! 980:
! 981: /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
! 982: also be assigned to arbitrarily: each pattern buffer stores its own
! 983: syntax, so it can be changed between regex compilations. */
! 984: /* This has no initializer because initialized variables in Emacs
! 985: become read-only after dumping. */
! 986: reg_syntax_t re_syntax_options;
! 987:
! 988:
! 989: /* Specify the precise syntax of regexps for compilation. This provides
! 990: for compatibility for various utilities which historically have
! 991: different, incompatible syntaxes.
! 992:
! 993: The argument SYNTAX is a bit mask comprised of the various bits
! 994: defined in regex.h. We return the old syntax. */
! 995:
! 996: reg_syntax_t
! 997: re_set_syntax (syntax)
! 998: reg_syntax_t syntax;
! 999: {
! 1000: reg_syntax_t ret = re_syntax_options;
! 1001:
! 1002: re_syntax_options = syntax;
! 1003: #ifdef DEBUG
! 1004: if (syntax & RE_DEBUG)
! 1005: debug = 1;
! 1006: else if (debug) /* was on but now is not */
! 1007: debug = 0;
! 1008: #endif /* DEBUG */
! 1009: return ret;
! 1010: }
! 1011: #ifdef _LIBC
! 1012: weak_alias (__re_set_syntax, re_set_syntax)
! 1013: #endif
! 1014:
/* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be the same as there.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?  */
! 1019:
/* All messages live back to back in this one char array: adjacent
   string literals are concatenated, and the explicit "\0" after each
   entry terminates it.  Every REG_xxx_IDX macro is the byte offset of
   its message, computed as the previous offset plus `sizeof' of the
   previous literal (which counts that literal's terminating NUL).
   NOTE(review): this relies on gettext_noop expanding to just its
   argument -- its definition is not visible here; confirm.  */
static const char re_error_msgid[] =
  {
#define REG_NOERROR_IDX 0
    gettext_noop ("Success") /* REG_NOERROR */
    "\0"
#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
    gettext_noop ("No match") /* REG_NOMATCH */
    "\0"
#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
    "\0"
#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
    "\0"
#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
    "\0"
#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
    "\0"
#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
    "\0"
#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
    gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
    "\0"
#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
    "\0"
#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
    "\0"
#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
    "\0"
#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
    gettext_noop ("Invalid range end") /* REG_ERANGE */
    "\0"
#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
    gettext_noop ("Memory exhausted") /* REG_ESPACE */
    "\0"
#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
    "\0"
#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
    gettext_noop ("Premature end of regular expression") /* REG_EEND */
    "\0"
#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
    gettext_noop ("Regular expression too big") /* REG_ESIZE */
    "\0"
#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
  };
! 1073:
/* Byte offset into re_error_msgid of each message.  The entries are
   listed in the same order as the messages in re_error_msgid, so
   re_error_msgid + re_error_msgid_idx[i] is the start of the i-th
   message.  */
static const size_t re_error_msgid_idx[] =
  {
    REG_NOERROR_IDX,
    REG_NOMATCH_IDX,
    REG_BADPAT_IDX,
    REG_ECOLLATE_IDX,
    REG_ECTYPE_IDX,
    REG_EESCAPE_IDX,
    REG_ESUBREG_IDX,
    REG_EBRACK_IDX,
    REG_EPAREN_IDX,
    REG_EBRACE_IDX,
    REG_BADBR_IDX,
    REG_ERANGE_IDX,
    REG_ESPACE_IDX,
    REG_BADRPT_IDX,
    REG_EEND_IDX,
    REG_ESIZE_IDX,
    REG_ERPAREN_IDX
  };
! 1094:
! 1095: /* Avoiding alloca during matching, to placate r_alloc. */
! 1096:
! 1097: /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
! 1098: searching and matching functions should not call alloca. On some
! 1099: systems, alloca is implemented in terms of malloc, and if we're
! 1100: using the relocating allocator routines, then malloc could cause a
! 1101: relocation, which might (if the strings being searched are in the
! 1102: ralloc heap) shift the data out from underneath the regexp
! 1103: routines.
! 1104:
! 1105: Here's another reason to avoid allocation: Emacs
! 1106: processes input from X in a signal handler; processing X input may
! 1107: call malloc; if input arrives while a matching routine is calling
! 1108: malloc, then we're scrod. But Emacs can't just block input while
! 1109: calling matching routines; then we don't notice interrupts when
! 1110: they come in. So, Emacs blocks input around all regexp calls
! 1111: except the matching calls, which it leaves unprotected, in the
! 1112: faith that they will not malloc. */
! 1113:
! 1114: /* Normally, this is fine. */
! 1115: #define MATCH_MAY_ALLOCATE
! 1116:
! 1117: /* When using GNU C, we are not REALLY using the C alloca, no matter
! 1118: what config.h may say. So don't take precautions for it. */
! 1119: #ifdef __GNUC__
! 1120: # undef C_ALLOCA
! 1121: #endif
! 1122:
! 1123: /* The match routines may not allocate if (1) they would do it with malloc
! 1124: and (2) it's not safe for them to use malloc.
! 1125: Note that if REL_ALLOC is defined, matching would not use malloc for the
! 1126: failure stack, but we would still use it for the register vectors;
! 1127: so REL_ALLOC should not affect this. */
! 1128: #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
! 1129: # undef MATCH_MAY_ALLOCATE
! 1130: #endif
! 1131:
! 1132:
! 1133: /* Failure stack declarations and macros; both re_compile_fastmap and
! 1134: re_match_2 use a failure stack. These have to be macros because of
! 1135: REGEX_ALLOCATE_STACK. */
! 1136:
! 1137:
! 1138: /* Number of failure points for which to initially allocate space
! 1139: when matching. If this number is exceeded, we allocate more
! 1140: space, so it is not a hard limit. */
! 1141: #ifndef INIT_FAILURE_ALLOC
! 1142: # define INIT_FAILURE_ALLOC 5
! 1143: #endif
! 1144:
/* Roughly the maximum number of failure points on the stack.  It would
   be exactly that if every failure point pushed MAX_FAILURE_ITEMS items.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.  */
! 1149:
/* The two branches below declare the same three things -- the
   re_max_failures limit, the element type of the failure stack and the
   stack descriptor -- and differ only in integer width: `long' types
   when INT_IS_16BIT is defined, plain `int'/`unsigned' otherwise.  */
#ifdef INT_IS_16BIT

# if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
long int re_max_failures = 4000;
# else
long int re_max_failures = 2000;
# endif

/* One slot of the failure stack: either a pointer or an integer.  */
union fail_stack_elt
{
  unsigned char *pointer;
  long int integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

/* The failure stack itself: storage, capacity and fill level, the
   latter two counted in elements.  */
typedef struct
{
  fail_stack_elt_t *stack;
  unsigned long int size;
  unsigned long int avail; /* Offset of next open position. */
} fail_stack_type;

#else /* not INT_IS_16BIT */

# if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
/* NOTE(review): the default below (20000) is larger than the 4400
   quoted in the comment above, so that comment looks carried over from
   a smaller default -- confirm it still applies.  */
int re_max_failures = 20000;
# else
int re_max_failures = 2000;
# endif

/* One slot of the failure stack: either a pointer or an integer.  */
union fail_stack_elt
{
  unsigned char *pointer;
  int integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

/* The failure stack itself: storage, capacity and fill level, the
   latter two counted in elements.  */
typedef struct
{
  fail_stack_elt_t *stack;
  unsigned size;
  unsigned avail; /* Offset of next open position. */
} fail_stack_type;

#endif /* INT_IS_16BIT */
! 1201:
/* Fill-level predicates.  FAIL_STACK_EMPTY and FAIL_STACK_FULL look at
   a variable named `fail_stack'; FAIL_STACK_PTR_EMPTY looks through a
   pointer named `fail_stack_ptr'.  Full means avail has reached size.  */
#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
! 1205:
! 1206:
! 1207: /* Define macros to initialize and free the failure stack.
! 1208: Do `return -2' if the alloc fails. */
! 1209:
#ifdef MATCH_MAY_ALLOCATE
/* Allocate room for INIT_FAILURE_ALLOC elements through
   REGEX_ALLOCATE_STACK and mark the stack empty.  Contains a
   `return -2', so it can only be used inside a function for which
   returning -2 is meaningful.  */
# define INIT_FAIL_STACK() \
  do { \
    fail_stack.stack = (fail_stack_elt_t *) \
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
 \
    if (fail_stack.stack == NULL) \
      return -2; \
 \
    fail_stack.size = INIT_FAILURE_ALLOC; \
    fail_stack.avail = 0; \
  } while (0)

/* Hand the stack storage to REGEX_FREE_STACK.  */
# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
#else
/* No allocation is done here in this configuration: only the fill
   level is reset, and RESET_FAIL_STACK expands to nothing.  */
# define INIT_FAIL_STACK() \
  do { \
    fail_stack.avail = 0; \
  } while (0)

# define RESET_FAIL_STACK()
#endif
! 1232:
! 1233:
! 1234: /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
! 1235:
! 1236: Return 1 if succeeds, and 0 if either ran out of memory
! 1237: allocating space for it or it was already too large.
! 1238:
! 1239: REGEX_REALLOCATE_STACK requires `destination' be declared. */
! 1240:
/* Expression macro, evaluated in this order: (1) give up with 0 when
   the current size already exceeds re_max_failures * MAX_FAILURE_ITEMS;
   (2) otherwise ask REGEX_REALLOCATE_STACK for twice the current number
   of elements and store the result in (fail_stack).stack; (3) yield 0
   if that result is NULL, else double (fail_stack).size and yield 1.
   NOTE(review): the reallocation result is stored straight into
   (fail_stack).stack, so after a failure the previous pointer is no
   longer reachable through the stack; whether that loses memory depends
   on how REGEX_REALLOCATE_STACK is defined, which is not visible here.  */
#define DOUBLE_FAIL_STACK(fail_stack) \
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
   ? 0 \
   : ((fail_stack).stack = (fail_stack_elt_t *) \
        REGEX_REALLOCATE_STACK ((fail_stack).stack, \
          (fail_stack).size * sizeof (fail_stack_elt_t), \
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
 \
      (fail_stack).stack == NULL \
      ? 0 \
      : ((fail_stack).size <<= 1, \
         1)))
! 1253:
! 1254:
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The fullness test is made on FAIL_STACK itself (avail == size) rather
   than through FAIL_STACK_FULL, which always inspects the variable named
   `fail_stack' and would silently ignore the argument.  For callers that
   pass `fail_stack' the expansion is equivalent.  When the stack is full,
   DOUBLE_FAIL_STACK is tried first; POINTER is stored only if there is
   room.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
  ((((FAIL_STACK).avail == (FAIL_STACK).size) \
    && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
   ? 0 \
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER), \
      1))
! 1264:
! 1265: /* Push a pointer value onto the failure stack.
! 1266: Assumes the variable `fail_stack'. Probably should only
! 1267: be called from within `PUSH_FAILURE_POINT'. */
/* Each PUSH_... macro stores ITEM in the slot at `fail_stack.avail' and
   then advances `avail'; none of them checks for room, so the caller
   must have made sure the stack is large enough.  The expansions are
   fully parenthesized so that they remain a single operand when used
   inside a larger expression.  */
#define PUSH_FAILURE_POINTER(item) \
  (fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item))

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
#define PUSH_FAILURE_INT(item) \
  (fail_stack.stack[fail_stack.avail++].integer = (item))

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
#define PUSH_FAILURE_ELT(item) \
  (fail_stack.stack[fail_stack.avail++] = (item))

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  Each one first decrements
   `avail' and then yields the slot it now designates.  */
#define POP_FAILURE_POINTER() (fail_stack.stack[--fail_stack.avail].pointer)
#define POP_FAILURE_INT() (fail_stack.stack[--fail_stack.avail].integer)
#define POP_FAILURE_ELT() (fail_stack.stack[--fail_stack.avail])
! 1288:
! 1289: /* Used to omit pushing failure point id's when we're not debugging. */
! 1290: #ifdef DEBUG
! 1291: # define DEBUG_PUSH PUSH_FAILURE_INT
! 1292: # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
! 1293: #else
! 1294: # define DEBUG_PUSH(item)
! 1295: # define DEBUG_POP(item_addr)
! 1296: #endif
! 1297:
! 1298:
! 1299: /* Push the information about the state we will need
! 1300: if we ever fail back to it.
! 1301:
! 1302: Requires variables fail_stack, regstart, regend, reg_info, and
! 1303: num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
! 1304: be declared.
! 1305:
! 1306: Does `return FAILURE_CODE' if runs out of memory. */
! 1307:
/* Layout of one failure point, in push order: for every register from
   lowest_active_reg to highest_active_reg its start, end and info word;
   then lowest_active_reg, highest_active_reg, PATTERN_PLACE and
   STRING_PLACE; and finally, through DEBUG_PUSH, the failure id.  */
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
  do { \
    /* Declared only because DOUBLE_FAIL_STACK requires it. */ \
    char *destination; \
    /* Earlier note: this had to be int, so that when we don't save any \
       registers, the arithmetic of 0 + -1 isn't done as unsigned. \
       Later note, which the declaration below follows: it can't be \
       int, since there is no guarantee that int is wide enough to hold \
       a value of something to which a pointer can be assigned. */ \
    active_reg_t this_reg; \
 \
    DEBUG_STATEMENT (failure_id++); \
    DEBUG_STATEMENT (nfailure_points_pushed++); \
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
 \
    DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
 \
    /* Ensure we have enough space allocated for what we will push. */ \
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
      { \
        if (!DOUBLE_FAIL_STACK (fail_stack)) \
          return failure_code; \
 \
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
                      (fail_stack).size); \
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      } \
 \
    /* Push the info, starting with the registers. */ \
    DEBUG_PRINT1 ("\n"); \
 \
    /* The `if (1)' is always taken; the loop below always runs. */ \
    if (1) \
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
           this_reg++) \
        { \
          DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
          DEBUG_STATEMENT (num_regs_pushed++); \
 \
          DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
          PUSH_FAILURE_POINTER (regstart[this_reg]); \
 \
          DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
          PUSH_FAILURE_POINTER (regend[this_reg]); \
 \
          DEBUG_PRINT2 (" info: %p\n ", \
                        reg_info[this_reg].word.pointer); \
          DEBUG_PRINT2 (" match_null=%d", \
                        REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
          DEBUG_PRINT2 (" matched_something=%d", \
                        MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT2 (" ever_matched=%d", \
                        EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT1 ("\n"); \
          PUSH_FAILURE_ELT (reg_info[this_reg].word); \
        } \
 \
    DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg); \
 \
    DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg); \
 \
    DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
    PUSH_FAILURE_POINTER (pattern_place); \
 \
    DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
                               size2); \
    DEBUG_PRINT1 ("'\n"); \
    PUSH_FAILURE_POINTER (string_place); \
 \
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
    DEBUG_PUSH (failure_id); \
  } while (0)
! 1386:
! 1387: /* This is the number of items that are pushed and popped on the stack
! 1388: for each register. */
! 1389: #define NUM_REG_ITEMS 3
! 1390:
! 1391: /* Individual items aside from the registers. */
! 1392: #ifdef DEBUG
! 1393: # define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
! 1394: #else
! 1395: # define NUM_NONREG_ITEMS 4
! 1396: #endif
! 1397:
! 1398: /* We push at most this many items on the stack. */
! 1399: /* We used to use (num_regs - 1), which is the number of registers
! 1400: this regexp will save; but that was changed to 5
! 1401: to avoid stack overflow for a regexp with lots of parens. */
! 1402: #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
! 1403:
/* We actually push this many items: NUM_REG_ITEMS for every register
   from lowest_active_reg through highest_active_reg, plus the
   NUM_NONREG_ITEMS fixed ones.  (The former `0 ? 0 : ...' wrapper always
   selected this same count, so it has been dropped.)  */
#define NUM_FAILURE_ITEMS \
  (((highest_active_reg - lowest_active_reg + 1) \
    * NUM_REG_ITEMS) \
   + NUM_NONREG_ITEMS)
! 1410:
! 1411: /* How many items can still be added to the stack without overflowing it. */
! 1412: #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
! 1413:
! 1414:
/* Pops what PUSH_FAILURE_POINT pushes.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack', `set_regs_matched_done' and
   (if debugging), `bufp', `pend', `string1', `size1', `string2', and
   `size2'.  */
! 1426:
/* Items come off in the reverse of the order PUSH_FAILURE_POINT stored
   them: failure id (through DEBUG_POP), string position, pattern
   position, highest and lowest active register, then for each register
   from HIGH_REG down to LOW_REG its info word, end and start.
   Note that this macro is a bare brace block, not a do-while.  */
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{ \
  DEBUG_STATEMENT (unsigned failure_id;) \
  active_reg_t this_reg; \
  const unsigned char *string_temp; \
 \
  assert (!FAIL_STACK_EMPTY ()); \
 \
  /* Remove failure points and point to how many regs pushed. */ \
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
 \
  assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
 \
  DEBUG_POP (&failure_id); \
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
 \
  /* If the saved string location is NULL, it came from an \
     on_failure_keep_string_jump opcode, and we want to throw away the \
     saved NULL, thus retaining our current position in the string. */ \
  string_temp = POP_FAILURE_POINTER (); \
  if (string_temp != NULL) \
    str = (const char *) string_temp; \
 \
  DEBUG_PRINT2 (" Popping string %p: `", str); \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
  DEBUG_PRINT1 ("'\n"); \
 \
  pat = (unsigned char *) POP_FAILURE_POINTER (); \
  DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
 \
  /* Restore register info. */ \
  high_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
 \
  low_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
 \
  /* The condition is the constant 1, so the loop always runs and the \
     `else' branch further down is never executed. \
     NOTE(review): if active_reg_t is an unsigned type and low_reg \
     could be 0, `this_reg >= low_reg' would never become false -- \
     confirm that low_reg is always at least 1 here. */ \
  if (1) \
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
      { \
        DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
 \
        reg_info[this_reg].word = POP_FAILURE_ELT (); \
        DEBUG_PRINT2 (" info: %p\n", \
                      reg_info[this_reg].word.pointer); \
 \
        regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
 \
        regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
      } \
  else \
    { \
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
        { \
          reg_info[this_reg].word.integer = 0; \
          regend[this_reg] = 0; \
          regstart[this_reg] = 0; \
        } \
      highest_active_reg = high_reg; \
    } \
 \
  /* Clear the flag that SET_REGS_MATCHED tests before doing its work. */ \
  set_regs_matched_done = 0; \
  DEBUG_STATEMENT (nfailure_points_popped++); \
} /* POP_FAILURE_POINT */
! 1496:
! 1497:
! 1498:
! 1499: /* Structure for per-register (a.k.a. per-group) information.
! 1500: Other register information, such as the
! 1501: starting and ending positions (which are addresses), and the list of
! 1502: inner groups (which is a bits list) are maintained in separate
! 1503: variables.
! 1504:
! 1505: We are making a (strictly speaking) nonportable assumption here: that
! 1506: the compiler will pack our bit fields into something that fits into
! 1507: the type of `word', i.e., is something that fits into one item on the
! 1508: failure stack. */
! 1509:
! 1510:
! 1511: /* Declarations and macros for re_match_2. */
! 1512:
/* Per-register information.  The `word' member overlays the bit fields
   with one failure-stack element, which is how PUSH_FAILURE_POINT and
   POP_FAILURE_POINT save and restore all the flags in a single slot.  */
typedef union
{
  fail_stack_elt_t word;
  struct
  {
    /* This field is one if this group can match the empty string,
       zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
#define MATCH_NULL_UNSET_VALUE 3
    unsigned match_null_string_p : 2;
    unsigned is_active : 1;
    /* Both of the following are set to 1 by SET_REGS_MATCHED.  */
    unsigned matched_something : 1;
    unsigned ever_matched_something : 1;
  } bits;
} register_info_type;

/* Accessors for the individual flags of a register_info_type R; each
   expands to the bit field itself, so it can also be assigned to.  */
#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
#define IS_ACTIVE(R) ((R).bits.is_active)
#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
! 1532:
! 1533:
/* To be invoked once a real character has been matched.  Unless the
   work was already done (`set_regs_matched_done' nonzero), it raises
   both the `matched_something' and the `ever_matched_something' flag of
   every register from lowest_active_reg through highest_active_reg and
   records that it has done so.  */
#define SET_REGS_MATCHED() \
  do { \
    if (!set_regs_matched_done) \
      { \
        active_reg_t matched_reg = lowest_active_reg; \
        set_regs_matched_done = 1; \
        while (matched_reg <= highest_active_reg) \
          { \
            EVER_MATCHED_SOMETHING (reg_info[matched_reg]) = 1; \
            MATCHED_SOMETHING (reg_info[matched_reg]) = 1; \
            matched_reg++; \
          } \
      } \
  } while (0)
! 1553:
/* Registers are set to a sentinel when they haven't yet matched.  The
   sentinel is the address of a private dummy object, which compares
   unequal to the address of any other object; REG_UNSET (E) tests
   whether E is that sentinel.  */
static char reg_unset_dummy;
#define REG_UNSET_VALUE (&reg_unset_dummy)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
! 1558:
! 1559: /* Subroutine declarations and macros for regex_compile. */
! 1560:
! 1561: static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
! 1562: reg_syntax_t syntax,
! 1563: struct re_pattern_buffer *bufp));
! 1564: static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
! 1565: static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
! 1566: int arg1, int arg2));
! 1567: static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
! 1568: int arg, unsigned char *end));
! 1569: static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
! 1570: int arg1, int arg2, unsigned char *end));
! 1571: static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
! 1572: reg_syntax_t syntax));
! 1573: static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
! 1574: reg_syntax_t syntax));
! 1575: static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
! 1576: const char *pend,
! 1577: char *translate,
! 1578: reg_syntax_t syntax,
! 1579: unsigned char *b));
! 1580:
! 1581: /* Fetch the next character in the uncompiled pattern---translating it
! 1582: if necessary. Also cast from a signed character in the constant
! 1583: string passed to us by the user to an unsigned char that we can use
! 1584: as an array index (in, e.g., `translate'). */
! 1585: #ifndef PATFETCH
! 1586: # define PATFETCH(c) \
! 1587: do {if (p == pend) return REG_EEND; \
! 1588: c = (unsigned char) *p++; \
! 1589: if (translate) c = (unsigned char) translate[c]; \
! 1590: } while (0)
! 1591: #endif
! 1592:
! 1593: /* Fetch the next character in the uncompiled pattern, with no
! 1594: translation. */
! 1595: #define PATFETCH_RAW(c) \
! 1596: do {if (p == pend) return REG_EEND; \
! 1597: c = (unsigned char) *p++; \
! 1598: } while (0)
! 1599:
! 1600: /* Go backwards one character in the pattern. */
! 1601: #define PATUNFETCH p--
! 1602:
! 1603:
! 1604: /* If `translate' is non-null, return translate[D], else just D. We
! 1605: cast the subscript to translate because some data is declared as
! 1606: `char *', to avoid warnings when a string constant is passed. But
! 1607: when we use a character as a subscript we must make it unsigned. */
! 1608: #ifndef TRANSLATE
! 1609: # define TRANSLATE(d) \
! 1610: (translate ? (char) translate[(unsigned char) (d)] : (d))
! 1611: #endif
! 1612:
! 1613:
! 1614: /* Macros for outputting the compiled pattern into `buffer'. */
! 1615:
! 1616: /* If the buffer isn't allocated when it comes in, use this. */
! 1617: #define INIT_BUF_SIZE 32
! 1618:
! 1619: /* Make sure we have at least N more bytes of space in buffer. */
! 1620: #define GET_BUFFER_SPACE(n) \
! 1621: while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
! 1622: EXTEND_BUFFER ()
! 1623:
! 1624: /* Make sure we have one more byte of buffer space and then add C to it. */
! 1625: #define BUF_PUSH(c) \
! 1626: do { \
! 1627: GET_BUFFER_SPACE (1); \
! 1628: *b++ = (unsigned char) (c); \
! 1629: } while (0)
! 1630:
! 1631:
! 1632: /* Ensure we have two more bytes of buffer space and then append C1 and C2. */
! 1633: #define BUF_PUSH_2(c1, c2) \
! 1634: do { \
! 1635: GET_BUFFER_SPACE (2); \
! 1636: *b++ = (unsigned char) (c1); \
! 1637: *b++ = (unsigned char) (c2); \
! 1638: } while (0)
! 1639:
! 1640:
! 1641: /* As with BUF_PUSH_2, except for three bytes. */
! 1642: #define BUF_PUSH_3(c1, c2, c3) \
! 1643: do { \
! 1644: GET_BUFFER_SPACE (3); \
! 1645: *b++ = (unsigned char) (c1); \
! 1646: *b++ = (unsigned char) (c2); \
! 1647: *b++ = (unsigned char) (c3); \
! 1648: } while (0)
! 1649:
! 1650:
! 1651: /* Store a jump with opcode OP at LOC to location TO. We store a
! 1652: relative address offset by the three bytes the jump itself occupies. */
! 1653: #define STORE_JUMP(op, loc, to) \
! 1654: store_op1 (op, loc, (int) ((to) - (loc) - 3))
! 1655:
! 1656: /* Likewise, for a two-argument jump. */
! 1657: #define STORE_JUMP2(op, loc, to, arg) \
! 1658: store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
! 1659:
! 1660: /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
! 1661: #define INSERT_JUMP(op, loc, to) \
! 1662: insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
! 1663:
! 1664: /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
! 1665: #define INSERT_JUMP2(op, loc, to, arg) \
! 1666: insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
! 1667:
! 1668:
! 1669: /* This is not an arbitrary limit: the arguments which represent offsets
! 1670: into the pattern are two bytes long. So if 2^16 bytes turns out to
! 1671: be too small, many things would have to change. */
! 1672: /* Any other compiler which, like MSC, has allocation limit below 2^16
! 1673: bytes will have to use approach similar to what was done below for
! 1674: MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
! 1675: reallocating to 0 bytes. Such thing is not going to work too well.
! 1676: You have been warned!! */
! 1677: #if defined _MSC_VER && !defined _WIN32
! 1678: /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
! 1679: The REALLOC define eliminates a flurry of conversion warnings,
! 1680: but is not required. */
! 1681: # define MAX_BUF_SIZE 65500L
! 1682: # define REALLOC(p,s) realloc ((p), (size_t) (s))
! 1683: #else
! 1684: # define MAX_BUF_SIZE (1L << 16)
! 1685: # define REALLOC(p,s) realloc ((p), (s))
! 1686: #endif
! 1687:
/* Double the size of the pattern buffer via realloc (never going beyond
   MAX_BUF_SIZE) and reset the pointers that pointed into the old block
   to point to the correct places in the new one.

   Does `return REG_ESIZE' if the buffer already is MAX_BUF_SIZE bytes,
   and `return REG_ESPACE' if realloc fails.  The new size and address
   are stored in `bufp' only after realloc has succeeded, so on failure
   `bufp->buffer' and `bufp->allocated' still describe the old, intact
   block instead of a null pointer paired with a doubled size.

   Requires `bufp', `b', `begalt', `fixup_alt_jump', `laststart' and
   `pending_exact' to be in scope.  */
#define EXTEND_BUFFER() \
  do { \
    unsigned char *old_buffer = bufp->buffer; \
    unsigned char *new_buffer; \
    unsigned long new_allocated; \
    if (bufp->allocated == MAX_BUF_SIZE) \
      return REG_ESIZE; \
    new_allocated = bufp->allocated << 1; \
    if (new_allocated > MAX_BUF_SIZE) \
      new_allocated = MAX_BUF_SIZE; \
    new_buffer = (unsigned char *) REALLOC (old_buffer, new_allocated); \
    if (new_buffer == NULL) \
      return REG_ESPACE; \
    bufp->buffer = new_buffer; \
    bufp->allocated = new_allocated; \
    /* If the buffer moved, move all the pointers into it. */ \
    if (old_buffer != bufp->buffer) \
      { \
        b = (b - old_buffer) + bufp->buffer; \
        begalt = (begalt - old_buffer) + bufp->buffer; \
        if (fixup_alt_jump) \
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
        if (laststart) \
          laststart = (laststart - old_buffer) + bufp->buffer; \
        if (pending_exact) \
          pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
      } \
  } while (0)
! 1716:
! 1717:
! 1718: /* Since we have one byte reserved for the register number argument to
! 1719: {start,stop}_memory, the maximum number of groups we can report
! 1720: things about is what fits in that byte. */
! 1721: #define MAX_REGNUM 255
! 1722:
! 1723: /* But patterns can have more than `MAX_REGNUM' registers. We just
! 1724: ignore the excess. */
! 1725: typedef unsigned regnum_t;
! 1726:
! 1727:
! 1728: /* Macros for the compile stack. */
! 1729:
! 1730: /* Since offsets can go either forwards or backwards, this type needs to
! 1731: be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
! 1732: /* int may be not enough when sizeof(int) == 2. */
! 1733: typedef long pattern_offset_t;
! 1734:
! 1735: typedef struct
! 1736: {
! 1737: pattern_offset_t begalt_offset;
! 1738: pattern_offset_t fixup_alt_jump;
! 1739: pattern_offset_t inner_group_offset;
! 1740: pattern_offset_t laststart_offset;
! 1741: regnum_t regnum;
! 1742: } compile_stack_elt_t;
! 1743:
! 1744:
! 1745: typedef struct
! 1746: {
! 1747: compile_stack_elt_t *stack;
! 1748: unsigned size;
! 1749: unsigned avail; /* Offset of next open position. */
! 1750: } compile_stack_type;
! 1751:
! 1752:
! 1753: #define INIT_COMPILE_STACK_SIZE 32
! 1754:
! 1755: #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
! 1756: #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
! 1757:
! 1758: /* The next available element. */
! 1759: #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
! 1760:
! 1761:
/* Set the bit for character C in a list.  The list is the bitmap that
   `b' points at: C, converted to unsigned char, selects byte
   C / BYTEWIDTH and bit C % BYTEWIDTH within it.  Both uses of C are
   parenthesized so that an expression argument is converted as a whole.  */
#define SET_LIST_BIT(c) \
  (b[((unsigned char) (c)) / BYTEWIDTH] \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
! 1766:
! 1767:
/* Get the next unsigned number in the uncompiled pattern.

   Reads decimal digits through PATFETCH into NUM, using the variables
   `p', `pend' and `c'.  NUM is left untouched when no digit is seen, so
   a caller that presets it to a negative value can tell "no number"
   apart.  On exit `c' holds the last character fetched.

   Digits keep being consumed, but are no longer accumulated, once NUM
   has gone past RE_DUP_MAX: NUM then stays above RE_DUP_MAX instead of
   growing until the signed arithmetic overflows.
   NOTE(review): RE_DUP_MAX is expected to come from regex.h; when int
   is only 16 bits wide this guard cannot prevent the overflow.  */
#define GET_UNSIGNED_NUMBER(num) \
  { if (p != pend) \
      { \
        PATFETCH (c); \
        while (ISDIGIT (c)) \
          { \
            if (num <= RE_DUP_MAX) \
              { \
                if (num < 0) \
                  num = 0; \
                num = num * 10 + c - '0'; \
              } \
            if (p == pend) \
              break; \
            PATFETCH (c); \
          } \
      } \
  }
! 1784:
! 1785: #if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
! 1788: # ifdef CHARCLASS_NAME_MAX
! 1789: # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
! 1790: # else
! 1791: /* This shouldn't happen but some implementation might still have this
! 1792: problem. Use a reasonable default value. */
! 1793: # define CHAR_CLASS_MAX_LENGTH 256
! 1794: # endif
! 1795:
! 1796: # ifdef _LIBC
! 1797: # define IS_CHAR_CLASS(string) __wctype (string)
! 1798: # else
! 1799: # define IS_CHAR_CLASS(string) wctype (string)
! 1800: # endif
! 1801: #else
! 1802: # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
! 1803:
! 1804: # define IS_CHAR_CLASS(string) \
! 1805: (STREQ (string, "alpha") || STREQ (string, "upper") \
! 1806: || STREQ (string, "lower") || STREQ (string, "digit") \
! 1807: || STREQ (string, "alnum") || STREQ (string, "xdigit") \
! 1808: || STREQ (string, "space") || STREQ (string, "print") \
! 1809: || STREQ (string, "punct") || STREQ (string, "graph") \
! 1810: || STREQ (string, "cntrl") || STREQ (string, "blank"))
! 1811: #endif
! 1812:
! 1813: #ifndef MATCH_MAY_ALLOCATE
! 1814:
! 1815: /* If we cannot allocate large objects within re_match_2_internal,
! 1816: we make the fail stack and register vectors global.
! 1817: The fail stack, we grow to the maximum size when a regexp
! 1818: is compiled.
! 1819: The register vectors, we adjust in size each time we
! 1820: compile a regexp, according to the number of registers it needs. */
! 1821:
! 1822: static fail_stack_type fail_stack;
! 1823:
! 1824: /* Size with which the following vectors are currently allocated.
! 1825: That is so we can make them bigger as needed,
! 1826: but never make them smaller. */
! 1827: static int regs_allocated_size;
! 1828:
! 1829: static const char ** regstart, ** regend;
! 1830: static const char ** old_regstart, ** old_regend;
! 1831: static const char **best_regstart, **best_regend;
! 1832: static register_info_type *reg_info;
! 1833: static const char **reg_dummy;
! 1834: static register_info_type *reg_info_dummy;
! 1835:
! 1836: /* Make the register vectors big enough for NUM_REGS registers,
! 1837: but don't make them smaller. */
! 1838:
! 1839: static
! 1840: regex_grow_registers (num_regs)
! 1841: int num_regs;
! 1842: {
! 1843: if (num_regs > regs_allocated_size)
! 1844: {
! 1845: RETALLOC_IF (regstart, num_regs, const char *);
! 1846: RETALLOC_IF (regend, num_regs, const char *);
! 1847: RETALLOC_IF (old_regstart, num_regs, const char *);
! 1848: RETALLOC_IF (old_regend, num_regs, const char *);
! 1849: RETALLOC_IF (best_regstart, num_regs, const char *);
! 1850: RETALLOC_IF (best_regend, num_regs, const char *);
! 1851: RETALLOC_IF (reg_info, num_regs, register_info_type);
! 1852: RETALLOC_IF (reg_dummy, num_regs, const char *);
! 1853: RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
! 1854:
! 1855: regs_allocated_size = num_regs;
! 1856: }
! 1857: }
! 1858:
! 1859: #endif /* not MATCH_MAY_ALLOCATE */
! 1860:
! 1861: static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
! 1862: compile_stack,
! 1863: regnum_t regnum));
! 1864:
! 1865: /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
! 1866: Returns one of error codes defined in `regex.h', or zero for success.
! 1867:
! 1868: Assumes the `allocated' (and perhaps `buffer') and `translate'
! 1869: fields are set in BUFP on entry.
! 1870:
! 1871: If it succeeds, results are put in BUFP (if it returns an error, the
! 1872: contents of BUFP are undefined):
! 1873: `buffer' is the compiled pattern;
! 1874: `syntax' is set to SYNTAX;
! 1875: `used' is set to the length of the compiled pattern;
! 1876: `fastmap_accurate' is zero;
! 1877: `re_nsub' is the number of subexpressions in PATTERN;
! 1878: `not_bol' and `not_eol' are zero;
! 1879:
! 1880: The `fastmap' and `newline_anchor' fields are neither
! 1881: examined nor set. */
! 1882:
/* Leave the enclosing function with VALUE as its result, first
   releasing the storage held by the `compile_stack' in scope.
   The stack is freed before VALUE is evaluated.  Use it as a single
   statement, followed by a semicolon.  */
#define FREE_STACK_RETURN(value) \
  do \
    { \
      free (compile_stack.stack); \
      return value; \
    } \
  while (0)
! 1886:
! 1887: static reg_errcode_t
! 1888: regex_compile (pattern, size, syntax, bufp)
! 1889: const char *pattern;
! 1890: size_t size;
! 1891: reg_syntax_t syntax;
! 1892: struct re_pattern_buffer *bufp;
! 1893: {
! 1894: /* We fetch characters from PATTERN here. Even though PATTERN is
! 1895: `char *' (i.e., signed), we declare these variables as unsigned, so
! 1896: they can be reliably used as array indices. */
! 1897: register unsigned char c, c1;
! 1898:
! 1899: /* A random temporary spot in PATTERN. */
! 1900: const char *p1;
! 1901:
! 1902: /* Points to the end of the buffer, where we should append. */
! 1903: register unsigned char *b;
! 1904:
! 1905: /* Keeps track of unclosed groups. */
! 1906: compile_stack_type compile_stack;
! 1907:
! 1908: /* Points to the current (ending) position in the pattern. */
! 1909: const char *p = pattern;
! 1910: const char *pend = pattern + size;
! 1911:
! 1912: /* How to translate the characters in the pattern. */
! 1913: RE_TRANSLATE_TYPE translate = bufp->translate;
! 1914:
! 1915: /* Address of the count-byte of the most recently inserted `exactn'
! 1916: command. This makes it possible to tell if a new exact-match
! 1917: character can be added to that command or if the character requires
! 1918: a new `exactn' command. */
! 1919: unsigned char *pending_exact = 0;
! 1920:
! 1921: /* Address of start of the most recently finished expression.
! 1922: This tells, e.g., postfix * where to find the start of its
! 1923: operand. Reset at the beginning of groups and alternatives. */
! 1924: unsigned char *laststart = 0;
! 1925:
! 1926: /* Address of beginning of regexp, or inside of last group. */
! 1927: unsigned char *begalt;
! 1928:
! 1929: /* Place in the uncompiled pattern (i.e., the {) to
! 1930: which to go back if the interval is invalid. */
! 1931: const char *beg_interval;
! 1932:
! 1933: /* Address of the place where a forward jump should go to the end of
! 1934: the containing expression. Each alternative of an `or' -- except the
! 1935: last -- ends with a forward jump of this sort. */
! 1936: unsigned char *fixup_alt_jump = 0;
! 1937:
! 1938: /* Counts open-groups as they are encountered. Remembered for the
! 1939: matching close-group on the compile stack, so the same register
! 1940: number is put in the stop_memory as the start_memory. */
! 1941: regnum_t regnum = 0;
! 1942:
! 1943: #ifdef DEBUG
! 1944: DEBUG_PRINT1 ("\nCompiling pattern: ");
! 1945: if (debug)
! 1946: {
! 1947: unsigned debug_count;
! 1948:
! 1949: for (debug_count = 0; debug_count < size; debug_count++)
! 1950: putchar (pattern[debug_count]);
! 1951: putchar ('\n');
! 1952: }
! 1953: #endif /* DEBUG */
! 1954:
! 1955: /* Initialize the compile stack. */
! 1956: compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
! 1957: if (compile_stack.stack == NULL)
! 1958: return REG_ESPACE;
! 1959:
! 1960: compile_stack.size = INIT_COMPILE_STACK_SIZE;
! 1961: compile_stack.avail = 0;
! 1962:
! 1963: /* Initialize the pattern buffer. */
! 1964: bufp->syntax = syntax;
! 1965: bufp->fastmap_accurate = 0;
! 1966: bufp->not_bol = bufp->not_eol = 0;
! 1967:
! 1968: /* Set `used' to zero, so that if we return an error, the pattern
! 1969: printer (for debugging) will think there's no pattern. We reset it
! 1970: at the end. */
! 1971: bufp->used = 0;
! 1972:
! 1973: /* Always count groups, whether or not bufp->no_sub is set. */
! 1974: bufp->re_nsub = 0;
! 1975:
! 1976: #if !defined emacs && !defined SYNTAX_TABLE
! 1977: /* Initialize the syntax table. */
! 1978: init_syntax_once ();
! 1979: #endif
! 1980:
! 1981: if (bufp->allocated == 0)
! 1982: {
! 1983: if (bufp->buffer)
! 1984: { /* If zero allocated, but buffer is non-null, try to realloc
! 1985: enough space. This loses if buffer's address is bogus, but
! 1986: that is the user's responsibility. */
! 1987: RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
! 1988: }
! 1989: else
! 1990: { /* Caller did not allocate a buffer. Do it for them. */
! 1991: bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
! 1992: }
! 1993: if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
! 1994:
! 1995: bufp->allocated = INIT_BUF_SIZE;
! 1996: }
! 1997:
! 1998: begalt = b = bufp->buffer;
! 1999:
! 2000: /* Loop through the uncompiled pattern until we're at the end. */
! 2001: while (p != pend)
! 2002: {
! 2003: PATFETCH (c);
! 2004:
! 2005: switch (c)
! 2006: {
! 2007: case '^':
! 2008: {
! 2009: if ( /* If at start of pattern, it's an operator. */
! 2010: p == pattern + 1
! 2011: /* If context independent, it's an operator. */
! 2012: || syntax & RE_CONTEXT_INDEP_ANCHORS
! 2013: /* Otherwise, depends on what's come before. */
! 2014: || at_begline_loc_p (pattern, p, syntax))
! 2015: BUF_PUSH (begline);
! 2016: else
! 2017: goto normal_char;
! 2018: }
! 2019: break;
! 2020:
! 2021:
! 2022: case '$':
! 2023: {
! 2024: if ( /* If at end of pattern, it's an operator. */
! 2025: p == pend
! 2026: /* If context independent, it's an operator. */
! 2027: || syntax & RE_CONTEXT_INDEP_ANCHORS
! 2028: /* Otherwise, depends on what's next. */
! 2029: || at_endline_loc_p (p, pend, syntax))
! 2030: BUF_PUSH (endline);
! 2031: else
! 2032: goto normal_char;
! 2033: }
! 2034: break;
! 2035:
! 2036:
! 2037: case '+':
! 2038: case '?':
! 2039: if ((syntax & RE_BK_PLUS_QM)
! 2040: || (syntax & RE_LIMITED_OPS))
! 2041: goto normal_char;
! 2042: handle_plus:
! 2043: case '*':
! 2044: /* If there is no previous pattern... */
! 2045: if (!laststart)
! 2046: {
! 2047: if (syntax & RE_CONTEXT_INVALID_OPS)
! 2048: FREE_STACK_RETURN (REG_BADRPT);
! 2049: else if (!(syntax & RE_CONTEXT_INDEP_OPS))
! 2050: goto normal_char;
! 2051: }
! 2052:
! 2053: {
! 2054: /* Are we optimizing this jump? */
! 2055: boolean keep_string_p = false;
! 2056:
! 2057: /* 1 means zero (many) matches is allowed. */
! 2058: char zero_times_ok = 0, many_times_ok = 0;
! 2059:
! 2060: /* If there is a sequence of repetition chars, collapse it
! 2061: down to just one (the right one). We can't combine
! 2062: interval operators with these because of, e.g., `a{2}*',
! 2063: which should only match an even number of `a's. */
! 2064:
! 2065: for (;;)
! 2066: {
! 2067: zero_times_ok |= c != '+';
! 2068: many_times_ok |= c != '?';
! 2069:
! 2070: if (p == pend)
! 2071: break;
! 2072:
! 2073: PATFETCH (c);
! 2074:
! 2075: if (c == '*'
! 2076: || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
! 2077: ;
! 2078:
! 2079: else if (syntax & RE_BK_PLUS_QM && c == '\\')
! 2080: {
! 2081: if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
! 2082:
! 2083: PATFETCH (c1);
! 2084: if (!(c1 == '+' || c1 == '?'))
! 2085: {
! 2086: PATUNFETCH;
! 2087: PATUNFETCH;
! 2088: break;
! 2089: }
! 2090:
! 2091: c = c1;
! 2092: }
! 2093: else
! 2094: {
! 2095: PATUNFETCH;
! 2096: break;
! 2097: }
! 2098:
! 2099: /* If we get here, we found another repeat character. */
! 2100: }
! 2101:
! 2102: /* Star, etc. applied to an empty pattern is equivalent
! 2103: to an empty pattern. */
! 2104: if (!laststart)
! 2105: break;
! 2106:
! 2107: /* Now we know whether or not zero matches is allowed
! 2108: and also whether or not two or more matches is allowed. */
! 2109: if (many_times_ok)
! 2110: { /* More than one repetition is allowed, so put in at the
! 2111: end a backward relative jump from `b' to before the next
! 2112: jump we're going to put in below (which jumps from
! 2113: laststart to after this jump).
! 2114:
! 2115: But if we are at the `*' in the exact sequence `.*\n',
! 2116: insert an unconditional jump backwards to the .,
! 2117: instead of the beginning of the loop. This way we only
! 2118: push a failure point once, instead of every time
! 2119: through the loop. */
! 2120: assert (p - 1 > pattern);
! 2121:
! 2122: /* Allocate the space for the jump. */
! 2123: GET_BUFFER_SPACE (3);
! 2124:
! 2125: /* We know we are not at the first character of the pattern,
! 2126: because laststart was nonzero. And we've already
! 2127: incremented `p', by the way, to be the character after
! 2128: the `*'. Do we have to do something analogous here
! 2129: for null bytes, because of RE_DOT_NOT_NULL? */
! 2130: if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
! 2131: && zero_times_ok
! 2132: && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
! 2133: && !(syntax & RE_DOT_NEWLINE))
! 2134: { /* We have .*\n. */
! 2135: STORE_JUMP (jump, b, laststart);
! 2136: keep_string_p = true;
! 2137: }
! 2138: else
! 2139: /* Anything else. */
! 2140: STORE_JUMP (maybe_pop_jump, b, laststart - 3);
! 2141:
! 2142: /* We've added more stuff to the buffer. */
! 2143: b += 3;
! 2144: }
! 2145:
! 2146: /* On failure, jump from laststart to b + 3, which will be the
! 2147: end of the buffer after this jump is inserted. */
! 2148: GET_BUFFER_SPACE (3);
! 2149: INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
! 2150: : on_failure_jump,
! 2151: laststart, b + 3);
! 2152: pending_exact = 0;
! 2153: b += 3;
! 2154:
! 2155: if (!zero_times_ok)
! 2156: {
! 2157: /* At least one repetition is required, so insert a
! 2158: `dummy_failure_jump' before the initial
! 2159: `on_failure_jump' instruction of the loop. This
! 2160: effects a skip over that instruction the first time
! 2161: we hit that loop. */
! 2162: GET_BUFFER_SPACE (3);
! 2163: INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
! 2164: b += 3;
! 2165: }
! 2166: }
! 2167: break;
! 2168:
! 2169:
! 2170: case '.':
! 2171: laststart = b;
! 2172: BUF_PUSH (anychar);
! 2173: break;
! 2174:
! 2175:
! 2176: case '[':
! 2177: {
! 2178: boolean had_char_class = false;
! 2179:
! 2180: if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
! 2181:
! 2182: /* Ensure that we have enough space to push a charset: the
! 2183: opcode, the length count, and the bitset; 34 bytes in all. */
! 2184: GET_BUFFER_SPACE (34);
! 2185:
! 2186: laststart = b;
! 2187:
! 2188: /* We test `*p == '^' twice, instead of using an if
! 2189: statement, so we only need one BUF_PUSH. */
! 2190: BUF_PUSH (*p == '^' ? charset_not : charset);
! 2191: if (*p == '^')
! 2192: p++;
! 2193:
! 2194: /* Remember the first position in the bracket expression. */
! 2195: p1 = p;
! 2196:
! 2197: /* Push the number of bytes in the bitmap. */
! 2198: BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
! 2199:
! 2200: /* Clear the whole map. */
! 2201: memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
! 2202:
! 2203: /* charset_not matches newline according to a syntax bit. */
! 2204: if ((re_opcode_t) b[-2] == charset_not
! 2205: && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
! 2206: SET_LIST_BIT ('\n');
! 2207:
! 2208: /* Read in characters and ranges, setting map bits. */
! 2209: for (;;)
! 2210: {
! 2211: if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
! 2212:
! 2213: PATFETCH (c);
! 2214:
! 2215: /* \ might escape characters inside [...] and [^...]. */
! 2216: if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
! 2217: {
! 2218: if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
! 2219:
! 2220: PATFETCH (c1);
! 2221: SET_LIST_BIT (c1);
! 2222: continue;
! 2223: }
! 2224:
! 2225: /* Could be the end of the bracket expression. If it's
! 2226: not (i.e., when the bracket expression is `[]' so
! 2227: far), the ']' character bit gets set way below. */
! 2228: if (c == ']' && p != p1 + 1)
! 2229: break;
! 2230:
! 2231: /* Look ahead to see if it's a range when the last thing
! 2232: was a character class. */
! 2233: if (had_char_class && c == '-' && *p != ']')
! 2234: FREE_STACK_RETURN (REG_ERANGE);
! 2235:
! 2236: /* Look ahead to see if it's a range when the last thing
! 2237: was a character: if this is a hyphen not at the
! 2238: beginning or the end of a list, then it's the range
! 2239: operator. */
! 2240: if (c == '-'
! 2241: && !(p - 2 >= pattern && p[-2] == '[')
! 2242: && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
! 2243: && *p != ']')
! 2244: {
! 2245: reg_errcode_t ret
! 2246: = compile_range (&p, pend, translate, syntax, b);
! 2247: if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
! 2248: }
! 2249:
! 2250: else if (p[0] == '-' && p[1] != ']')
! 2251: { /* This handles ranges made up of characters only. */
! 2252: reg_errcode_t ret;
! 2253:
! 2254: /* Move past the `-'. */
! 2255: PATFETCH (c1);
! 2256:
! 2257: ret = compile_range (&p, pend, translate, syntax, b);
! 2258: if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
! 2259: }
! 2260:
! 2261: /* See if we're at the beginning of a possible character
! 2262: class. */
! 2263:
! 2264: else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
! 2265: { /* Leave room for the null. */
! 2266: char str[CHAR_CLASS_MAX_LENGTH + 1];
! 2267:
! 2268: PATFETCH (c);
! 2269: c1 = 0;
! 2270:
! 2271: /* If pattern is `[[:'. */
! 2272: if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
! 2273:
! 2274: for (;;)
! 2275: {
! 2276: PATFETCH (c);
! 2277: if ((c == ':' && *p == ']') || p == pend)
! 2278: break;
! 2279: if (c1 < CHAR_CLASS_MAX_LENGTH)
! 2280: str[c1++] = c;
! 2281: else
! 2282: /* This is in any case an invalid class name. */
! 2283: str[0] = '\0';
! 2284: }
! 2285: str[c1] = '\0';
! 2286:
! 2287: /* If isn't a word bracketed by `[:' and `:]':
! 2288: undo the ending character, the letters, and leave
! 2289: the leading `:' and `[' (but set bits for them). */
! 2290: if (c == ':' && *p == ']')
! 2291: {
! 2292: #if defined _LIBC || WIDE_CHAR_SUPPORT
! 2293: boolean is_lower = STREQ (str, "lower");
! 2294: boolean is_upper = STREQ (str, "upper");
! 2295: wctype_t wt;
! 2296: int ch;
! 2297:
! 2298: wt = IS_CHAR_CLASS (str);
! 2299: if (wt == 0)
! 2300: FREE_STACK_RETURN (REG_ECTYPE);
! 2301:
! 2302: /* Throw away the ] at the end of the character
! 2303: class. */
! 2304: PATFETCH (c);
! 2305:
! 2306: if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
! 2307:
! 2308: for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
! 2309: {
! 2310: # ifdef _LIBC
! 2311: if (__iswctype (__btowc (ch), wt))
! 2312: SET_LIST_BIT (ch);
! 2313: # else
! 2314: if (iswctype (btowc (ch), wt))
! 2315: SET_LIST_BIT (ch);
! 2316: # endif
! 2317:
! 2318: if (translate && (is_upper || is_lower)
! 2319: && (ISUPPER (ch) || ISLOWER (ch)))
! 2320: SET_LIST_BIT (ch);
! 2321: }
! 2322:
! 2323: had_char_class = true;
! 2324: #else
! 2325: int ch;
! 2326: boolean is_alnum = STREQ (str, "alnum");
! 2327: boolean is_alpha = STREQ (str, "alpha");
! 2328: boolean is_blank = STREQ (str, "blank");
! 2329: boolean is_cntrl = STREQ (str, "cntrl");
! 2330: boolean is_digit = STREQ (str, "digit");
! 2331: boolean is_graph = STREQ (str, "graph");
! 2332: boolean is_lower = STREQ (str, "lower");
! 2333: boolean is_print = STREQ (str, "print");
! 2334: boolean is_punct = STREQ (str, "punct");
! 2335: boolean is_space = STREQ (str, "space");
! 2336: boolean is_upper = STREQ (str, "upper");
! 2337: boolean is_xdigit = STREQ (str, "xdigit");
! 2338:
! 2339: if (!IS_CHAR_CLASS (str))
! 2340: FREE_STACK_RETURN (REG_ECTYPE);
! 2341:
! 2342: /* Throw away the ] at the end of the character
! 2343: class. */
! 2344: PATFETCH (c);
! 2345:
! 2346: if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
! 2347:
! 2348: for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
! 2349: {
! 2350: /* This was split into 3 if's to
! 2351: avoid an arbitrary limit in some compiler. */
! 2352: if ( (is_alnum && ISALNUM (ch))
! 2353: || (is_alpha && ISALPHA (ch))
! 2354: || (is_blank && ISBLANK (ch))
! 2355: || (is_cntrl && ISCNTRL (ch)))
! 2356: SET_LIST_BIT (ch);
! 2357: if ( (is_digit && ISDIGIT (ch))
! 2358: || (is_graph && ISGRAPH (ch))
! 2359: || (is_lower && ISLOWER (ch))
! 2360: || (is_print && ISPRINT (ch)))
! 2361: SET_LIST_BIT (ch);
! 2362: if ( (is_punct && ISPUNCT (ch))
! 2363: || (is_space && ISSPACE (ch))
! 2364: || (is_upper && ISUPPER (ch))
! 2365: || (is_xdigit && ISXDIGIT (ch)))
! 2366: SET_LIST_BIT (ch);
! 2367: if ( translate && (is_upper || is_lower)
! 2368: && (ISUPPER (ch) || ISLOWER (ch)))
! 2369: SET_LIST_BIT (ch);
! 2370: }
! 2371: had_char_class = true;
! 2372: #endif /* libc || wctype.h */
! 2373: }
! 2374: else
! 2375: {
! 2376: c1++;
! 2377: while (c1--)
! 2378: PATUNFETCH;
! 2379: SET_LIST_BIT ('[');
! 2380: SET_LIST_BIT (':');
! 2381: had_char_class = false;
! 2382: }
! 2383: }
! 2384: else
! 2385: {
! 2386: had_char_class = false;
! 2387: SET_LIST_BIT (c);
! 2388: }
! 2389: }
! 2390:
! 2391: /* Discard any (non)matching list bytes that are all 0 at the
! 2392: end of the map. Decrease the map-length byte too. */
! 2393: while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
! 2394: b[-1]--;
! 2395: b += b[-1];
! 2396: }
! 2397: break;
! 2398:
! 2399:
! 2400: case '(':
! 2401: if (syntax & RE_NO_BK_PARENS)
! 2402: goto handle_open;
! 2403: else
! 2404: goto normal_char;
! 2405:
! 2406:
! 2407: case ')':
! 2408: if (syntax & RE_NO_BK_PARENS)
! 2409: goto handle_close;
! 2410: else
! 2411: goto normal_char;
! 2412:
! 2413:
! 2414: case '\n':
! 2415: if (syntax & RE_NEWLINE_ALT)
! 2416: goto handle_alt;
! 2417: else
! 2418: goto normal_char;
! 2419:
! 2420:
! 2421: case '|':
! 2422: if (syntax & RE_NO_BK_VBAR)
! 2423: goto handle_alt;
! 2424: else
! 2425: goto normal_char;
! 2426:
! 2427:
! 2428: case '{':
! 2429: if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
! 2430: goto handle_interval;
! 2431: else
! 2432: goto normal_char;
! 2433:
! 2434:
! 2435: case '\\':
! 2436: if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
! 2437:
! 2438: /* Do not translate the character after the \, so that we can
! 2439: distinguish, e.g., \B from \b, even if we normally would
! 2440: translate, e.g., B to b. */
! 2441: PATFETCH_RAW (c);
! 2442:
! 2443: switch (c)
! 2444: {
! 2445: case '(':
! 2446: if (syntax & RE_NO_BK_PARENS)
! 2447: goto normal_backslash;
! 2448:
! 2449: handle_open:
! 2450: bufp->re_nsub++;
! 2451: regnum++;
! 2452:
! 2453: if (COMPILE_STACK_FULL)
! 2454: {
! 2455: RETALLOC (compile_stack.stack, compile_stack.size << 1,
! 2456: compile_stack_elt_t);
! 2457: if (compile_stack.stack == NULL) return REG_ESPACE;
! 2458:
! 2459: compile_stack.size <<= 1;
! 2460: }
! 2461:
! 2462: /* These are the values to restore when we hit end of this
! 2463: group. They are all relative offsets, so that if the
! 2464: whole pattern moves because of realloc, they will still
! 2465: be valid. */
! 2466: COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
! 2467: COMPILE_STACK_TOP.fixup_alt_jump
! 2468: = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
! 2469: COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
! 2470: COMPILE_STACK_TOP.regnum = regnum;
! 2471:
! 2472: /* We will eventually replace the 0 with the number of
! 2473: groups inner to this one. But do not push a
! 2474: start_memory for groups beyond the last one we can
! 2475: represent in the compiled pattern. */
! 2476: if (regnum <= MAX_REGNUM)
! 2477: {
! 2478: COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
! 2479: BUF_PUSH_3 (start_memory, regnum, 0);
! 2480: }
! 2481:
! 2482: compile_stack.avail++;
! 2483:
! 2484: fixup_alt_jump = 0;
! 2485: laststart = 0;
! 2486: begalt = b;
! 2487: /* If we've reached MAX_REGNUM groups, then this open
! 2488: won't actually generate any code, so we'll have to
! 2489: clear pending_exact explicitly. */
! 2490: pending_exact = 0;
! 2491: break;
! 2492:
! 2493:
! 2494: case ')':
! 2495: if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
! 2496:
! 2497: if (COMPILE_STACK_EMPTY)
! 2498: {
! 2499: if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
! 2500: goto normal_backslash;
! 2501: else
! 2502: FREE_STACK_RETURN (REG_ERPAREN);
! 2503: }
! 2504:
! 2505: handle_close:
! 2506: if (fixup_alt_jump)
! 2507: { /* Push a dummy failure point at the end of the
! 2508: alternative for a possible future
! 2509: `pop_failure_jump' to pop. See comments at
! 2510: `push_dummy_failure' in `re_match_2'. */
! 2511: BUF_PUSH (push_dummy_failure);
! 2512:
! 2513: /* We allocated space for this jump when we assigned
! 2514: to `fixup_alt_jump', in the `handle_alt' case below. */
! 2515: STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
! 2516: }
! 2517:
! 2518: /* See similar code for backslashed left paren above. */
! 2519: if (COMPILE_STACK_EMPTY)
! 2520: {
! 2521: if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
! 2522: goto normal_char;
! 2523: else
! 2524: FREE_STACK_RETURN (REG_ERPAREN);
! 2525: }
! 2526:
! 2527: /* Since we just checked for an empty stack above, this
! 2528: ``can't happen''. */
! 2529: assert (compile_stack.avail != 0);
! 2530: {
! 2531: /* We don't just want to restore into `regnum', because
! 2532: later groups should continue to be numbered higher,
! 2533: as in `(ab)c(de)' -- the second group is #2. */
! 2534: regnum_t this_group_regnum;
! 2535:
! 2536: compile_stack.avail--;
! 2537: begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
! 2538: fixup_alt_jump
! 2539: = COMPILE_STACK_TOP.fixup_alt_jump
! 2540: ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
! 2541: : 0;
! 2542: laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
! 2543: this_group_regnum = COMPILE_STACK_TOP.regnum;
! 2544: /* If we've reached MAX_REGNUM groups, then this open
! 2545: won't actually generate any code, so we'll have to
! 2546: clear pending_exact explicitly. */
! 2547: pending_exact = 0;
! 2548:
! 2549: /* We're at the end of the group, so now we know how many
! 2550: groups were inside this one. */
! 2551: if (this_group_regnum <= MAX_REGNUM)
! 2552: {
! 2553: unsigned char *inner_group_loc
! 2554: = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
! 2555:
! 2556: *inner_group_loc = regnum - this_group_regnum;
! 2557: BUF_PUSH_3 (stop_memory, this_group_regnum,
! 2558: regnum - this_group_regnum);
! 2559: }
! 2560: }
! 2561: break;
! 2562:
! 2563:
! 2564: case '|': /* `\|'. */
! 2565: if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
! 2566: goto normal_backslash;
! 2567: handle_alt:
! 2568: if (syntax & RE_LIMITED_OPS)
! 2569: goto normal_char;
! 2570:
! 2571: /* Insert before the previous alternative a jump which
! 2572: jumps to this alternative if the former fails. */
! 2573: GET_BUFFER_SPACE (3);
! 2574: INSERT_JUMP (on_failure_jump, begalt, b + 6);
! 2575: pending_exact = 0;
! 2576: b += 3;
! 2577:
! 2578: /* The alternative before this one has a jump after it
! 2579: which gets executed if it gets matched. Adjust that
! 2580: jump so it will jump to this alternative's analogous
! 2581: jump (put in below, which in turn will jump to the next
! 2582: (if any) alternative's such jump, etc.). The last such
! 2583: jump jumps to the correct final destination. A picture:
! 2584: _____ _____
! 2585: | | | |
! 2586: | v | v
! 2587: a | b | c
! 2588:
! 2589: If we are at `b', then fixup_alt_jump right now points to a
! 2590: three-byte space after `a'. We'll put in the jump, set
! 2591: fixup_alt_jump to right after `b', and leave behind three
! 2592: bytes which we'll fill in when we get to after `c'. */
! 2593:
! 2594: if (fixup_alt_jump)
! 2595: STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
! 2596:
! 2597: /* Mark and leave space for a jump after this alternative,
! 2598: to be filled in later either by next alternative or
! 2599: when know we're at the end of a series of alternatives. */
! 2600: fixup_alt_jump = b;
! 2601: GET_BUFFER_SPACE (3);
! 2602: b += 3;
! 2603:
! 2604: laststart = 0;
! 2605: begalt = b;
! 2606: break;
! 2607:
! 2608:
! 2609: case '{':
! 2610: /* If \{ is a literal. */
! 2611: if (!(syntax & RE_INTERVALS)
! 2612: /* If we're at `\{' and it's not the open-interval
! 2613: operator. */
! 2614: || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
! 2615: || (p - 2 == pattern && p == pend))
! 2616: goto normal_backslash;
! 2617:
! 2618: handle_interval:
! 2619: {
! 2620: /* If got here, then the syntax allows intervals. */
! 2621:
! 2622: /* At least (most) this many matches must be made. */
! 2623: int lower_bound = -1, upper_bound = -1;
! 2624:
! 2625: beg_interval = p - 1;
! 2626:
! 2627: if (p == pend)
! 2628: {
! 2629: if (syntax & RE_NO_BK_BRACES)
! 2630: goto unfetch_interval;
! 2631: else
! 2632: FREE_STACK_RETURN (REG_EBRACE);
! 2633: }
! 2634:
! 2635: GET_UNSIGNED_NUMBER (lower_bound);
! 2636:
! 2637: if (c == ',')
! 2638: {
! 2639: GET_UNSIGNED_NUMBER (upper_bound);
! 2640: if (upper_bound < 0) upper_bound = RE_DUP_MAX;
! 2641: }
! 2642: else
! 2643: /* Interval such as `{1}' => match exactly once. */
! 2644: upper_bound = lower_bound;
! 2645:
! 2646: if (lower_bound < 0 || upper_bound > RE_DUP_MAX
! 2647: || lower_bound > upper_bound)
! 2648: {
! 2649: if (syntax & RE_NO_BK_BRACES)
! 2650: goto unfetch_interval;
! 2651: else
! 2652: FREE_STACK_RETURN (REG_BADBR);
! 2653: }
! 2654:
! 2655: if (!(syntax & RE_NO_BK_BRACES))
! 2656: {
! 2657: if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
! 2658:
! 2659: PATFETCH (c);
! 2660: }
! 2661:
! 2662: if (c != '}')
! 2663: {
! 2664: if (syntax & RE_NO_BK_BRACES)
! 2665: goto unfetch_interval;
! 2666: else
! 2667: FREE_STACK_RETURN (REG_BADBR);
! 2668: }
! 2669:
! 2670: /* We just parsed a valid interval. */
! 2671:
! 2672: /* If it's invalid to have no preceding re. */
! 2673: if (!laststart)
! 2674: {
! 2675: if (syntax & RE_CONTEXT_INVALID_OPS)
! 2676: FREE_STACK_RETURN (REG_BADRPT);
! 2677: else if (syntax & RE_CONTEXT_INDEP_OPS)
! 2678: laststart = b;
! 2679: else
! 2680: goto unfetch_interval;
! 2681: }
! 2682:
! 2683: /* If the upper bound is zero, don't want to succeed at
! 2684: all; jump from `laststart' to `b + 3', which will be
! 2685: the end of the buffer after we insert the jump. */
! 2686: if (upper_bound == 0)
! 2687: {
! 2688: GET_BUFFER_SPACE (3);
! 2689: INSERT_JUMP (jump, laststart, b + 3);
! 2690: b += 3;
! 2691: }
! 2692:
! 2693: /* Otherwise, we have a nontrivial interval. When
! 2694: we're all done, the pattern will look like:
! 2695: set_number_at <jump count> <upper bound>
! 2696: set_number_at <succeed_n count> <lower bound>
! 2697: succeed_n <after jump addr> <succeed_n count>
! 2698: <body of loop>
! 2699: jump_n <succeed_n addr> <jump count>
! 2700: (The upper bound and `jump_n' are omitted if
! 2701: `upper_bound' is 1, though.) */
! 2702: else
! 2703: { /* If the upper bound is > 1, we need to insert
! 2704: more at the end of the loop. */
! 2705: unsigned nbytes = 10 + (upper_bound > 1) * 10;
! 2706:
! 2707: GET_BUFFER_SPACE (nbytes);
! 2708:
! 2709: /* Initialize lower bound of the `succeed_n', even
! 2710: though it will be set during matching by its
! 2711: attendant `set_number_at' (inserted next),
! 2712: because `re_compile_fastmap' needs to know.
! 2713: Jump to the `jump_n' we might insert below. */
! 2714: INSERT_JUMP2 (succeed_n, laststart,
! 2715: b + 5 + (upper_bound > 1) * 5,
! 2716: lower_bound);
! 2717: b += 5;
! 2718:
! 2719: /* Code to initialize the lower bound. Insert
! 2720: before the `succeed_n'. The `5' is the last two
! 2721: bytes of this `set_number_at', plus 3 bytes of
! 2722: the following `succeed_n'. */
! 2723: insert_op2 (set_number_at, laststart, 5, lower_bound, b);
! 2724: b += 5;
! 2725:
! 2726: if (upper_bound > 1)
! 2727: { /* More than one repetition is allowed, so
! 2728: append a backward jump to the `succeed_n'
! 2729: that starts this interval.
! 2730:
! 2731: When we've reached this during matching,
! 2732: we'll have matched the interval once, so
! 2733: jump back only `upper_bound - 1' times. */
! 2734: STORE_JUMP2 (jump_n, b, laststart + 5,
! 2735: upper_bound - 1);
! 2736: b += 5;
! 2737:
! 2738: /* The location we want to set is the second
! 2739: parameter of the `jump_n'; that is `b-2' as
! 2740: an absolute address. `laststart' will be
! 2741: the `set_number_at' we're about to insert;
! 2742: `laststart+3' the number to set, the source
! 2743: for the relative address. But we are
! 2744: inserting into the middle of the pattern --
! 2745: so everything is getting moved up by 5.
! 2746: Conclusion: (b - 2) - (laststart + 3) + 5,
! 2747: i.e., b - laststart.
! 2748:
! 2749: We insert this at the beginning of the loop
! 2750: so that if we fail during matching, we'll
! 2751: reinitialize the bounds. */
! 2752: insert_op2 (set_number_at, laststart, b - laststart,
! 2753: upper_bound - 1, b);
! 2754: b += 5;
! 2755: }
! 2756: }
! 2757: pending_exact = 0;
! 2758: beg_interval = NULL;
! 2759: }
! 2760: break;
! 2761:
! 2762: unfetch_interval:
! 2763: /* If an invalid interval, match the characters as literals. */
! 2764: assert (beg_interval);
! 2765: p = beg_interval;
! 2766: beg_interval = NULL;
! 2767:
! 2768: /* normal_char and normal_backslash need `c'. */
! 2769: PATFETCH (c);
! 2770:
! 2771: if (!(syntax & RE_NO_BK_BRACES))
! 2772: {
! 2773: if (p > pattern && p[-1] == '\\')
! 2774: goto normal_backslash;
! 2775: }
! 2776: goto normal_char;
! 2777:
! 2778: #ifdef emacs
! 2779: /* There is no way to specify the before_dot and after_dot
! 2780: operators. rms says this is ok. --karl */
! 2781: case '=':
! 2782: BUF_PUSH (at_dot);
! 2783: break;
! 2784:
! 2785: case 's':
! 2786: laststart = b;
! 2787: PATFETCH (c);
! 2788: BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
! 2789: break;
! 2790:
! 2791: case 'S':
! 2792: laststart = b;
! 2793: PATFETCH (c);
! 2794: BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
! 2795: break;
! 2796: #endif /* emacs */
! 2797:
! 2798:
! 2799: case 'w':
! 2800: if (syntax & RE_NO_GNU_OPS)
! 2801: goto normal_char;
! 2802: laststart = b;
! 2803: BUF_PUSH (wordchar);
! 2804: break;
! 2805:
! 2806:
! 2807: case 'W':
! 2808: if (syntax & RE_NO_GNU_OPS)
! 2809: goto normal_char;
! 2810: laststart = b;
! 2811: BUF_PUSH (notwordchar);
! 2812: break;
! 2813:
! 2814:
! 2815: case '<':
! 2816: if (syntax & RE_NO_GNU_OPS)
! 2817: goto normal_char;
! 2818: BUF_PUSH (wordbeg);
! 2819: break;
! 2820:
! 2821: case '>':
! 2822: if (syntax & RE_NO_GNU_OPS)
! 2823: goto normal_char;
! 2824: BUF_PUSH (wordend);
! 2825: break;
! 2826:
! 2827: case 'b':
! 2828: if (syntax & RE_NO_GNU_OPS)
! 2829: goto normal_char;
! 2830: BUF_PUSH (wordbound);
! 2831: break;
! 2832:
! 2833: case 'B':
! 2834: if (syntax & RE_NO_GNU_OPS)
! 2835: goto normal_char;
! 2836: BUF_PUSH (notwordbound);
! 2837: break;
! 2838:
! 2839: case '`':
! 2840: if (syntax & RE_NO_GNU_OPS)
! 2841: goto normal_char;
! 2842: BUF_PUSH (begbuf);
! 2843: break;
! 2844:
! 2845: case '\'':
! 2846: if (syntax & RE_NO_GNU_OPS)
! 2847: goto normal_char;
! 2848: BUF_PUSH (endbuf);
! 2849: break;
! 2850:
! 2851: case '1': case '2': case '3': case '4': case '5':
! 2852: case '6': case '7': case '8': case '9':
! 2853: if (syntax & RE_NO_BK_REFS)
! 2854: goto normal_char;
! 2855:
! 2856: c1 = c - '0';
! 2857:
! 2858: if (c1 > regnum)
! 2859: FREE_STACK_RETURN (REG_ESUBREG);
! 2860:
! 2861: /* Can't back reference to a subexpression if inside of it. */
! 2862: if (group_in_compile_stack (compile_stack, (regnum_t) c1))
! 2863: goto normal_char;
! 2864:
! 2865: laststart = b;
! 2866: BUF_PUSH_2 (duplicate, c1);
! 2867: break;
! 2868:
! 2869:
! 2870: case '+':
! 2871: case '?':
! 2872: if (syntax & RE_BK_PLUS_QM)
! 2873: goto handle_plus;
! 2874: else
! 2875: goto normal_backslash;
! 2876:
! 2877: default:
! 2878: normal_backslash:
! 2879: /* You might think it would be useful for \ to mean
! 2880: not to translate; but if we don't translate it
! 2881: it will never match anything. */
! 2882: c = TRANSLATE (c);
! 2883: goto normal_char;
! 2884: }
! 2885: break;
! 2886:
! 2887:
! 2888: default:
! 2889: /* Expects the character in `c'. */
! 2890: normal_char:
! 2891: /* If no exactn currently being built. */
! 2892: if (!pending_exact
! 2893:
! 2894: /* If last exactn not at current position. */
! 2895: || pending_exact + *pending_exact + 1 != b
! 2896:
! 2897: /* We have only one byte following the exactn for the count. */
! 2898: || *pending_exact == (1 << BYTEWIDTH) - 1
! 2899:
! 2900: /* If followed by a repetition operator. */
! 2901: || *p == '*' || *p == '^'
! 2902: || ((syntax & RE_BK_PLUS_QM)
! 2903: ? *p == '\\' && (p[1] == '+' || p[1] == '?')
! 2904: : (*p == '+' || *p == '?'))
! 2905: || ((syntax & RE_INTERVALS)
! 2906: && ((syntax & RE_NO_BK_BRACES)
! 2907: ? *p == '{'
! 2908: : (p[0] == '\\' && p[1] == '{'))))
! 2909: {
! 2910: /* Start building a new exactn. */
! 2911:
! 2912: laststart = b;
! 2913:
! 2914: BUF_PUSH_2 (exactn, 0);
! 2915: pending_exact = b - 1;
! 2916: }
! 2917:
! 2918: BUF_PUSH (c);
! 2919: (*pending_exact)++;
! 2920: break;
! 2921: } /* switch (c) */
! 2922: } /* while p != pend */
! 2923:
! 2924:
! 2925: /* Through the pattern now. */
! 2926:
! 2927: if (fixup_alt_jump)
! 2928: STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
! 2929:
! 2930: if (!COMPILE_STACK_EMPTY)
! 2931: FREE_STACK_RETURN (REG_EPAREN);
! 2932:
! 2933: /* If we don't want backtracking, force success
! 2934: the first time we reach the end of the compiled pattern. */
! 2935: if (syntax & RE_NO_POSIX_BACKTRACKING)
! 2936: BUF_PUSH (succeed);
! 2937:
! 2938: free (compile_stack.stack);
! 2939:
! 2940: /* We have succeeded; set the length of the buffer. */
! 2941: bufp->used = b - bufp->buffer;
! 2942:
! 2943: #ifdef DEBUG
! 2944: if (debug)
! 2945: {
! 2946: DEBUG_PRINT1 ("\nCompiled pattern: \n");
! 2947: print_compiled_pattern (bufp);
! 2948: }
! 2949: #endif /* DEBUG */
! 2950:
! 2951: #ifndef MATCH_MAY_ALLOCATE
! 2952: /* Initialize the failure stack to the largest possible stack. This
! 2953: isn't necessary unless we're trying to avoid calling alloca in
! 2954: the search and match routines. */
! 2955: {
! 2956: int num_regs = bufp->re_nsub + 1;
! 2957:
! 2958: /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
! 2959: is strictly greater than re_max_failures, the largest possible stack
! 2960: is 2 * re_max_failures failure points. */
! 2961: if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
! 2962: {
! 2963: fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
! 2964:
! 2965: # ifdef emacs
! 2966: if (! fail_stack.stack)
! 2967: fail_stack.stack
! 2968: = (fail_stack_elt_t *) xmalloc (fail_stack.size
! 2969: * sizeof (fail_stack_elt_t));
! 2970: else
! 2971: fail_stack.stack
! 2972: = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
! 2973: (fail_stack.size
! 2974: * sizeof (fail_stack_elt_t)));
! 2975: # else /* not emacs */
! 2976: if (! fail_stack.stack)
! 2977: fail_stack.stack
! 2978: = (fail_stack_elt_t *) malloc (fail_stack.size
! 2979: * sizeof (fail_stack_elt_t));
! 2980: else
! 2981: fail_stack.stack
! 2982: = (fail_stack_elt_t *) realloc (fail_stack.stack,
! 2983: (fail_stack.size
! 2984: * sizeof (fail_stack_elt_t)));
! 2985: # endif /* not emacs */
! 2986: }
! 2987:
! 2988: regex_grow_registers (num_regs);
! 2989: }
! 2990: #endif /* not MATCH_MAY_ALLOCATE */
! 2991:
! 2992: return REG_NOERROR;
! 2993: } /* regex_compile */
! 2994:
! 2995: /* Subroutines for `regex_compile'. */
! 2996:
! 2997: /* Store OP at LOC followed by two-byte integer parameter ARG. */
! 2998:
! 2999: static void
! 3000: store_op1 (op, loc, arg)
! 3001: re_opcode_t op;
! 3002: unsigned char *loc;
! 3003: int arg;
! 3004: {
! 3005: *loc = (unsigned char) op;
! 3006: STORE_NUMBER (loc + 1, arg);
! 3007: }
! 3008:
! 3009:
! 3010: /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
! 3011:
! 3012: static void
! 3013: store_op2 (op, loc, arg1, arg2)
! 3014: re_opcode_t op;
! 3015: unsigned char *loc;
! 3016: int arg1, arg2;
! 3017: {
! 3018: *loc = (unsigned char) op;
! 3019: STORE_NUMBER (loc + 1, arg1);
! 3020: STORE_NUMBER (loc + 3, arg2);
! 3021: }
! 3022:
! 3023:
! 3024: /* Copy the bytes from LOC to END to open up three bytes of space at LOC
! 3025: for OP followed by two-byte integer parameter ARG. */
! 3026:
! 3027: static void
! 3028: insert_op1 (op, loc, arg, end)
! 3029: re_opcode_t op;
! 3030: unsigned char *loc;
! 3031: int arg;
! 3032: unsigned char *end;
! 3033: {
! 3034: register unsigned char *pfrom = end;
! 3035: register unsigned char *pto = end + 3;
! 3036:
! 3037: while (pfrom != loc)
! 3038: *--pto = *--pfrom;
! 3039:
! 3040: store_op1 (op, loc, arg);
! 3041: }
! 3042:
! 3043:
! 3044: /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
! 3045:
! 3046: static void
! 3047: insert_op2 (op, loc, arg1, arg2, end)
! 3048: re_opcode_t op;
! 3049: unsigned char *loc;
! 3050: int arg1, arg2;
! 3051: unsigned char *end;
! 3052: {
! 3053: register unsigned char *pfrom = end;
! 3054: register unsigned char *pto = end + 5;
! 3055:
! 3056: while (pfrom != loc)
! 3057: *--pto = *--pfrom;
! 3058:
! 3059: store_op2 (op, loc, arg1, arg2);
! 3060: }
! 3061:
! 3062:
! 3063: /* P points to just after a ^ in PATTERN. Return true if that ^ comes
! 3064: after an alternative or a begin-subexpression. We assume there is at
! 3065: least one character before the ^. */
! 3066:
! 3067: static boolean
! 3068: at_begline_loc_p (pattern, p, syntax)
! 3069: const char *pattern, *p;
! 3070: reg_syntax_t syntax;
! 3071: {
! 3072: const char *prev = p - 2;
! 3073: boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
! 3074:
! 3075: return
! 3076: /* After a subexpression? */
! 3077: (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
! 3078: /* After an alternative? */
! 3079: || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
! 3080: }
! 3081:
! 3082:
! 3083: /* The dual of at_begline_loc_p. This one is for $. We assume there is
! 3084: at least one character after the $, i.e., `P < PEND'. */
! 3085:
! 3086: static boolean
! 3087: at_endline_loc_p (p, pend, syntax)
! 3088: const char *p, *pend;
! 3089: reg_syntax_t syntax;
! 3090: {
! 3091: const char *next = p;
! 3092: boolean next_backslash = *next == '\\';
! 3093: const char *next_next = p + 1 < pend ? p + 1 : 0;
! 3094:
! 3095: return
! 3096: /* Before a subexpression? */
! 3097: (syntax & RE_NO_BK_PARENS ? *next == ')'
! 3098: : next_backslash && next_next && *next_next == ')')
! 3099: /* Before an alternative? */
! 3100: || (syntax & RE_NO_BK_VBAR ? *next == '|'
! 3101: : next_backslash && next_next && *next_next == '|');
! 3102: }
! 3103:
! 3104:
! 3105: /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
! 3106: false if it's not. */
! 3107:
! 3108: static boolean
! 3109: group_in_compile_stack (compile_stack, regnum)
! 3110: compile_stack_type compile_stack;
! 3111: regnum_t regnum;
! 3112: {
! 3113: int this_element;
! 3114:
! 3115: for (this_element = compile_stack.avail - 1;
! 3116: this_element >= 0;
! 3117: this_element--)
! 3118: if (compile_stack.stack[this_element].regnum == regnum)
! 3119: return true;
! 3120:
! 3121: return false;
! 3122: }
! 3123:
! 3124:
! 3125: /* Read the ending character of a range (in a bracket expression) from the
! 3126: uncompiled pattern *P_PTR (which ends at PEND). We assume the
! 3127: starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
! 3128: Then we set the translation of all bits between the starting and
! 3129: ending characters (inclusive) in the compiled pattern B.
! 3130:
! 3131: Return an error code.
! 3132:
! 3133: We use these short variable names so we can use the same macros as
! 3134: `regex_compile' itself. */
! 3135:
! 3136: static reg_errcode_t
! 3137: compile_range (p_ptr, pend, translate, syntax, b)
! 3138: const char **p_ptr, *pend;
! 3139: RE_TRANSLATE_TYPE translate;
! 3140: reg_syntax_t syntax;
! 3141: unsigned char *b;
! 3142: {
! 3143: unsigned this_char;
! 3144:
! 3145: const char *p = *p_ptr;
! 3146: unsigned int range_start, range_end;
! 3147:
! 3148: if (p == pend)
! 3149: return REG_ERANGE;
! 3150:
! 3151: /* Even though the pattern is a signed `char *', we need to fetch
! 3152: with unsigned char *'s; if the high bit of the pattern character
! 3153: is set, the range endpoints will be negative if we fetch using a
! 3154: signed char *.
! 3155:
! 3156: We also want to fetch the endpoints without translating them; the
! 3157: appropriate translation is done in the bit-setting loop below. */
! 3158: /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
! 3159: range_start = ((const unsigned char *) p)[-2];
! 3160: range_end = ((const unsigned char *) p)[0];
! 3161:
! 3162: /* Have to increment the pointer into the pattern string, so the
! 3163: caller isn't still at the ending character. */
! 3164: (*p_ptr)++;
! 3165:
! 3166: /* If the start is after the end, the range is empty. */
! 3167: if (range_start > range_end)
! 3168: return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
! 3169:
! 3170: /* Here we see why `this_char' has to be larger than an `unsigned
! 3171: char' -- the range is inclusive, so if `range_end' == 0xff
! 3172: (assuming 8-bit characters), we would otherwise go into an infinite
! 3173: loop, since all characters <= 0xff. */
! 3174: for (this_char = range_start; this_char <= range_end; this_char++)
! 3175: {
! 3176: SET_LIST_BIT (TRANSLATE (this_char));
! 3177: }
! 3178:
! 3179: return REG_NOERROR;
! 3180: }
! 3181:
! 3182: /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
! 3183: BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
! 3184: characters can start a string that matches the pattern. This fastmap
! 3185: is used by re_search to skip quickly over impossible starting points.
! 3186:
! 3187: The caller must supply the address of a (1 << BYTEWIDTH)-byte data
! 3188: area as BUFP->fastmap.
! 3189:
! 3190: We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
! 3191: the pattern buffer.
! 3192:
! 3193: Returns 0 if we succeed, -2 if an internal error. */
! 3194:
! 3195: int
! 3196: re_compile_fastmap (bufp)
! 3197: struct re_pattern_buffer *bufp;
! 3198: {
! 3199: int j, k;
! 3200: #ifdef MATCH_MAY_ALLOCATE
! 3201: fail_stack_type fail_stack;
! 3202: #endif
! 3203: #ifndef REGEX_MALLOC
! 3204: char *destination;
! 3205: #endif
! 3206:
! 3207: register char *fastmap = bufp->fastmap;
! 3208: unsigned char *pattern = bufp->buffer;
! 3209: unsigned char *p = pattern;
! 3210: register unsigned char *pend = pattern + bufp->used;
! 3211:
! 3212: #ifdef REL_ALLOC
! 3213: /* This holds the pointer to the failure stack, when
! 3214: it is allocated relocatably. */
! 3215: fail_stack_elt_t *failure_stack_ptr;
! 3216: #endif
! 3217:
! 3218: /* Assume that each path through the pattern can be null until
! 3219: proven otherwise. We set this false at the bottom of switch
! 3220: statement, to which we get only if a particular path doesn't
! 3221: match the empty string. */
! 3222: boolean path_can_be_null = true;
! 3223:
! 3224: /* We aren't doing a `succeed_n' to begin with. */
! 3225: boolean succeed_n_p = false;
! 3226:
! 3227: assert (fastmap != NULL && p != NULL);
! 3228:
! 3229: INIT_FAIL_STACK ();
! 3230: memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */
! 3231: bufp->fastmap_accurate = 1; /* It will be when we're done. */
! 3232: bufp->can_be_null = 0;
! 3233:
! 3234: while (1)
! 3235: {
! 3236: if (p == pend || *p == succeed)
! 3237: {
! 3238: /* We have reached the (effective) end of pattern. */
! 3239: if (!FAIL_STACK_EMPTY ())
! 3240: {
! 3241: bufp->can_be_null |= path_can_be_null;
! 3242:
! 3243: /* Reset for next path. */
! 3244: path_can_be_null = true;
! 3245:
! 3246: p = fail_stack.stack[--fail_stack.avail].pointer;
! 3247:
! 3248: continue;
! 3249: }
! 3250: else
! 3251: break;
! 3252: }
! 3253:
! 3254: /* We should never be about to go beyond the end of the pattern. */
! 3255: assert (p < pend);
! 3256:
! 3257: switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
! 3258: {
! 3259:
! 3260: /* I guess the idea here is to simply not bother with a fastmap
! 3261: if a backreference is used, since it's too hard to figure out
! 3262: the fastmap for the corresponding group. Setting
! 3263: `can_be_null' stops `re_search_2' from using the fastmap, so
! 3264: that is all we do. */
! 3265: case duplicate:
! 3266: bufp->can_be_null = 1;
! 3267: goto done;
! 3268:
! 3269:
! 3270: /* Following are the cases which match a character. These end
! 3271: with `break'. */
! 3272:
! 3273: case exactn:
! 3274: fastmap[p[1]] = 1;
! 3275: break;
! 3276:
! 3277:
! 3278: case charset:
! 3279: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
! 3280: if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
! 3281: fastmap[j] = 1;
! 3282: break;
! 3283:
! 3284:
! 3285: case charset_not:
! 3286: /* Chars beyond end of map must be allowed. */
! 3287: for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
! 3288: fastmap[j] = 1;
! 3289:
! 3290: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
! 3291: if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
! 3292: fastmap[j] = 1;
! 3293: break;
! 3294:
! 3295:
! 3296: case wordchar:
! 3297: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 3298: if (SYNTAX (j) == Sword)
! 3299: fastmap[j] = 1;
! 3300: break;
! 3301:
! 3302:
! 3303: case notwordchar:
! 3304: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 3305: if (SYNTAX (j) != Sword)
! 3306: fastmap[j] = 1;
! 3307: break;
! 3308:
! 3309:
! 3310: case anychar:
! 3311: {
! 3312: int fastmap_newline = fastmap['\n'];
! 3313:
! 3314: /* `.' matches anything ... */
! 3315: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 3316: fastmap[j] = 1;
! 3317:
! 3318: /* ... except perhaps newline. */
! 3319: if (!(bufp->syntax & RE_DOT_NEWLINE))
! 3320: fastmap['\n'] = fastmap_newline;
! 3321:
! 3322: /* Return if we have already set `can_be_null'; if we have,
! 3323: then the fastmap is irrelevant. Something's wrong here. */
! 3324: else if (bufp->can_be_null)
! 3325: goto done;
! 3326:
! 3327: /* Otherwise, have to check alternative paths. */
! 3328: break;
! 3329: }
! 3330:
! 3331: #ifdef emacs
! 3332: case syntaxspec:
! 3333: k = *p++;
! 3334: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 3335: if (SYNTAX (j) == (enum syntaxcode) k)
! 3336: fastmap[j] = 1;
! 3337: break;
! 3338:
! 3339:
! 3340: case notsyntaxspec:
! 3341: k = *p++;
! 3342: for (j = 0; j < (1 << BYTEWIDTH); j++)
! 3343: if (SYNTAX (j) != (enum syntaxcode) k)
! 3344: fastmap[j] = 1;
! 3345: break;
! 3346:
! 3347:
! 3348: /* All cases after this match the empty string. These end with
! 3349: `continue'. */
! 3350:
! 3351:
! 3352: case before_dot:
! 3353: case at_dot:
! 3354: case after_dot:
! 3355: continue;
! 3356: #endif /* emacs */
! 3357:
! 3358:
! 3359: case no_op:
! 3360: case begline:
! 3361: case endline:
! 3362: case begbuf:
! 3363: case endbuf:
! 3364: case wordbound:
! 3365: case notwordbound:
! 3366: case wordbeg:
! 3367: case wordend:
! 3368: case push_dummy_failure:
! 3369: continue;
! 3370:
! 3371:
! 3372: case jump_n:
! 3373: case pop_failure_jump:
! 3374: case maybe_pop_jump:
! 3375: case jump:
! 3376: case jump_past_alt:
! 3377: case dummy_failure_jump:
! 3378: EXTRACT_NUMBER_AND_INCR (j, p);
! 3379: p += j;
! 3380: if (j > 0)
! 3381: continue;
! 3382:
! 3383: /* Jump backward implies we just went through the body of a
! 3384: loop and matched nothing. Opcode jumped to should be
! 3385: `on_failure_jump' or `succeed_n'. Just treat it like an
! 3386: ordinary jump. For a * loop, it has pushed its failure
! 3387: point already; if so, discard that as redundant. */
! 3388: if ((re_opcode_t) *p != on_failure_jump
! 3389: && (re_opcode_t) *p != succeed_n)
! 3390: continue;
! 3391:
! 3392: p++;
! 3393: EXTRACT_NUMBER_AND_INCR (j, p);
! 3394: p += j;
! 3395:
! 3396: /* If what's on the stack is where we are now, pop it. */
! 3397: if (!FAIL_STACK_EMPTY ()
! 3398: && fail_stack.stack[fail_stack.avail - 1].pointer == p)
! 3399: fail_stack.avail--;
! 3400:
! 3401: continue;
! 3402:
! 3403:
! 3404: case on_failure_jump:
! 3405: case on_failure_keep_string_jump:
! 3406: handle_on_failure_jump:
! 3407: EXTRACT_NUMBER_AND_INCR (j, p);
! 3408:
! 3409: /* For some patterns, e.g., `(a?)?', `p+j' here points to the
! 3410: end of the pattern. We don't want to push such a point,
! 3411: since when we restore it above, entering the switch will
! 3412: increment `p' past the end of the pattern. We don't need
! 3413: to push such a point since we obviously won't find any more
! 3414: fastmap entries beyond `pend'. Such a pattern can match
! 3415: the null string, though. */
! 3416: if (p + j < pend)
! 3417: {
! 3418: if (!PUSH_PATTERN_OP (p + j, fail_stack))
! 3419: {
! 3420: RESET_FAIL_STACK ();
! 3421: return -2;
! 3422: }
! 3423: }
! 3424: else
! 3425: bufp->can_be_null = 1;
! 3426:
! 3427: if (succeed_n_p)
! 3428: {
! 3429: EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
! 3430: succeed_n_p = false;
! 3431: }
! 3432:
! 3433: continue;
! 3434:
! 3435:
! 3436: case succeed_n:
! 3437: /* Get to the number of times to succeed. */
! 3438: p += 2;
! 3439:
! 3440: /* Increment p past the n for when k != 0. */
! 3441: EXTRACT_NUMBER_AND_INCR (k, p);
! 3442: if (k == 0)
! 3443: {
! 3444: p -= 4;
! 3445: succeed_n_p = true; /* Spaghetti code alert. */
! 3446: goto handle_on_failure_jump;
! 3447: }
! 3448: continue;
! 3449:
! 3450:
! 3451: case set_number_at:
! 3452: p += 4;
! 3453: continue;
! 3454:
! 3455:
! 3456: case start_memory:
! 3457: case stop_memory:
! 3458: p += 2;
! 3459: continue;
! 3460:
! 3461:
! 3462: default:
! 3463: abort (); /* We have listed all the cases. */
! 3464: } /* switch *p++ */
! 3465:
! 3466: /* Getting here means we have found the possible starting
! 3467: characters for one path of the pattern -- and that the empty
! 3468: string does not match. We need not follow this path further.
! 3469: Instead, look at the next alternative (remembered on the
! 3470: stack), or quit if no more. The test at the top of the loop
! 3471: does these things. */
! 3472: path_can_be_null = false;
! 3473: p = pend;
! 3474: } /* while p */
! 3475:
! 3476: /* Set `can_be_null' for the last path (also the first path, if the
! 3477: pattern is empty). */
! 3478: bufp->can_be_null |= path_can_be_null;
! 3479:
! 3480: done:
! 3481: RESET_FAIL_STACK ();
! 3482: return 0;
! 3483: } /* re_compile_fastmap */
! 3484: #ifdef _LIBC
! 3485: weak_alias (__re_compile_fastmap, re_compile_fastmap)
! 3486: #endif
! 3487:
! 3488: /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
! 3489: ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
! 3490: this memory for recording register information. STARTS and ENDS
! 3491: must be allocated using the malloc library routine, and must each
! 3492: be at least NUM_REGS * sizeof (regoff_t) bytes long.
! 3493:
! 3494: If NUM_REGS == 0, then subsequent matches should allocate their own
! 3495: register data.
! 3496:
! 3497: Unless this function is called, the first search or match using
! 3498: PATTERN_BUFFER will allocate its own register data, without
! 3499: freeing the old data. */
! 3500:
! 3501: void
! 3502: re_set_registers (bufp, regs, num_regs, starts, ends)
! 3503: struct re_pattern_buffer *bufp;
! 3504: struct re_registers *regs;
! 3505: unsigned num_regs;
! 3506: regoff_t *starts, *ends;
! 3507: {
! 3508: if (num_regs)
! 3509: {
! 3510: bufp->regs_allocated = REGS_REALLOCATE;
! 3511: regs->num_regs = num_regs;
! 3512: regs->start = starts;
! 3513: regs->end = ends;
! 3514: }
! 3515: else
! 3516: {
! 3517: bufp->regs_allocated = REGS_UNALLOCATED;
! 3518: regs->num_regs = 0;
! 3519: regs->start = regs->end = (regoff_t *) 0;
! 3520: }
! 3521: }
! 3522: #ifdef _LIBC
! 3523: weak_alias (__re_set_registers, re_set_registers)
! 3524: #endif
! 3525:
! 3526: /* Searching routines. */
! 3527:
/* Single-string front end to re_search_2.

   STRING (of length SIZE) is passed as the second string of the pair,
   with a null, zero-length first string; the stop position is SIZE, so
   the caller cannot limit where matching ends.  BUFP, STARTPOS, RANGE
   and REGS are forwarded unchanged, and the value returned is whatever
   re_search_2 returns.  */

int
re_search (bufp, string, size, startpos, range, regs)
    struct re_pattern_buffer *bufp;
    const char *string;
    int size, startpos, range;
    struct re_registers *regs;
{
  int found_at;

  found_at = re_search_2 (bufp, NULL, 0, string, size,
                          startpos, range, regs, size);
  return found_at;
}
! 3541: #ifdef _LIBC
! 3542: weak_alias (__re_search, re_search)
! 3543: #endif
! 3544:
! 3545:
! 3546: /* Using the compiled pattern in BUFP->buffer, first tries to match the
! 3547: virtual concatenation of STRING1 and STRING2, starting first at index
! 3548: STARTPOS, then at STARTPOS + 1, and so on.
! 3549:
! 3550: STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
! 3551:
! 3552: RANGE is how far to scan while trying to match. RANGE = 0 means try
! 3553: only at STARTPOS; in general, the last start tried is STARTPOS +
! 3554: RANGE.
! 3555:
! 3556: In REGS, return the indices of the virtual concatenation of STRING1
! 3557: and STRING2 that matched the entire BUFP->buffer and its contained
! 3558: subexpressions.
! 3559:
! 3560: Do not consider matching one past the index STOP in the virtual
! 3561: concatenation of STRING1 and STRING2.
! 3562:
! 3563: We return either the position in the strings at which the match was
! 3564: found, -1 if no match, or -2 if error (such as failure
! 3565: stack overflow). */
! 3566:
! 3567: int
! 3568: re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
! 3569: struct re_pattern_buffer *bufp;
! 3570: const char *string1, *string2;
! 3571: int size1, size2;
! 3572: int startpos;
! 3573: int range;
! 3574: struct re_registers *regs;
! 3575: int stop;
! 3576: {
! 3577: int val;
! 3578: register char *fastmap = bufp->fastmap;
! 3579: register RE_TRANSLATE_TYPE translate = bufp->translate;
! 3580: int total_size = size1 + size2;
! 3581: int endpos = startpos + range;
! 3582:
! 3583: /* Check for out-of-range STARTPOS. */
! 3584: if (startpos < 0 || startpos > total_size)
! 3585: return -1;
! 3586:
! 3587: /* Fix up RANGE if it might eventually take us outside
! 3588: the virtual concatenation of STRING1 and STRING2.
! 3589: Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
! 3590: if (endpos < 0)
! 3591: range = 0 - startpos;
! 3592: else if (endpos > total_size)
! 3593: range = total_size - startpos;
! 3594:
! 3595: /* If the search isn't to be a backwards one, don't waste time in a
! 3596: search for a pattern that must be anchored. */
! 3597: if (bufp->used > 0 && range > 0
! 3598: && ((re_opcode_t) bufp->buffer[0] == begbuf
! 3599: /* `begline' is like `begbuf' if it cannot match at newlines. */
! 3600: || ((re_opcode_t) bufp->buffer[0] == begline
! 3601: && !bufp->newline_anchor)))
! 3602: {
! 3603: if (startpos > 0)
! 3604: return -1;
! 3605: else
! 3606: range = 1;
! 3607: }
! 3608:
! 3609: #ifdef emacs
! 3610: /* In a forward search for something that starts with \=.
! 3611: don't keep searching past point. */
! 3612: if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
! 3613: {
! 3614: range = PT - startpos;
! 3615: if (range <= 0)
! 3616: return -1;
! 3617: }
! 3618: #endif /* emacs */
! 3619:
! 3620: /* Update the fastmap now if not correct already. */
! 3621: if (fastmap && !bufp->fastmap_accurate)
! 3622: if (re_compile_fastmap (bufp) == -2)
! 3623: return -2;
! 3624:
! 3625: /* Loop through the string, looking for a place to start matching. */
! 3626: for (;;)
! 3627: {
! 3628: /* If a fastmap is supplied, skip quickly over characters that
! 3629: cannot be the start of a match. If the pattern can match the
! 3630: null string, however, we don't need to skip characters; we want
! 3631: the first null string. */
! 3632: if (fastmap && startpos < total_size && !bufp->can_be_null)
! 3633: {
! 3634: if (range > 0) /* Searching forwards. */
! 3635: {
! 3636: register const char *d;
! 3637: register int lim = 0;
! 3638: int irange = range;
! 3639:
! 3640: if (startpos < size1 && startpos + range >= size1)
! 3641: lim = range - (size1 - startpos);
! 3642:
! 3643: d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
! 3644:
! 3645: /* Written out as an if-else to avoid testing `translate'
! 3646: inside the loop. */
! 3647: if (translate)
! 3648: while (range > lim
! 3649: && !fastmap[(unsigned char)
! 3650: translate[(unsigned char) *d++]])
! 3651: range--;
! 3652: else
! 3653: while (range > lim && !fastmap[(unsigned char) *d++])
! 3654: range--;
! 3655:
! 3656: startpos += irange - range;
! 3657: }
! 3658: else /* Searching backwards. */
! 3659: {
! 3660: register char c = (size1 == 0 || startpos >= size1
! 3661: ? string2[startpos - size1]
! 3662: : string1[startpos]);
! 3663:
! 3664: if (!fastmap[(unsigned char) TRANSLATE (c)])
! 3665: goto advance;
! 3666: }
! 3667: }
! 3668:
! 3669: /* If can't match the null string, and that's all we have left, fail. */
! 3670: if (range >= 0 && startpos == total_size && fastmap
! 3671: && !bufp->can_be_null)
! 3672: return -1;
! 3673:
! 3674: val = re_match_2_internal (bufp, string1, size1, string2, size2,
! 3675: startpos, regs, stop);
! 3676: #ifndef REGEX_MALLOC
! 3677: # ifdef C_ALLOCA
! 3678: alloca (0);
! 3679: # endif
! 3680: #endif
! 3681:
! 3682: if (val >= 0)
! 3683: return startpos;
! 3684:
! 3685: if (val == -2)
! 3686: return -2;
! 3687:
! 3688: advance:
! 3689: if (!range)
! 3690: break;
! 3691: else if (range > 0)
! 3692: {
! 3693: range--;
! 3694: startpos++;
! 3695: }
! 3696: else
! 3697: {
! 3698: range++;
! 3699: startpos--;
! 3700: }
! 3701: }
! 3702: return -1;
! 3703: } /* re_search_2 */
! 3704: #ifdef _LIBC
! 3705: weak_alias (__re_search_2, re_search_2)
! 3706: #endif
! 3707:
/* Turn PTR, a pointer into one of the search strings `string1' and
   `string2', into an offset: the distance from the start of `string1'
   when PTR lies in the first string, otherwise the distance from the
   start of `string2' plus `size1'.  */
#define POINTER_TO_OFFSET(ptr)                          \
  (FIRST_STRING_P (ptr)                                 \
   ? ((regoff_t) ((ptr) - string1))                     \
   : ((regoff_t) ((ptr) - string2 + size1)))

/* Helpers for the split strings handled by re_match_2.  */

/* True while the current end-of-data pointer is the end of the part of
   the first string being matched.  */
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Use before reading a character through *d.  When `d' has reached
   `dend', either fail (if that was the end of string2) or carry on at
   the start of string2.  */
#define PREFETCH()                                              \
  while (d == dend)                                             \
    {                                                           \
      /* End of string2 => fail.  */                            \
      if (dend == end_match_2)                                  \
        goto fail;                                              \
      /* End of string1 => advance to string2.  */              \
      d = string2;                                              \
      dend = end_match_2;                                       \
    }


/* Is D at the very start, or at the very end, of the virtual
   concatenation of `string1' and `string2'?  When there is only one
   string, it is `string2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)


/* Is the character D points at a word constituent?  Two positions
   need care: at the end of string1 the character examined is the
   first one of string2, and just before the start of string2 it is
   the last one of string1.  */
#define WORDCHAR_P(d)                                                   \
  (SYNTAX ((d) == end1 ? *string2                                       \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
   == Sword)

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Do the character before D and the character at D differ in being
   word constituents?  The two ends of the strings count as
   boundaries.  */
#define AT_WORD_BOUNDARY(d)                                             \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
! 3756:
/* Free everything we malloc.  */
#ifdef MATCH_MAY_ALLOCATE
/* Release VAR through REGEX_FREE if it is non-null, then reset it to
   NULL.  The body is wrapped in do/while (0) so that the macro expands
   to exactly one statement; written as two bare statements, the
   `var = NULL' part would escape an enclosing unbraced `if' or
   `else'.  Use it as a statement: FREE_VAR (x);  */
# define FREE_VAR(var)                                                  \
  do                                                                    \
    {                                                                   \
      if (var)                                                          \
        REGEX_FREE (var);                                               \
      (var) = NULL;                                                     \
    }                                                                   \
  while (0)
/* Release the failure stack and every per-match register array of the
   enclosing function.  */
# define FREE_VARIABLES()                                               \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
#else
# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
#endif /* not MATCH_MAY_ALLOCATE */
! 3776:
/* Sentinels meaning "no register is active".  Several constraints
   apply to the two values:
     - neither may be a valid register number; a register number is
       stored in a single byte of the pattern, which limits registers
       to 255, so anything above 255 is safe;
     - they must differ by exactly 1, because of NUM_FAILURE_ITEMS
       above;
     - the "lowest" sentinel must be greater than the "highest" one, so
       that no attempt is made to save registers when none are
       active.  */
#define NO_HIGHEST_ACTIVE_REG   (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG    (NO_HIGHEST_ACTIVE_REG + 1)
! 3786:
! 3787: /* Matching routines. */
! 3788:
! 3789: #ifndef emacs /* Emacs never uses this. */
/* Single-string variant of re_match_2.

   STRING (of length SIZE) is handed to re_match_2_internal as the
   second string, with a null, zero-length first string and SIZE as the
   stop position.  Matching starts at POS.  The value obtained from
   re_match_2_internal is returned unchanged.  */

int
re_match (bufp, string, size, pos, regs)
    struct re_pattern_buffer *bufp;
    const char *string;
    int size, pos;
    struct re_registers *regs;
{
  int match_result;

  match_result = re_match_2_internal (bufp, NULL, 0, string, size,
                                      pos, regs, size);
# ifndef REGEX_MALLOC
#  ifdef C_ALLOCA
  /* NOTE(review): presumably lets the C emulation of alloca reclaim
     the storage used by the match -- confirm.  */
  alloca (0);
#  endif
# endif
  return match_result;
}
! 3808: # ifdef _LIBC
! 3809: weak_alias (__re_match, re_match)
! 3810: # endif
! 3811: #endif /* not emacs */
! 3812:
! 3813: static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
! 3814: unsigned char *end,
! 3815: register_info_type *reg_info)); /* Called by the start_memory case of re_match_2_internal to find out whether a group can match the empty string. */
! 3816: static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
! 3817: unsigned char *end,
! 3818: register_info_type *reg_info)); /* NOTE(review): by its name, the same question for one alternative -- confirm at the definition. */
! 3819: static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
! 3820: unsigned char *end,
! 3821: register_info_type *reg_info)); /* NOTE(review): by its name, the same question for a single opcode -- confirm at the definition. */
! 3822: static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
! 3823: int len, char *translate)); /* Called by the duplicate case when a translate table is in use; a nonzero result is treated as a mismatch. */
! 3824:
! 3825: /* re_match_2 matches the compiled pattern in BUFP against the
! 3826: (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
! 3827: and SIZE2, respectively). We start matching at POS, and stop
! 3828: matching at STOP.
! 3829:
! 3830: If REGS is non-null and the `no_sub' field of BUFP is zero, we
! 3831: store offsets for the substring each group matched in REGS. See the
! 3832: documentation for exactly how many groups we fill.
! 3833:
! 3834: We return -1 if no match, -2 if an internal error (such as the
! 3835: failure stack overflowing or a memory allocation failing). Otherwise,
! 3836: we return the length of the matched substring. */
! 3837:
! 3838: int
! 3839: re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
! 3840: struct re_pattern_buffer *bufp; /* the compiled pattern */
! 3841: const char *string1, *string2; /* the two pieces of the text to match */
! 3842: int size1, size2; /* their respective lengths */
! 3843: int pos; /* offset in the concatenation at which to start */
! 3844: struct re_registers *regs; /* receives the group offsets; may be null */
! 3845: int stop; /* offset in the concatenation at which to stop */
! 3846: {
! 3847: int result = re_match_2_internal (bufp, string1, size1, string2, size2,
! 3848: pos, regs, stop);
! 3849: #ifndef REGEX_MALLOC
! 3850: # ifdef C_ALLOCA
! 3851: alloca (0); /* Force an alloca cleanup now that the real matcher has returned. */
! 3852: # endif
! 3853: #endif
! 3854: return result;
! 3855: }
! 3856: #ifdef _LIBC
! 3857: weak_alias (__re_match_2, re_match_2)
! 3858: #endif
! 3859:
! 3860: /* This is a separate function so that we can force an alloca cleanup
! 3861: afterwards. */
! 3862: static int
! 3863: re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
! 3864: struct re_pattern_buffer *bufp;
! 3865: const char *string1, *string2;
! 3866: int size1, size2;
! 3867: int pos;
! 3868: struct re_registers *regs;
! 3869: int stop;
! 3870: {
! 3871: /* General temporaries. */
! 3872: int mcnt;
! 3873: unsigned char *p1;
! 3874:
! 3875: /* Just past the end of the corresponding string. */
! 3876: const char *end1, *end2;
! 3877:
! 3878: /* Pointers into string1 and string2, just past the last characters in
! 3879: each to consider matching. */
! 3880: const char *end_match_1, *end_match_2;
! 3881:
! 3882: /* Where we are in the data, and the end of the current string. */
! 3883: const char *d, *dend;
! 3884:
! 3885: /* Where we are in the pattern, and the end of the pattern. */
! 3886: unsigned char *p = bufp->buffer;
! 3887: register unsigned char *pend = p + bufp->used;
! 3888:
! 3889: /* Mark the opcode just after a start_memory, so we can test for an
! 3890: empty subpattern when we get to the stop_memory. */
! 3891: unsigned char *just_past_start_mem = 0;
! 3892:
! 3893: /* We use this to map every character in the string. */
! 3894: RE_TRANSLATE_TYPE translate = bufp->translate;
! 3895:
! 3896: /* Failure point stack. Each place that can handle a failure further
! 3897: down the line pushes a failure point on this stack. It consists of
! 3898: regstart, regend, and reg_info for all registers corresponding to
! 3899: the subexpressions we're currently inside, plus the number of such
! 3900: registers, and, finally, two char *'s. The first char * is where
! 3901: to resume scanning the pattern; the second one is where to resume
! 3902: scanning the strings. If the latter is zero, the failure point is
! 3903: a ``dummy''; if a failure happens and the failure point is a dummy,
! 3904: it gets discarded and the next one is tried. */
! 3905: #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
! 3906: fail_stack_type fail_stack;
! 3907: #endif
! 3908: #ifdef DEBUG
! 3909: static unsigned failure_id;
! 3910: unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
! 3911: #endif
! 3912:
! 3913: #ifdef REL_ALLOC
! 3914: /* This holds the pointer to the failure stack, when
! 3915: it is allocated relocatably. */
! 3916: fail_stack_elt_t *failure_stack_ptr;
! 3917: #endif
! 3918:
! 3919: /* We fill all the registers internally, independent of what we
! 3920: return, for use in backreferences. The number here includes
! 3921: an element for register zero. */
! 3922: size_t num_regs = bufp->re_nsub + 1;
! 3923:
! 3924: /* The currently active registers. */
! 3925: active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
! 3926: active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
! 3927:
! 3928: /* Information on the contents of registers. These are pointers into
! 3929: the input strings; they record just what was matched (on this
! 3930: attempt) by a subexpression part of the pattern, that is, the
! 3931: regnum-th regstart pointer points to where in the string we began
! 3932: matching and the regnum-th regend points to right after where we
! 3933: stopped matching the regnum-th subexpression. (The zeroth register
! 3934: keeps track of what the whole pattern matches.) */
! 3935: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
! 3936: const char **regstart, **regend;
! 3937: #endif
! 3938:
! 3939: /* If a group that's operated upon by a repetition operator fails to
! 3940: match anything, then the register for its start will need to be
! 3941: restored because it will have been set to wherever in the string we
! 3942: are when we last see its open-group operator. Similarly for a
! 3943: register's end. */
! 3944: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
! 3945: const char **old_regstart, **old_regend;
! 3946: #endif
! 3947:
! 3948: /* The is_active field of reg_info helps us keep track of which (possibly
! 3949: nested) subexpressions we are currently in. The matched_something
! 3950: field of reg_info[reg_num] helps us tell whether or not we have
! 3951: matched any of the pattern so far this time through the reg_num-th
! 3952: subexpression. These two fields get reset each time through any
! 3953: loop their register is in. */
! 3954: #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
! 3955: register_info_type *reg_info;
! 3956: #endif
! 3957:
! 3958: /* The following record the register info as found in the above
! 3959: variables when we find a match better than any we've seen before.
! 3960: This happens as we backtrack through the failure points, which in
! 3961: turn happens only if we have not yet matched the entire string. */
! 3962: unsigned best_regs_set = false;
! 3963: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
! 3964: const char **best_regstart, **best_regend;
! 3965: #endif
! 3966:
! 3967: /* Logically, this is `best_regend[0]'. But we don't want to have to
! 3968: allocate space for that if we're not allocating space for anything
! 3969: else (see below). Also, we never need info about register 0 for
! 3970: any of the other register vectors, and it seems rather a kludge to
! 3971: treat `best_regend' differently than the rest. So we keep track of
! 3972: the end of the best match so far in a separate variable. We
! 3973: initialize this to NULL so that when we backtrack the first time
! 3974: and need to test it, it's not garbage. */
! 3975: const char *match_end = NULL;
! 3976:
! 3977: /* This helps SET_REGS_MATCHED avoid doing redundant work. */
! 3978: int set_regs_matched_done = 0;
! 3979:
! 3980: /* Used when we pop values we don't care about. */
! 3981: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
! 3982: const char **reg_dummy;
! 3983: register_info_type *reg_info_dummy;
! 3984: #endif
! 3985:
! 3986: #ifdef DEBUG
! 3987: /* Counts the total number of registers pushed. */
! 3988: unsigned num_regs_pushed = 0;
! 3989: #endif
! 3990:
! 3991: DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
! 3992:
! 3993: INIT_FAIL_STACK ();
! 3994:
! 3995: #ifdef MATCH_MAY_ALLOCATE
! 3996: /* Do not bother to initialize all the register variables if there are
! 3997: no groups in the pattern, as it takes a fair amount of time. If
! 3998: there are groups, we include space for register 0 (the whole
! 3999: pattern), even though we never use it, since it simplifies the
! 4000: array indexing. We should fix this. */
! 4001: if (bufp->re_nsub)
! 4002: {
! 4003: regstart = REGEX_TALLOC (num_regs, const char *);
! 4004: regend = REGEX_TALLOC (num_regs, const char *);
! 4005: old_regstart = REGEX_TALLOC (num_regs, const char *);
! 4006: old_regend = REGEX_TALLOC (num_regs, const char *);
! 4007: best_regstart = REGEX_TALLOC (num_regs, const char *);
! 4008: best_regend = REGEX_TALLOC (num_regs, const char *);
! 4009: reg_info = REGEX_TALLOC (num_regs, register_info_type);
! 4010: reg_dummy = REGEX_TALLOC (num_regs, const char *);
! 4011: reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
! 4012:
! 4013: if (!(regstart && regend && old_regstart && old_regend && reg_info
! 4014: && best_regstart && best_regend && reg_dummy && reg_info_dummy))
! 4015: {
! 4016: FREE_VARIABLES ();
! 4017: return -2;
! 4018: }
! 4019: }
! 4020: else
! 4021: {
! 4022: /* We must initialize all our variables to NULL, so that
! 4023: `FREE_VARIABLES' doesn't try to free them. */
! 4024: regstart = regend = old_regstart = old_regend = best_regstart
! 4025: = best_regend = reg_dummy = NULL;
! 4026: reg_info = reg_info_dummy = (register_info_type *) NULL;
! 4027: }
! 4028: #endif /* MATCH_MAY_ALLOCATE */
! 4029:
! 4030: /* The starting position is bogus. */
! 4031: if (pos < 0 || pos > size1 + size2)
! 4032: {
! 4033: FREE_VARIABLES ();
! 4034: return -1;
! 4035: }
! 4036:
! 4037: /* Initialize subexpression text positions to -1 to mark ones that no
! 4038: start_memory/stop_memory has been seen for. Also initialize the
! 4039: register information struct. */
! 4040: for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
! 4041: {
! 4042: regstart[mcnt] = regend[mcnt]
! 4043: = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
! 4044:
! 4045: REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
! 4046: IS_ACTIVE (reg_info[mcnt]) = 0;
! 4047: MATCHED_SOMETHING (reg_info[mcnt]) = 0;
! 4048: EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
! 4049: }
! 4050:
! 4051: /* We move `string1' into `string2' if the latter's empty -- but not if
! 4052: `string1' is null. */
! 4053: if (size2 == 0 && string1 != NULL)
! 4054: {
! 4055: string2 = string1;
! 4056: size2 = size1;
! 4057: string1 = 0;
! 4058: size1 = 0;
! 4059: }
! 4060: end1 = string1 + size1;
! 4061: end2 = string2 + size2;
! 4062:
! 4063: /* Compute where to stop matching, within the two strings. */
! 4064: if (stop <= size1)
! 4065: {
! 4066: end_match_1 = string1 + stop;
! 4067: end_match_2 = string2;
! 4068: }
! 4069: else
! 4070: {
! 4071: end_match_1 = end1;
! 4072: end_match_2 = string2 + stop - size1;
! 4073: }
! 4074:
! 4075: /* `p' scans through the pattern as `d' scans through the data.
! 4076: `dend' is the end of the input string that `d' points within. `d'
! 4077: is advanced into the following input string whenever necessary, but
! 4078: this happens before fetching; therefore, at the beginning of the
! 4079: loop, `d' can be pointing at the end of a string, but it cannot
! 4080: equal `string2'. */
! 4081: if (size1 > 0 && pos <= size1)
! 4082: {
! 4083: d = string1 + pos;
! 4084: dend = end_match_1;
! 4085: }
! 4086: else
! 4087: {
! 4088: d = string2 + pos - size1;
! 4089: dend = end_match_2;
! 4090: }
! 4091:
! 4092: DEBUG_PRINT1 ("The compiled pattern is:\n");
! 4093: DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
! 4094: DEBUG_PRINT1 ("The string to match is: `");
! 4095: DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
! 4096: DEBUG_PRINT1 ("'\n");
! 4097:
! 4098: /* This loops over pattern commands. It exits by returning from the
! 4099: function if the match is complete, or it drops through if the match
! 4100: fails at this starting point in the input data. */
! 4101: for (;;)
! 4102: {
! 4103: #ifdef _LIBC
! 4104: DEBUG_PRINT2 ("\n%p: ", p);
! 4105: #else
! 4106: DEBUG_PRINT2 ("\n0x%x: ", p);
! 4107: #endif
! 4108:
! 4109: if (p == pend)
! 4110: { /* End of pattern means we might have succeeded. */
! 4111: DEBUG_PRINT1 ("end of pattern ... ");
! 4112:
! 4113: /* If we haven't matched the entire string, and we want the
! 4114: longest match, try backtracking. */
! 4115: if (d != end_match_2)
! 4116: {
! 4117: /* 1 if this match ends in the same string (string1 or string2)
! 4118: as the best previous match. */
! 4119: boolean same_str_p = (FIRST_STRING_P (match_end)
! 4120: == MATCHING_IN_FIRST_STRING);
! 4121: /* 1 if this match is the best seen so far. */
! 4122: boolean best_match_p;
! 4123:
! 4124: /* AIX compiler got confused when this was combined
! 4125: with the previous declaration. */
! 4126: if (same_str_p)
! 4127: best_match_p = d > match_end;
! 4128: else
! 4129: best_match_p = !MATCHING_IN_FIRST_STRING;
! 4130:
! 4131: DEBUG_PRINT1 ("backtracking.\n");
! 4132:
! 4133: if (!FAIL_STACK_EMPTY ())
! 4134: { /* More failure points to try. */
! 4135:
! 4136: /* If exceeds best match so far, save it. */
! 4137: if (!best_regs_set || best_match_p)
! 4138: {
! 4139: best_regs_set = true;
! 4140: match_end = d;
! 4141:
! 4142: DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
! 4143:
! 4144: for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
! 4145: {
! 4146: best_regstart[mcnt] = regstart[mcnt];
! 4147: best_regend[mcnt] = regend[mcnt];
! 4148: }
! 4149: }
! 4150: goto fail;
! 4151: }
! 4152:
! 4153: /* If no failure points, don't restore garbage. And if
! 4154: last match is real best match, don't restore second
! 4155: best one. */
! 4156: else if (best_regs_set && !best_match_p)
! 4157: {
! 4158: restore_best_regs:
! 4159: /* Restore best match. It may happen that `dend ==
! 4160: end_match_1' while the restored d is in string2.
! 4161: For example, the pattern `x.*y.*z' against the
! 4162: strings `x-' and `y-z-', if the two strings are
! 4163: not consecutive in memory. */
! 4164: DEBUG_PRINT1 ("Restoring best registers.\n");
! 4165:
! 4166: d = match_end;
! 4167: dend = ((d >= string1 && d <= end1)
! 4168: ? end_match_1 : end_match_2);
! 4169:
! 4170: for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
! 4171: {
! 4172: regstart[mcnt] = best_regstart[mcnt];
! 4173: regend[mcnt] = best_regend[mcnt];
! 4174: }
! 4175: }
! 4176: } /* d != end_match_2 */
! 4177:
! 4178: succeed_label:
! 4179: DEBUG_PRINT1 ("Accepting match.\n");
! 4180:
! 4181: /* If caller wants register contents data back, do it. */
! 4182: if (regs && !bufp->no_sub)
! 4183: {
! 4184: /* Have the register data arrays been allocated? */
! 4185: if (bufp->regs_allocated == REGS_UNALLOCATED)
! 4186: { /* No. So allocate them with malloc. We need one
! 4187: extra element beyond `num_regs' for the `-1' marker
! 4188: GNU code uses. */
! 4189: regs->num_regs = MAX (RE_NREGS, num_regs + 1);
! 4190: regs->start = TALLOC (regs->num_regs, regoff_t);
! 4191: regs->end = TALLOC (regs->num_regs, regoff_t);
! 4192: if (regs->start == NULL || regs->end == NULL)
! 4193: {
! 4194: FREE_VARIABLES ();
! 4195: return -2;
! 4196: }
! 4197: bufp->regs_allocated = REGS_REALLOCATE;
! 4198: }
! 4199: else if (bufp->regs_allocated == REGS_REALLOCATE)
! 4200: { /* Yes. If we need more elements than were already
! 4201: allocated, reallocate them. If we need fewer, just
! 4202: leave it alone. */
! 4203: if (regs->num_regs < num_regs + 1)
! 4204: {
! 4205: regs->num_regs = num_regs + 1;
! 4206: RETALLOC (regs->start, regs->num_regs, regoff_t);
! 4207: RETALLOC (regs->end, regs->num_regs, regoff_t);
! 4208: if (regs->start == NULL || regs->end == NULL)
! 4209: {
! 4210: FREE_VARIABLES ();
! 4211: return -2;
! 4212: }
! 4213: }
! 4214: }
! 4215: else
! 4216: {
! 4217: /* These braces fend off a "empty body in an else-statement"
! 4218: warning under GCC when assert expands to nothing. */
! 4219: assert (bufp->regs_allocated == REGS_FIXED);
! 4220: }
! 4221:
! 4222: /* Convert the pointer data in `regstart' and `regend' to
! 4223: indices. Register zero has to be set differently,
! 4224: since we haven't kept track of any info for it. */
! 4225: if (regs->num_regs > 0)
! 4226: {
! 4227: regs->start[0] = pos;
! 4228: regs->end[0] = (MATCHING_IN_FIRST_STRING
! 4229: ? ((regoff_t) (d - string1))
! 4230: : ((regoff_t) (d - string2 + size1)));
! 4231: }
! 4232:
! 4233: /* Go through the first `min (num_regs, regs->num_regs)'
! 4234: registers, since that is all we initialized. */
! 4235: for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
! 4236: mcnt++)
! 4237: {
! 4238: if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
! 4239: regs->start[mcnt] = regs->end[mcnt] = -1;
! 4240: else
! 4241: {
! 4242: regs->start[mcnt]
! 4243: = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
! 4244: regs->end[mcnt]
! 4245: = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
! 4246: }
! 4247: }
! 4248:
! 4249: /* If the regs structure we return has more elements than
! 4250: were in the pattern, set the extra elements to -1. If
! 4251: we (re)allocated the registers, this is the case,
! 4252: because we always allocate enough to have at least one
! 4253: -1 at the end. */
! 4254: for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
! 4255: regs->start[mcnt] = regs->end[mcnt] = -1;
! 4256: } /* regs && !bufp->no_sub */
! 4257:
! 4258: DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
! 4259: nfailure_points_pushed, nfailure_points_popped,
! 4260: nfailure_points_pushed - nfailure_points_popped);
! 4261: DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
! 4262:
! 4263: mcnt = d - pos - (MATCHING_IN_FIRST_STRING
! 4264: ? string1
! 4265: : string2 - size1);
! 4266:
! 4267: DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
! 4268:
! 4269: FREE_VARIABLES ();
! 4270: return mcnt;
! 4271: }
! 4272:
! 4273: /* Otherwise match next pattern command. */
! 4274: switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
! 4275: {
! 4276: /* Ignore these. Used to ignore the n of succeed_n's which
! 4277: currently have n == 0. */
! 4278: case no_op:
! 4279: DEBUG_PRINT1 ("EXECUTING no_op.\n");
! 4280: break;
! 4281:
! 4282: case succeed:
! 4283: DEBUG_PRINT1 ("EXECUTING succeed.\n");
! 4284: goto succeed_label;
! 4285:
! 4286: /* Match the next n pattern characters exactly. The following
! 4287: byte in the pattern defines n, and the n bytes after that
! 4288: are the characters to match. */
! 4289: case exactn:
! 4290: mcnt = *p++;
! 4291: DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
! 4292:
! 4293: /* This is written out as an if-else so we don't waste time
! 4294: testing `translate' inside the loop. */
! 4295: if (translate)
! 4296: {
! 4297: do
! 4298: {
! 4299: PREFETCH ();
! 4300: if ((unsigned char) translate[(unsigned char) *d++]
! 4301: != (unsigned char) *p++)
! 4302: goto fail;
! 4303: }
! 4304: while (--mcnt);
! 4305: }
! 4306: else
! 4307: {
! 4308: do
! 4309: {
! 4310: PREFETCH ();
! 4311: if (*d++ != (char) *p++) goto fail;
! 4312: }
! 4313: while (--mcnt);
! 4314: }
! 4315: SET_REGS_MATCHED ();
! 4316: break;
! 4317:
! 4318:
! 4319: /* Match any character except possibly a newline or a null. */
! 4320: case anychar:
! 4321: DEBUG_PRINT1 ("EXECUTING anychar.\n");
! 4322:
! 4323: PREFETCH ();
! 4324:
! 4325: if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
! 4326: || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
! 4327: goto fail;
! 4328:
! 4329: SET_REGS_MATCHED ();
! 4330: DEBUG_PRINT2 (" Matched `%d'.\n", *d);
! 4331: d++;
! 4332: break;
! 4333:
! 4334:
! 4335: case charset:
! 4336: case charset_not:
! 4337: {
! 4338: register unsigned char c;
! 4339: boolean not = (re_opcode_t) *(p - 1) == charset_not;
! 4340:
! 4341: DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
! 4342:
! 4343: PREFETCH ();
! 4344: c = TRANSLATE (*d); /* The character to match. */
! 4345:
! 4346: /* Cast to `unsigned' instead of `unsigned char' in case the
! 4347: bit list is a full 32 bytes long. */
! 4348: if (c < (unsigned) (*p * BYTEWIDTH)
! 4349: && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
! 4350: not = !not;
! 4351:
! 4352: p += 1 + *p;
! 4353:
! 4354: if (!not) goto fail;
! 4355:
! 4356: SET_REGS_MATCHED ();
! 4357: d++;
! 4358: break;
! 4359: }
! 4360:
! 4361:
! 4362: /* The beginning of a group is represented by start_memory.
! 4363: The arguments are the register number in the next byte, and the
! 4364: number of groups inner to this one in the next. The text
! 4365: matched within the group is recorded (in the internal
! 4366: registers data structure) under the register number. */
! 4367: case start_memory:
! 4368: DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
! 4369:
! 4370: /* Find out if this group can match the empty string. */
! 4371: p1 = p; /* To send to group_match_null_string_p. */
! 4372:
! 4373: if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
! 4374: REG_MATCH_NULL_STRING_P (reg_info[*p])
! 4375: = group_match_null_string_p (&p1, pend, reg_info);
! 4376:
! 4377: /* Save the position in the string where we were the last time
! 4378: we were at this open-group operator in case the group is
! 4379: operated upon by a repetition operator, e.g., with `(a*)*b'
! 4380: against `ab'; then we want to ignore where we are now in
! 4381: the string in case this attempt to match fails. */
! 4382: old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
! 4383: ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
! 4384: : regstart[*p];
! 4385: DEBUG_PRINT2 (" old_regstart: %d\n",
! 4386: POINTER_TO_OFFSET (old_regstart[*p]));
! 4387:
! 4388: regstart[*p] = d;
! 4389: DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
! 4390:
! 4391: IS_ACTIVE (reg_info[*p]) = 1;
! 4392: MATCHED_SOMETHING (reg_info[*p]) = 0;
! 4393:
! 4394: /* Clear this whenever we change the register activity status. */
! 4395: set_regs_matched_done = 0;
! 4396:
! 4397: /* This is the new highest active register. */
! 4398: highest_active_reg = *p;
! 4399:
! 4400: /* If nothing was active before, this is the new lowest active
! 4401: register. */
! 4402: if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
! 4403: lowest_active_reg = *p;
! 4404:
! 4405: /* Move past the register number and inner group count. */
! 4406: p += 2;
! 4407: just_past_start_mem = p;
! 4408:
! 4409: break;
! 4410:
! 4411:
! 4412: /* The stop_memory opcode represents the end of a group. Its
! 4413: arguments are the same as start_memory's: the register
! 4414: number, and the number of inner groups. */
! 4415: case stop_memory:
! 4416: DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
! 4417:
! 4418: /* We need to save the string position the last time we were at
! 4419: this close-group operator in case the group is operated
! 4420: upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
! 4421: against `aba'; then we want to ignore where we are now in
! 4422: the string in case this attempt to match fails. */
! 4423: old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
! 4424: ? REG_UNSET (regend[*p]) ? d : regend[*p]
! 4425: : regend[*p];
! 4426: DEBUG_PRINT2 (" old_regend: %d\n",
! 4427: POINTER_TO_OFFSET (old_regend[*p]));
! 4428:
! 4429: regend[*p] = d;
! 4430: DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
! 4431:
! 4432: /* This register isn't active anymore. */
! 4433: IS_ACTIVE (reg_info[*p]) = 0;
! 4434:
! 4435: /* Clear this whenever we change the register activity status. */
! 4436: set_regs_matched_done = 0;
! 4437:
! 4438: /* If this was the only register active, nothing is active
! 4439: anymore. */
! 4440: if (lowest_active_reg == highest_active_reg)
! 4441: {
! 4442: lowest_active_reg = NO_LOWEST_ACTIVE_REG;
! 4443: highest_active_reg = NO_HIGHEST_ACTIVE_REG;
! 4444: }
! 4445: else
! 4446: { /* We must scan for the new highest active register, since
! 4447: it isn't necessarily one less than now: consider
! 4448: (a(b)c(d(e)f)g). When group 3 ends, after the f), the
! 4449: new highest active register is 1. */
! 4450: unsigned char r = *p - 1;
! 4451: while (r > 0 && !IS_ACTIVE (reg_info[r]))
! 4452: r--;
! 4453:
! 4454: /* If we end up at register zero, that means that we saved
! 4455: the registers as the result of an `on_failure_jump', not
! 4456: a `start_memory', and we jumped to past the innermost
! 4457: `stop_memory'. For example, in ((.)*) we save
! 4458: registers 1 and 2 as a result of the *, but when we pop
! 4459: back to the second ), we are at the stop_memory 1.
! 4460: Thus, nothing is active. */
! 4461: if (r == 0)
! 4462: {
! 4463: lowest_active_reg = NO_LOWEST_ACTIVE_REG;
! 4464: highest_active_reg = NO_HIGHEST_ACTIVE_REG;
! 4465: }
! 4466: else
! 4467: highest_active_reg = r;
! 4468: }
! 4469:
! 4470: /* If just failed to match something this time around with a
! 4471: group that's operated on by a repetition operator, try to
! 4472: force exit from the ``loop'', and restore the register
! 4473: information for this group that we had before trying this
! 4474: last match. */
! 4475: if ((!MATCHED_SOMETHING (reg_info[*p])
! 4476: || just_past_start_mem == p - 1)
! 4477: && (p + 2) < pend)
! 4478: {
! 4479: boolean is_a_jump_n = false;
! 4480:
! 4481: p1 = p + 2;
! 4482: mcnt = 0;
! 4483: switch ((re_opcode_t) *p1++)
! 4484: {
! 4485: case jump_n:
! 4486: is_a_jump_n = true;
! 4487: case pop_failure_jump:
! 4488: case maybe_pop_jump:
! 4489: case jump:
! 4490: case dummy_failure_jump:
! 4491: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 4492: if (is_a_jump_n)
! 4493: p1 += 2;
! 4494: break;
! 4495:
! 4496: default:
! 4497: /* do nothing */ ;
! 4498: }
! 4499: p1 += mcnt;
! 4500:
! 4501: /* If the next operation is a jump backwards in the pattern
! 4502: to an on_failure_jump right before the start_memory
! 4503: corresponding to this stop_memory, exit from the loop
! 4504: by forcing a failure after pushing on the stack the
! 4505: on_failure_jump's jump in the pattern, and d. */
! 4506: if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
! 4507: && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
! 4508: {
! 4509: /* If this group ever matched anything, then restore
! 4510: what its registers were before trying this last
! 4511: failed match, e.g., with `(a*)*b' against `ab' for
! 4512: regstart[1], and, e.g., with `((a*)*(b*)*)*'
! 4513: against `aba' for regend[3].
! 4514:
! 4515: Also restore the registers for inner groups for,
! 4516: e.g., `((a*)(b*))*' against `aba' (register 3 would
! 4517: otherwise get trashed). */
! 4518:
! 4519: if (EVER_MATCHED_SOMETHING (reg_info[*p]))
! 4520: {
! 4521: unsigned r;
! 4522:
! 4523: EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
! 4524:
! 4525: /* Restore this and inner groups' (if any) registers. */
! 4526: for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
! 4527: r++)
! 4528: {
! 4529: regstart[r] = old_regstart[r];
! 4530:
! 4531: /* xx why this test? */
! 4532: if (old_regend[r] >= regstart[r])
! 4533: regend[r] = old_regend[r];
! 4534: }
! 4535: }
! 4536: p1++;
! 4537: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 4538: PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
! 4539:
! 4540: goto fail;
! 4541: }
! 4542: }
! 4543:
! 4544: /* Move past the register number and the inner group count. */
! 4545: p += 2;
! 4546: break;
! 4547:
! 4548:
! 4549: /* \<digit> has been turned into a `duplicate' command which is
! 4550: followed by the numeric value of <digit> as the register number. */
! 4551: case duplicate:
! 4552: {
! 4553: register const char *d2, *dend2;
! 4554: int regno = *p++; /* Get which register to match against. */
! 4555: DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
! 4556:
! 4557: /* Can't back reference a group which we've never matched. */
! 4558: if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
! 4559: goto fail;
! 4560:
! 4561: /* Where in input to try to start matching. */
! 4562: d2 = regstart[regno];
! 4563:
! 4564: /* Where to stop matching; if both the place to start and
! 4565: the place to stop matching are in the same string, then
! 4566: set to the place to stop, otherwise, for now have to use
! 4567: the end of the first string. */
! 4568:
! 4569: dend2 = ((FIRST_STRING_P (regstart[regno])
! 4570: == FIRST_STRING_P (regend[regno]))
! 4571: ? regend[regno] : end_match_1);
! 4572: for (;;)
! 4573: {
! 4574: /* If necessary, advance to next segment in register
! 4575: contents. */
! 4576: while (d2 == dend2)
! 4577: {
! 4578: if (dend2 == end_match_2) break;
! 4579: if (dend2 == regend[regno]) break;
! 4580:
! 4581: /* End of string1 => advance to string2. */
! 4582: d2 = string2;
! 4583: dend2 = regend[regno];
! 4584: }
! 4585: /* At end of register contents => success */
! 4586: if (d2 == dend2) break;
! 4587:
! 4588: /* If necessary, advance to next segment in data. */
! 4589: PREFETCH ();
! 4590:
! 4591: /* How many characters left in this segment to match. */
! 4592: mcnt = dend - d;
! 4593:
! 4594: /* Want how many consecutive characters we can match in
! 4595: one shot, so, if necessary, adjust the count. */
! 4596: if (mcnt > dend2 - d2)
! 4597: mcnt = dend2 - d2;
! 4598:
! 4599: /* Compare that many; failure if mismatch, else move
! 4600: past them. */
! 4601: if (translate
! 4602: ? bcmp_translate (d, d2, mcnt, translate)
! 4603: : memcmp (d, d2, mcnt))
! 4604: goto fail;
! 4605: d += mcnt, d2 += mcnt;
! 4606:
! 4607: /* Do this because we've matched some characters. */
! 4608: SET_REGS_MATCHED ();
! 4609: }
! 4610: }
! 4611: break;
! 4612:
! 4613:
! 4614: /* begline matches the empty string at the beginning of the string
! 4615: (unless `not_bol' is set in `bufp'), and, if
! 4616: `newline_anchor' is set, after newlines. */
! 4617: case begline:
! 4618: DEBUG_PRINT1 ("EXECUTING begline.\n");
! 4619:
! 4620: if (AT_STRINGS_BEG (d))
! 4621: {
! 4622: if (!bufp->not_bol) break;
! 4623: }
! 4624: else if (d[-1] == '\n' && bufp->newline_anchor)
! 4625: {
! 4626: break;
! 4627: }
! 4628: /* In all other cases, we fail. */
! 4629: goto fail;
! 4630:
! 4631:
! 4632: /* endline is the dual of begline. */
! 4633: case endline:
! 4634: DEBUG_PRINT1 ("EXECUTING endline.\n");
! 4635:
! 4636: if (AT_STRINGS_END (d))
! 4637: {
! 4638: if (!bufp->not_eol) break;
! 4639: }
! 4640:
! 4641: /* We have to ``prefetch'' the next character. */
! 4642: else if ((d == end1 ? *string2 : *d) == '\n'
! 4643: && bufp->newline_anchor)
! 4644: {
! 4645: break;
! 4646: }
! 4647: goto fail;
! 4648:
! 4649:
! 4650: /* Match at the very beginning of the data. */
! 4651: case begbuf:
! 4652: DEBUG_PRINT1 ("EXECUTING begbuf.\n");
! 4653: if (AT_STRINGS_BEG (d))
! 4654: break;
! 4655: goto fail;
! 4656:
! 4657:
! 4658: /* Match at the very end of the data. */
! 4659: case endbuf:
! 4660: DEBUG_PRINT1 ("EXECUTING endbuf.\n");
! 4661: if (AT_STRINGS_END (d))
! 4662: break;
! 4663: goto fail;
! 4664:
! 4665:
! 4666: /* on_failure_keep_string_jump is used to optimize `.*\n'. It
! 4667: pushes NULL as the value for the string on the stack. Then
! 4668: `pop_failure_point' will keep the current value for the
! 4669: string, instead of restoring it. To see why, consider
! 4670: matching `foo\nbar' against `.*\n'. The .* matches the foo;
! 4671: then the . fails against the \n. But the next thing we want
! 4672: to do is match the \n against the \n; if we restored the
! 4673: string value, we would be back at the foo.
! 4674:
! 4675: Because this is used only in specific cases, we don't need to
! 4676: check all the things that `on_failure_jump' does, to make
! 4677: sure the right things get saved on the stack. Hence we don't
! 4678: share its code. The only reason to push anything on the
! 4679: stack at all is that otherwise we would have to change
! 4680: `anychar's code to do something besides goto fail in this
! 4681: case; that seems worse than this. */
! 4682: case on_failure_keep_string_jump:
! 4683: DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
! 4684:
! 4685: EXTRACT_NUMBER_AND_INCR (mcnt, p);
! 4686: #ifdef _LIBC
! 4687: DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
! 4688: #else
! 4689: DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
! 4690: #endif
! 4691:
! 4692: PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
! 4693: break;
! 4694:
! 4695:
! 4696: /* Uses of on_failure_jump:
! 4697:
! 4698: Each alternative starts with an on_failure_jump that points
! 4699: to the beginning of the next alternative. Each alternative
! 4700: except the last ends with a jump that in effect jumps past
! 4701: the rest of the alternatives. (They really jump to the
! 4702: ending jump of the following alternative, because tensioning
! 4703: these jumps is a hassle.)
! 4704:
! 4705: Repeats start with an on_failure_jump that points past both
! 4706: the repetition text and either the following jump or
! 4707: pop_failure_jump back to this on_failure_jump. */
! 4708: case on_failure_jump:
! 4709: on_failure:
! 4710: DEBUG_PRINT1 ("EXECUTING on_failure_jump");
! 4711:
! 4712: EXTRACT_NUMBER_AND_INCR (mcnt, p);
! 4713: #ifdef _LIBC
! 4714: DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
! 4715: #else
! 4716: DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
! 4717: #endif
! 4718:
! 4719: /* If this on_failure_jump comes right before a group (i.e.,
! 4720: the original * applied to a group), save the information
! 4721: for that group and all inner ones, so that if we fail back
! 4722: to this point, the group's information will be correct.
! 4723: For example, in \(a*\)*\1, we need the preceding group,
! 4724: and in \(zz\(a*\)b*\)\2, we need the inner group. */
! 4725:
! 4726: /* We can't use `p' to check ahead because we push
! 4727: a failure point to `p + mcnt' after we do this. */
! 4728: p1 = p;
! 4729:
! 4730: /* We need to skip no_op's before we look for the
! 4731: start_memory in case this on_failure_jump is happening as
! 4732: the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
! 4733: against aba. */
! 4734: while (p1 < pend && (re_opcode_t) *p1 == no_op)
! 4735: p1++;
! 4736:
! 4737: if (p1 < pend && (re_opcode_t) *p1 == start_memory)
! 4738: {
! 4739: /* We have a new highest active register now. This will
! 4740: get reset at the start_memory we are about to get to,
! 4741: but we will have saved all the registers relevant to
! 4742: this repetition op, as described above. */
! 4743: highest_active_reg = *(p1 + 1) + *(p1 + 2);
! 4744: if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
! 4745: lowest_active_reg = *(p1 + 1);
! 4746: }
! 4747:
! 4748: DEBUG_PRINT1 (":\n");
! 4749: PUSH_FAILURE_POINT (p + mcnt, d, -2);
! 4750: break;
! 4751:
! 4752:
! 4753: /* A smart repeat ends with `maybe_pop_jump'.
! 4754: We change it to either `pop_failure_jump' or `jump'. */
! 4755: case maybe_pop_jump:
! 4756: EXTRACT_NUMBER_AND_INCR (mcnt, p);
! 4757: DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
! 4758: {
! 4759: register unsigned char *p2 = p;
! 4760:
! 4761: /* Compare the beginning of the repeat with what in the
! 4762: pattern follows its end. If we can establish that there
! 4763: is nothing that they would both match, i.e., that we
! 4764: would have to backtrack because of (as in, e.g., `a*a')
! 4765: then we can change to pop_failure_jump, because we'll
! 4766: never have to backtrack.
! 4767:
! 4768: This is not true in the case of alternatives: in
! 4769: `(a|ab)*' we do need to backtrack to the `ab' alternative
! 4770: (e.g., if the string was `ab'). But instead of trying to
! 4771: detect that here, the alternative has put on a dummy
! 4772: failure point which is what we will end up popping. */
! 4773:
! 4774: /* Skip over open/close-group commands.
! 4775: If what follows this loop is a ...+ construct,
! 4776: look at what begins its body, since we will have to
! 4777: match at least one of that. */
! 4778: while (1)
! 4779: {
! 4780: if (p2 + 2 < pend
! 4781: && ((re_opcode_t) *p2 == stop_memory
! 4782: || (re_opcode_t) *p2 == start_memory))
! 4783: p2 += 3;
! 4784: else if (p2 + 6 < pend
! 4785: && (re_opcode_t) *p2 == dummy_failure_jump)
! 4786: p2 += 6;
! 4787: else
! 4788: break;
! 4789: }
! 4790:
! 4791: p1 = p + mcnt;
! 4792: /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
! 4793: to the `maybe_finalize_jump' of this case. Examine what
! 4794: follows. */
! 4795:
! 4796: /* If we're at the end of the pattern, we can change. */
! 4797: if (p2 == pend)
! 4798: {
! 4799: /* Consider what happens when matching ":\(.*\)"
! 4800: against ":/". I don't really understand this code
! 4801: yet. */
! 4802: p[-3] = (unsigned char) pop_failure_jump;
! 4803: DEBUG_PRINT1
! 4804: (" End of pattern: change to `pop_failure_jump'.\n");
! 4805: }
! 4806:
! 4807: else if ((re_opcode_t) *p2 == exactn
! 4808: || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
! 4809: {
! 4810: register unsigned char c
! 4811: = *p2 == (unsigned char) endline ? '\n' : p2[2];
! 4812:
! 4813: if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
! 4814: {
! 4815: p[-3] = (unsigned char) pop_failure_jump;
! 4816: DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
! 4817: c, p1[5]);
! 4818: }
! 4819:
! 4820: else if ((re_opcode_t) p1[3] == charset
! 4821: || (re_opcode_t) p1[3] == charset_not)
! 4822: {
! 4823: int not = (re_opcode_t) p1[3] == charset_not;
! 4824:
! 4825: if (c < (unsigned char) (p1[4] * BYTEWIDTH)
! 4826: && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
! 4827: not = !not;
! 4828:
! 4829: /* `not' is equal to 1 if c would match, which means
! 4830: that we can't change to pop_failure_jump. */
! 4831: if (!not)
! 4832: {
! 4833: p[-3] = (unsigned char) pop_failure_jump;
! 4834: DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
! 4835: }
! 4836: }
! 4837: }
! 4838: else if ((re_opcode_t) *p2 == charset)
! 4839: {
! 4840: #ifdef DEBUG
! 4841: register unsigned char c
! 4842: = *p2 == (unsigned char) endline ? '\n' : p2[2];
! 4843: #endif
! 4844:
! 4845: #if 0
! 4846: if ((re_opcode_t) p1[3] == exactn
! 4847: && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
! 4848: && (p2[2 + p1[5] / BYTEWIDTH]
! 4849: & (1 << (p1[5] % BYTEWIDTH)))))
! 4850: #else
! 4851: if ((re_opcode_t) p1[3] == exactn
! 4852: && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
! 4853: && (p2[2 + p1[4] / BYTEWIDTH]
! 4854: & (1 << (p1[4] % BYTEWIDTH)))))
! 4855: #endif
! 4856: {
! 4857: p[-3] = (unsigned char) pop_failure_jump;
! 4858: DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
! 4859: c, p1[5]);
! 4860: }
! 4861:
! 4862: else if ((re_opcode_t) p1[3] == charset_not)
! 4863: {
! 4864: int idx;
! 4865: /* We win if the charset_not inside the loop
! 4866: lists every character listed in the charset after. */
! 4867: for (idx = 0; idx < (int) p2[1]; idx++)
! 4868: if (! (p2[2 + idx] == 0
! 4869: || (idx < (int) p1[4]
! 4870: && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
! 4871: break;
! 4872:
! 4873: if (idx == p2[1])
! 4874: {
! 4875: p[-3] = (unsigned char) pop_failure_jump;
! 4876: DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
! 4877: }
! 4878: }
! 4879: else if ((re_opcode_t) p1[3] == charset)
! 4880: {
! 4881: int idx;
! 4882: /* We win if the charset inside the loop
! 4883: has no overlap with the one after the loop. */
! 4884: for (idx = 0;
! 4885: idx < (int) p2[1] && idx < (int) p1[4];
! 4886: idx++)
! 4887: if ((p2[2 + idx] & p1[5 + idx]) != 0)
! 4888: break;
! 4889:
! 4890: if (idx == p2[1] || idx == p1[4])
! 4891: {
! 4892: p[-3] = (unsigned char) pop_failure_jump;
! 4893: DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
! 4894: }
! 4895: }
! 4896: }
! 4897: }
! 4898: p -= 2; /* Point at relative address again. */
! 4899: if ((re_opcode_t) p[-1] != pop_failure_jump)
! 4900: {
! 4901: p[-1] = (unsigned char) jump;
! 4902: DEBUG_PRINT1 (" Match => jump.\n");
! 4903: goto unconditional_jump;
! 4904: }
! 4905: /* Note fall through. */
! 4906:
! 4907:
! 4908: /* The end of a simple repeat has a pop_failure_jump back to
! 4909: its matching on_failure_jump, where the latter will push a
! 4910: failure point. The pop_failure_jump takes off failure
! 4911: points put on by this pop_failure_jump's matching
! 4912: on_failure_jump; we got through the pattern to here from the
! 4913: matching on_failure_jump, so didn't fail. */
! 4914: case pop_failure_jump:
! 4915: {
! 4916: /* We need to pass separate storage for the lowest and
! 4917: highest registers, even though we don't care about the
! 4918: actual values. Otherwise, we will restore only one
! 4919: register from the stack, since lowest will == highest in
! 4920: `pop_failure_point'. */
! 4921: active_reg_t dummy_low_reg, dummy_high_reg;
! 4922: unsigned char *pdummy;
! 4923: const char *sdummy;
! 4924:
! 4925: DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
! 4926: POP_FAILURE_POINT (sdummy, pdummy,
! 4927: dummy_low_reg, dummy_high_reg,
! 4928: reg_dummy, reg_dummy, reg_info_dummy);
! 4929: }
! 4930: /* Note fall through. */
! 4931:
! 4932: unconditional_jump:
! 4933: #ifdef _LIBC
! 4934: DEBUG_PRINT2 ("\n%p: ", p);
! 4935: #else
! 4936: DEBUG_PRINT2 ("\n0x%x: ", p);
! 4937: #endif
! 4938: /* Note fall through. */
! 4939:
! 4940: /* Unconditionally jump (without popping any failure points). */
! 4941: case jump:
! 4942: EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
! 4943: DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
! 4944: p += mcnt; /* Do the jump. */
! 4945: #ifdef _LIBC
! 4946: DEBUG_PRINT2 ("(to %p).\n", p);
! 4947: #else
! 4948: DEBUG_PRINT2 ("(to 0x%x).\n", p);
! 4949: #endif
! 4950: break;
! 4951:
! 4952:
! 4953: /* We need this opcode so we can detect where alternatives end
! 4954: in `group_match_null_string_p' et al. */
! 4955: case jump_past_alt:
! 4956: DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
! 4957: goto unconditional_jump;
! 4958:
! 4959:
! 4960: /* Normally, the on_failure_jump pushes a failure point, which
! 4961: then gets popped at pop_failure_jump. We will end up at
! 4962: pop_failure_jump, also, and with a pattern of, say, `a+', we
! 4963: are skipping over the on_failure_jump, so we have to push
! 4964: something meaningless for pop_failure_jump to pop. */
! 4965: case dummy_failure_jump:
! 4966: DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
! 4967: /* It doesn't matter what we push for the string here. What
! 4968: the code at `fail' tests is the value for the pattern. */
! 4969: PUSH_FAILURE_POINT (NULL, NULL, -2);
! 4970: goto unconditional_jump;
! 4971:
! 4972:
! 4973: /* At the end of an alternative, we need to push a dummy failure
! 4974: point in case we are followed by a `pop_failure_jump', because
! 4975: we don't want the failure point for the alternative to be
! 4976: popped. For example, matching `(a|ab)*' against `aab'
! 4977: requires that we match the `ab' alternative. */
! 4978: case push_dummy_failure:
! 4979: DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
! 4980: /* See comments just above at `dummy_failure_jump' about the
! 4981: two zeroes. */
! 4982: PUSH_FAILURE_POINT (NULL, NULL, -2);
! 4983: break;
! 4984:
! 4985: /* Have to succeed matching what follows at least n times.
! 4986: After that, handle like `on_failure_jump'. */
! 4987: case succeed_n:
! 4988: EXTRACT_NUMBER (mcnt, p + 2);
! 4989: DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
! 4990:
! 4991: assert (mcnt >= 0);
! 4992: /* Originally, this is how many times we HAVE to succeed. */
! 4993: if (mcnt > 0)
! 4994: {
! 4995: mcnt--;
! 4996: p += 2;
! 4997: STORE_NUMBER_AND_INCR (p, mcnt);
! 4998: #ifdef _LIBC
! 4999: DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt);
! 5000: #else
! 5001: DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt);
! 5002: #endif
! 5003: }
! 5004: else if (mcnt == 0)
! 5005: {
! 5006: #ifdef _LIBC
! 5007: DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);
! 5008: #else
! 5009: DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
! 5010: #endif
! 5011: p[2] = (unsigned char) no_op;
! 5012: p[3] = (unsigned char) no_op;
! 5013: goto on_failure;
! 5014: }
! 5015: break;
! 5016:
! 5017: case jump_n:
! 5018: EXTRACT_NUMBER (mcnt, p + 2);
! 5019: DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
! 5020:
! 5021: /* Originally, this is how many times we CAN jump. */
! 5022: if (mcnt)
! 5023: {
! 5024: mcnt--;
! 5025: STORE_NUMBER (p + 2, mcnt);
! 5026: #ifdef _LIBC
! 5027: DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt);
! 5028: #else
! 5029: DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt);
! 5030: #endif
! 5031: goto unconditional_jump;
! 5032: }
! 5033: /* If don't have to jump any more, skip over the rest of command. */
! 5034: else
! 5035: p += 4;
! 5036: break;
! 5037:
! 5038: case set_number_at:
! 5039: {
! 5040: DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
! 5041:
! 5042: EXTRACT_NUMBER_AND_INCR (mcnt, p);
! 5043: p1 = p + mcnt;
! 5044: EXTRACT_NUMBER_AND_INCR (mcnt, p);
! 5045: #ifdef _LIBC
! 5046: DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
! 5047: #else
! 5048: DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
! 5049: #endif
! 5050: STORE_NUMBER (p1, mcnt);
! 5051: break;
! 5052: }
! 5053:
! 5054: #if 0
! 5055: /* The DEC Alpha C compiler 3.x generates incorrect code for the
! 5056: test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
! 5057: AT_WORD_BOUNDARY, so this code is disabled. Expanding the
! 5058: macro and introducing temporary variables works around the bug. */
! 5059:
! 5060: case wordbound:
! 5061: DEBUG_PRINT1 ("EXECUTING wordbound.\n");
! 5062: if (AT_WORD_BOUNDARY (d))
! 5063: break;
! 5064: goto fail;
! 5065:
! 5066: case notwordbound:
! 5067: DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
! 5068: if (AT_WORD_BOUNDARY (d))
! 5069: goto fail;
! 5070: break;
! 5071: #else
! 5072: case wordbound:
! 5073: {
! 5074: boolean prevchar, thischar;
! 5075:
! 5076: DEBUG_PRINT1 ("EXECUTING wordbound.\n");
! 5077: if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
! 5078: break;
! 5079:
! 5080: prevchar = WORDCHAR_P (d - 1);
! 5081: thischar = WORDCHAR_P (d);
! 5082: if (prevchar != thischar)
! 5083: break;
! 5084: goto fail;
! 5085: }
! 5086:
! 5087: case notwordbound:
! 5088: {
! 5089: boolean prevchar, thischar;
! 5090:
! 5091: DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
! 5092: if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
! 5093: goto fail;
! 5094:
! 5095: prevchar = WORDCHAR_P (d - 1);
! 5096: thischar = WORDCHAR_P (d);
! 5097: if (prevchar != thischar)
! 5098: goto fail;
! 5099: break;
! 5100: }
! 5101: #endif
! 5102:
! 5103: case wordbeg:
! 5104: DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
! 5105: if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
! 5106: break;
! 5107: goto fail;
! 5108:
! 5109: case wordend:
! 5110: DEBUG_PRINT1 ("EXECUTING wordend.\n");
! 5111: if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
! 5112: && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
! 5113: break;
! 5114: goto fail;
! 5115:
! 5116: #ifdef emacs
! 5117: case before_dot:
! 5118: DEBUG_PRINT1 ("EXECUTING before_dot.\n");
! 5119: if (PTR_CHAR_POS ((unsigned char *) d) >= point)
! 5120: goto fail;
! 5121: break;
! 5122:
! 5123: case at_dot:
! 5124: DEBUG_PRINT1 ("EXECUTING at_dot.\n");
! 5125: if (PTR_CHAR_POS ((unsigned char *) d) != point)
! 5126: goto fail;
! 5127: break;
! 5128:
! 5129: case after_dot:
! 5130: DEBUG_PRINT1 ("EXECUTING after_dot.\n");
! 5131: if (PTR_CHAR_POS ((unsigned char *) d) <= point)
! 5132: goto fail;
! 5133: break;
! 5134:
! 5135: case syntaxspec:
! 5136: DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
! 5137: mcnt = *p++;
! 5138: goto matchsyntax;
! 5139:
! 5140: case wordchar:
! 5141: DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
! 5142: mcnt = (int) Sword;
! 5143: matchsyntax:
! 5144: PREFETCH ();
! 5145: /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
! 5146: d++;
! 5147: if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
! 5148: goto fail;
! 5149: SET_REGS_MATCHED ();
! 5150: break;
! 5151:
! 5152: case notsyntaxspec:
! 5153: DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
! 5154: mcnt = *p++;
! 5155: goto matchnotsyntax;
! 5156:
! 5157: case notwordchar:
! 5158: DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
! 5159: mcnt = (int) Sword;
! 5160: matchnotsyntax:
! 5161: PREFETCH ();
! 5162: /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
! 5163: d++;
! 5164: if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
! 5165: goto fail;
! 5166: SET_REGS_MATCHED ();
! 5167: break;
! 5168:
! 5169: #else /* not emacs */
! 5170: case wordchar:
! 5171: DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
! 5172: PREFETCH ();
! 5173: if (!WORDCHAR_P (d))
! 5174: goto fail;
! 5175: SET_REGS_MATCHED ();
! 5176: d++;
! 5177: break;
! 5178:
! 5179: case notwordchar:
! 5180: DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
! 5181: PREFETCH ();
! 5182: if (WORDCHAR_P (d))
! 5183: goto fail;
! 5184: SET_REGS_MATCHED ();
! 5185: d++;
! 5186: break;
! 5187: #endif /* not emacs */
! 5188:
! 5189: default:
! 5190: abort ();
! 5191: }
! 5192: continue; /* Successfully executed one pattern command; keep going. */
! 5193:
! 5194:
! 5195: /* We goto here if a matching operation fails. */
! 5196: fail:
! 5197: if (!FAIL_STACK_EMPTY ())
! 5198: { /* A restart point is known. Restore to that state. */
! 5199: DEBUG_PRINT1 ("\nFAIL:\n");
! 5200: POP_FAILURE_POINT (d, p,
! 5201: lowest_active_reg, highest_active_reg,
! 5202: regstart, regend, reg_info);
! 5203:
! 5204: /* If this failure point is a dummy, try the next one. */
! 5205: if (!p)
! 5206: goto fail;
! 5207:
! 5208: /* If we failed to the end of the pattern, don't examine *p. */
! 5209: assert (p <= pend);
! 5210: if (p < pend)
! 5211: {
! 5212: boolean is_a_jump_n = false;
! 5213:
! 5214: /* If failed to a backwards jump that's part of a repetition
! 5215: loop, need to pop this failure point and use the next one. */
! 5216: switch ((re_opcode_t) *p)
! 5217: {
! 5218: case jump_n:
! 5219: is_a_jump_n = true;
! 5220: case maybe_pop_jump:
! 5221: case pop_failure_jump:
! 5222: case jump:
! 5223: p1 = p + 1;
! 5224: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 5225: p1 += mcnt;
! 5226:
! 5227: if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
! 5228: || (!is_a_jump_n
! 5229: && (re_opcode_t) *p1 == on_failure_jump))
! 5230: goto fail;
! 5231: break;
! 5232: default:
! 5233: /* do nothing */ ;
! 5234: }
! 5235: }
! 5236:
! 5237: if (d >= string1 && d <= end1)
! 5238: dend = end_match_1;
! 5239: }
! 5240: else
! 5241: break; /* Matching at this starting point really fails. */
! 5242: } /* for (;;) */
! 5243:
! 5244: if (best_regs_set)
! 5245: goto restore_best_regs;
! 5246:
! 5247: FREE_VARIABLES ();
! 5248:
! 5249: return -1; /* Failure to match. */
! 5250: } /* re_match_2 */
! 5251:
! 5252: /* Subroutine definitions for re_match_2. */
! 5253:
! 5254:
! 5255: /* We are passed P pointing to a register number after a start_memory.
! 5256:
! 5257: Return true if the pattern up to the corresponding stop_memory can
! 5258: match the empty string, and false otherwise.
! 5259:
! 5260: If we find the matching stop_memory, sets P to point to one past its number.
! 5261: Otherwise, sets P to an undefined byte less than or equal to END.
! 5262:
! 5263: We don't handle duplicates properly (yet). */
! 5264:
! 5265: static boolean
! 5266: group_match_null_string_p (p, end, reg_info)
! 5267: unsigned char **p, *end;
! 5268: register_info_type *reg_info;
! 5269: {
! 5270: int mcnt;
! 5271: /* Point to after the args to the start_memory. */
! 5272: unsigned char *p1 = *p + 2;
! 5273:
! 5274: while (p1 < end)
! 5275: {
! 5276: /* Skip over opcodes that can match nothing, and return true or
! 5277: false, as appropriate, when we get to one that can't, or to the
! 5278: matching stop_memory. */
! 5279:
! 5280: switch ((re_opcode_t) *p1)
! 5281: {
! 5282: /* Could be either a loop or a series of alternatives. */
! 5283: case on_failure_jump:
! 5284: p1++;
! 5285: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 5286:
! 5287: /* If the next operation is not a jump backwards in the
! 5288: pattern. */
! 5289:
! 5290: if (mcnt >= 0)
! 5291: {
! 5292: /* Go through the on_failure_jumps of the alternatives,
! 5293: seeing if any of the alternatives cannot match nothing.
! 5294: The last alternative starts with only a jump,
! 5295: whereas the rest start with on_failure_jump and end
! 5296: with a jump, e.g., here is the pattern for `a|b|c':
! 5297:
! 5298: /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
! 5299: /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
! 5300: /exactn/1/c
! 5301:
! 5302: So, we have to first go through the first (n-1)
! 5303: alternatives and then deal with the last one separately. */
! 5304:
! 5305:
! 5306: /* Deal with the first (n-1) alternatives, which start
! 5307: with an on_failure_jump (see above) that jumps to right
! 5308: past a jump_past_alt. */
! 5309:
! 5310: while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
! 5311: {
! 5312: /* `mcnt' holds how many bytes long the alternative
! 5313: is, including the ending `jump_past_alt' and
! 5314: its number. */
! 5315:
! 5316: if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
! 5317: reg_info))
! 5318: return false;
! 5319:
! 5320: /* Move to right after this alternative, including the
! 5321: jump_past_alt. */
! 5322: p1 += mcnt;
! 5323:
! 5324: /* Break if it's the beginning of an n-th alternative
! 5325: that doesn't begin with an on_failure_jump. */
! 5326: if ((re_opcode_t) *p1 != on_failure_jump)
! 5327: break;
! 5328:
! 5329: /* Still have to check that it's not an n-th
! 5330: alternative that starts with an on_failure_jump. */
! 5331: p1++;
! 5332: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 5333: if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
! 5334: {
! 5335: /* Get to the beginning of the n-th alternative. */
! 5336: p1 -= 3;
! 5337: break;
! 5338: }
! 5339: }
! 5340:
! 5341: /* Deal with the last alternative: go back and get number
! 5342: of the `jump_past_alt' just before it. `mcnt' contains
! 5343: the length of the alternative. */
! 5344: EXTRACT_NUMBER (mcnt, p1 - 2);
! 5345:
! 5346: if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
! 5347: return false;
! 5348:
! 5349: p1 += mcnt; /* Get past the n-th alternative. */
! 5350: } /* if mcnt > 0 */
! 5351: break;
! 5352:
! 5353:
! 5354: case stop_memory:
! 5355: assert (p1[1] == **p);
! 5356: *p = p1 + 2;
! 5357: return true;
! 5358:
! 5359:
! 5360: default:
! 5361: if (!common_op_match_null_string_p (&p1, end, reg_info))
! 5362: return false;
! 5363: }
! 5364: } /* while p1 < end */
! 5365:
! 5366: return false;
! 5367: } /* group_match_null_string_p */
! 5368:
! 5369:
! 5370: /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
! 5371: It expects P to be the first byte of a single alternative and END one
! 5372: byte past the last. The alternative can contain groups. */
! 5373:
! 5374: static boolean
! 5375: alt_match_null_string_p (p, end, reg_info)
! 5376: unsigned char *p, *end;
! 5377: register_info_type *reg_info;
! 5378: {
! 5379: int mcnt;
! 5380: unsigned char *p1 = p;
! 5381:
! 5382: while (p1 < end)
! 5383: {
! 5384: /* Skip over opcodes that can match nothing, and break when we get
! 5385: to one that can't. */
! 5386:
! 5387: switch ((re_opcode_t) *p1)
! 5388: {
! 5389: /* It's a loop. */
! 5390: case on_failure_jump:
! 5391: p1++;
! 5392: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 5393: p1 += mcnt;
! 5394: break;
! 5395:
! 5396: default:
! 5397: if (!common_op_match_null_string_p (&p1, end, reg_info))
! 5398: return false;
! 5399: }
! 5400: } /* while p1 < end */
! 5401:
! 5402: return true;
! 5403: } /* alt_match_null_string_p */
! 5404:
! 5405:
! 5406: /* Deals with the ops common to group_match_null_string_p and
! 5407: alt_match_null_string_p.
! 5408:
! 5409: Sets P to one after the op and its arguments, if any. */
! 5410:
! 5411: static boolean
! 5412: common_op_match_null_string_p (p, end, reg_info)
! 5413: unsigned char **p, *end;
! 5414: register_info_type *reg_info;
! 5415: {
! 5416: int mcnt;
! 5417: boolean ret;
! 5418: int reg_no;
! 5419: unsigned char *p1 = *p;
! 5420:
! 5421: switch ((re_opcode_t) *p1++)
! 5422: {
! 5423: case no_op:
! 5424: case begline:
! 5425: case endline:
! 5426: case begbuf:
! 5427: case endbuf:
! 5428: case wordbeg:
! 5429: case wordend:
! 5430: case wordbound:
! 5431: case notwordbound:
! 5432: #ifdef emacs
! 5433: case before_dot:
! 5434: case at_dot:
! 5435: case after_dot:
! 5436: #endif
! 5437: break;
! 5438:
! 5439: case start_memory:
! 5440: reg_no = *p1;
! 5441: assert (reg_no > 0 && reg_no <= MAX_REGNUM);
! 5442: ret = group_match_null_string_p (&p1, end, reg_info);
! 5443:
! 5444: /* Have to set this here in case we're checking a group which
! 5445: contains a group and a back reference to it. */
! 5446:
! 5447: if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
! 5448: REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
! 5449:
! 5450: if (!ret)
! 5451: return false;
! 5452: break;
! 5453:
! 5454: /* If this is an optimized succeed_n for zero times, make the jump. */
! 5455: case jump:
! 5456: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 5457: if (mcnt >= 0)
! 5458: p1 += mcnt;
! 5459: else
! 5460: return false;
! 5461: break;
! 5462:
! 5463: case succeed_n:
! 5464: /* Get to the number of times to succeed. */
! 5465: p1 += 2;
! 5466: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 5467:
! 5468: if (mcnt == 0)
! 5469: {
! 5470: p1 -= 4;
! 5471: EXTRACT_NUMBER_AND_INCR (mcnt, p1);
! 5472: p1 += mcnt;
! 5473: }
! 5474: else
! 5475: return false;
! 5476: break;
! 5477:
! 5478: case duplicate:
! 5479: if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
! 5480: return false;
! 5481: break;
! 5482:
! 5483: case set_number_at:
! 5484: p1 += 4;
! 5485:
! 5486: default:
! 5487: /* All other opcodes mean we cannot match the empty string. */
! 5488: return false;
! 5489: }
! 5490:
! 5491: *p = p1;
! 5492: return true;
! 5493: } /* common_op_match_null_string_p */
! 5494:
! 5495:
! 5496: /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
! 5497: bytes; nonzero otherwise. */
! 5498:
! 5499: static int
! 5500: bcmp_translate (s1, s2, len, translate)
! 5501: const char *s1, *s2;
! 5502: register int len;
! 5503: RE_TRANSLATE_TYPE translate;
! 5504: {
! 5505: register const unsigned char *p1 = (const unsigned char *) s1;
! 5506: register const unsigned char *p2 = (const unsigned char *) s2;
! 5507: while (len)
! 5508: {
! 5509: if (translate[*p1++] != translate[*p2++]) return 1;
! 5510: len--;
! 5511: }
! 5512: return 0;
! 5513: }
! 5514:
! 5515: /* Entry points for GNU code. */
! 5516:
! 5517: /* re_compile_pattern is the GNU regular expression compiler: it
! 5518: compiles PATTERN (of length SIZE) and puts the result in BUFP.
! 5519: Returns 0 if the pattern was valid, otherwise an error string.
! 5520:
! 5521: Assumes the `allocated' (and perhaps `buffer') and `translate' fields
! 5522: are set in BUFP on entry.
! 5523:
! 5524: We call regex_compile to do the actual compilation. */
! 5525:
! 5526: const char *
! 5527: re_compile_pattern (pattern, length, bufp)
! 5528: const char *pattern;
! 5529: size_t length;
! 5530: struct re_pattern_buffer *bufp;
! 5531: {
! 5532: reg_errcode_t ret;
! 5533:
! 5534: /* GNU code is written to assume at least RE_NREGS registers will be set
! 5535: (and at least one extra will be -1). */
! 5536: bufp->regs_allocated = REGS_UNALLOCATED;
! 5537:
! 5538: /* And GNU code determines whether or not to get register information
! 5539: by passing null for the REGS argument to re_match, etc., not by
! 5540: setting no_sub. */
! 5541: bufp->no_sub = 0;
! 5542:
! 5543: /* Match anchors at newline. */
! 5544: bufp->newline_anchor = 1;
! 5545:
! 5546: ret = regex_compile (pattern, length, re_syntax_options, bufp);
! 5547:
! 5548: if (!ret)
! 5549: return NULL;
! 5550: return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
! 5551: }
! 5552: #ifdef _LIBC
! 5553: weak_alias (__re_compile_pattern, re_compile_pattern)
! 5554: #endif
! 5555:
! 5556: /* Entry points compatible with 4.2 BSD regex library. We don't define
! 5557: them unless specifically requested. */
! 5558:
! 5559: #if defined _REGEX_RE_COMP || defined _LIBC
! 5560:
! 5561: /* BSD has one and only one pattern buffer. */
! 5562: static struct re_pattern_buffer re_comp_buf;
! 5563:
! 5564: char *
! 5565: #ifdef _LIBC
! 5566: /* Make these definitions weak in libc, so POSIX programs can redefine
! 5567: these names if they don't use our functions, and still use
! 5568: regcomp/regexec below without link errors. */
! 5569: weak_function
! 5570: #endif
! 5571: re_comp (s)
! 5572: const char *s;
! 5573: {
! 5574: reg_errcode_t ret;
! 5575:
! 5576: if (!s)
! 5577: {
! 5578: if (!re_comp_buf.buffer)
! 5579: return gettext ("No previous regular expression");
! 5580: return 0;
! 5581: }
! 5582:
! 5583: if (!re_comp_buf.buffer)
! 5584: {
! 5585: re_comp_buf.buffer = (unsigned char *) malloc (200);
! 5586: if (re_comp_buf.buffer == NULL)
! 5587: return (char *) gettext (re_error_msgid
! 5588: + re_error_msgid_idx[(int) REG_ESPACE]);
! 5589: re_comp_buf.allocated = 200;
! 5590:
! 5591: re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
! 5592: if (re_comp_buf.fastmap == NULL)
! 5593: return (char *) gettext (re_error_msgid
! 5594: + re_error_msgid_idx[(int) REG_ESPACE]);
! 5595: }
! 5596:
! 5597: /* Since `re_exec' always passes NULL for the `regs' argument, we
! 5598: don't need to initialize the pattern buffer fields which affect it. */
! 5599:
! 5600: /* Match anchors at newlines. */
! 5601: re_comp_buf.newline_anchor = 1;
! 5602:
! 5603: ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
! 5604:
! 5605: if (!ret)
! 5606: return NULL;
! 5607:
! 5608: /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
! 5609: return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
! 5610: }
! 5611:
! 5612:
! 5613: int
! 5614: #ifdef _LIBC
! 5615: weak_function
! 5616: #endif
! 5617: re_exec (s)
! 5618: const char *s;
! 5619: {
! 5620: const int len = strlen (s);
! 5621: return
! 5622: 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
! 5623: }
! 5624:
! 5625: #endif /* _REGEX_RE_COMP */
! 5626:
! 5627: /* POSIX.2 functions. Don't define these for Emacs. */
! 5628:
! 5629: #ifndef emacs
! 5630:
! 5631: /* regcomp takes a regular expression as a string and compiles it.
! 5632:
! 5633: PREG is a regex_t *. We do not expect any fields to be initialized,
! 5634: since POSIX says we shouldn't. Thus, we set
! 5635:
! 5636: `buffer' to the compiled pattern;
! 5637: `used' to the length of the compiled pattern;
! 5638: `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
! 5639: REG_EXTENDED bit in CFLAGS is set; otherwise, to
! 5640: RE_SYNTAX_POSIX_BASIC;
! 5641: `newline_anchor' to REG_NEWLINE being set in CFLAGS;
! 5642: `fastmap' to an allocated space for the fastmap;
! 5643: `fastmap_accurate' to zero;
! 5644: `re_nsub' to the number of subexpressions in PATTERN.
! 5645:
! 5646: PATTERN is the address of the pattern string.
! 5647:
! 5648: CFLAGS is a series of bits which affect compilation.
! 5649:
! 5650: If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
! 5651: use POSIX basic syntax.
! 5652:
! 5653: If REG_NEWLINE is set, then . and [^...] don't match newline.
! 5654: Also, regexec will try a match beginning after every newline.
! 5655:
! 5656: If REG_ICASE is set, then we consider upper- and lowercase
! 5657: versions of letters to be equivalent when matching.
! 5658:
! 5659: If REG_NOSUB is set, then when PREG is passed to regexec, that
! 5660: routine will report only success or failure, and nothing about the
! 5661: registers.
! 5662:
! 5663: It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
! 5664: the return codes and their meanings.) */
! 5665:
! 5666: int
! 5667: regcomp (preg, pattern, cflags)
! 5668: regex_t *preg;
! 5669: const char *pattern;
! 5670: int cflags;
! 5671: {
! 5672: reg_errcode_t ret;
! 5673: reg_syntax_t syntax
! 5674: = (cflags & REG_EXTENDED) ?
! 5675: RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
! 5676:
! 5677: /* regex_compile will allocate the space for the compiled pattern. */
! 5678: preg->buffer = 0;
! 5679: preg->allocated = 0;
! 5680: preg->used = 0;
! 5681:
! 5682: /* Try to allocate space for the fastmap. */
! 5683: preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
! 5684:
! 5685: if (cflags & REG_ICASE)
! 5686: {
! 5687: unsigned i;
! 5688:
! 5689: preg->translate
! 5690: = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
! 5691: * sizeof (*(RE_TRANSLATE_TYPE)0));
! 5692: if (preg->translate == NULL)
! 5693: return (int) REG_ESPACE;
! 5694:
! 5695: /* Map uppercase characters to corresponding lowercase ones. */
! 5696: for (i = 0; i < CHAR_SET_SIZE; i++)
! 5697: preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
! 5698: }
! 5699: else
! 5700: preg->translate = NULL;
! 5701:
! 5702: /* If REG_NEWLINE is set, newlines are treated differently. */
! 5703: if (cflags & REG_NEWLINE)
! 5704: { /* REG_NEWLINE implies neither . nor [^...] match newline. */
! 5705: syntax &= ~RE_DOT_NEWLINE;
! 5706: syntax |= RE_HAT_LISTS_NOT_NEWLINE;
! 5707: /* It also changes the matching behavior. */
! 5708: preg->newline_anchor = 1;
! 5709: }
! 5710: else
! 5711: preg->newline_anchor = 0;
! 5712:
! 5713: preg->no_sub = !!(cflags & REG_NOSUB);
! 5714:
! 5715: /* POSIX says a null character in the pattern terminates it, so we
! 5716: can use strlen here in compiling the pattern. */
! 5717: ret = regex_compile (pattern, strlen (pattern), syntax, preg);
! 5718:
! 5719: /* POSIX doesn't distinguish between an unmatched open-group and an
! 5720: unmatched close-group: both are REG_EPAREN. */
! 5721: if (ret == REG_ERPAREN) ret = REG_EPAREN;
! 5722:
! 5723: if (ret == REG_NOERROR && preg->fastmap)
! 5724: {
! 5725: /* Compute the fastmap now, since regexec cannot modify the pattern
! 5726: buffer. */
! 5727: if (re_compile_fastmap (preg) == -2)
! 5728: {
! 5729: /* Some error occured while computing the fastmap, just forget
! 5730: about it. */
! 5731: free (preg->fastmap);
! 5732: preg->fastmap = NULL;
! 5733: }
! 5734: }
! 5735:
! 5736: return (int) ret;
! 5737: }
! 5738: #ifdef _LIBC
! 5739: weak_alias (__regcomp, regcomp)
! 5740: #endif
! 5741:
! 5742:
! 5743: /* regexec searches for a given pattern, specified by PREG, in the
! 5744: string STRING.
! 5745:
! 5746: If NMATCH is zero or REG_NOSUB was set in the cflags argument to
! 5747: `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
! 5748: least NMATCH elements, and we set them to the offsets of the
! 5749: corresponding matched substrings.
! 5750:
! 5751: EFLAGS specifies `execution flags' which affect matching: if
! 5752: REG_NOTBOL is set, then ^ does not match at the beginning of the
! 5753: string; if REG_NOTEOL is set, then $ does not match at the end.
! 5754:
! 5755: We return 0 if we find a match and REG_NOMATCH if not. */
! 5756:
! 5757: int
! 5758: regexec (preg, string, nmatch, pmatch, eflags)
! 5759: const regex_t *preg;
! 5760: const char *string;
! 5761: size_t nmatch;
! 5762: regmatch_t pmatch[];
! 5763: int eflags;
! 5764: {
! 5765: int ret;
! 5766: struct re_registers regs;
! 5767: regex_t private_preg;
! 5768: int len = strlen (string);
! 5769: boolean want_reg_info = !preg->no_sub && nmatch > 0;
! 5770:
! 5771: private_preg = *preg;
! 5772:
! 5773: private_preg.not_bol = !!(eflags & REG_NOTBOL);
! 5774: private_preg.not_eol = !!(eflags & REG_NOTEOL);
! 5775:
! 5776: /* The user has told us exactly how many registers to return
! 5777: information about, via `nmatch'. We have to pass that on to the
! 5778: matching routines. */
! 5779: private_preg.regs_allocated = REGS_FIXED;
! 5780:
! 5781: if (want_reg_info)
! 5782: {
! 5783: regs.num_regs = nmatch;
! 5784: regs.start = TALLOC (nmatch * 2, regoff_t);
! 5785: if (regs.start == NULL)
! 5786: return (int) REG_NOMATCH;
! 5787: regs.end = regs.start + nmatch;
! 5788: }
! 5789:
! 5790: /* Perform the searching operation. */
! 5791: ret = re_search (&private_preg, string, len,
! 5792: /* start: */ 0, /* range: */ len,
! 5793: want_reg_info ? ®s : (struct re_registers *) 0);
! 5794:
! 5795: /* Copy the register information to the POSIX structure. */
! 5796: if (want_reg_info)
! 5797: {
! 5798: if (ret >= 0)
! 5799: {
! 5800: unsigned r;
! 5801:
! 5802: for (r = 0; r < nmatch; r++)
! 5803: {
! 5804: pmatch[r].rm_so = regs.start[r];
! 5805: pmatch[r].rm_eo = regs.end[r];
! 5806: }
! 5807: }
! 5808:
! 5809: /* If we needed the temporary register info, free the space now. */
! 5810: free (regs.start);
! 5811: }
! 5812:
! 5813: /* We want zero return to mean success, unlike `re_search'. */
! 5814: return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
! 5815: }
! 5816: #ifdef _LIBC
! 5817: weak_alias (__regexec, regexec)
! 5818: #endif
! 5819:
! 5820:
! 5821: /* Returns a message corresponding to an error code, ERRCODE, returned
! 5822: from either regcomp or regexec. We don't use PREG here. */
! 5823:
! 5824: size_t
! 5825: regerror (err, preg, errbuf, errbuf_size)
! 5826: int err;
! 5827: const regex_t *preg;
! 5828: char *errbuf;
! 5829: size_t errbuf_size;
! 5830: {
! 5831: const char *msg;
! 5832: size_t msg_size;
! 5833:
! 5834: if (err < 0
! 5835: || err >= (int) (sizeof (re_error_msgid_idx)
! 5836: / sizeof (re_error_msgid_idx[0])))
! 5837: /* Only error codes returned by the rest of the code should be passed
! 5838: to this routine. If we are given anything else, or if other regex
! 5839: code generates an invalid error code, then the program has a bug.
! 5840: Dump core so we can fix it. */
! 5841: abort ();
! 5842:
! 5843: msg = gettext (re_error_msgid + re_error_msgid_idx[err]);
! 5844:
! 5845: msg_size = strlen (msg) + 1; /* Includes the null. */
! 5846:
! 5847: if (errbuf_size != 0)
! 5848: {
! 5849: if (msg_size > errbuf_size)
! 5850: {
! 5851: #if defined HAVE_MEMPCPY || defined _LIBC
! 5852: *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
! 5853: #else
! 5854: memcpy (errbuf, msg, errbuf_size - 1);
! 5855: errbuf[errbuf_size - 1] = 0;
! 5856: #endif
! 5857: }
! 5858: else
! 5859: memcpy (errbuf, msg, msg_size);
! 5860: }
! 5861:
! 5862: return msg_size;
! 5863: }
! 5864: #ifdef _LIBC
! 5865: weak_alias (__regerror, regerror)
! 5866: #endif
! 5867:
! 5868:
! 5869: /* Free dynamically allocated space used by PREG. */
! 5870:
! 5871: void
! 5872: regfree (preg)
! 5873: regex_t *preg;
! 5874: {
! 5875: if (preg->buffer != NULL)
! 5876: free (preg->buffer);
! 5877: preg->buffer = NULL;
! 5878:
! 5879: preg->allocated = 0;
! 5880: preg->used = 0;
! 5881:
! 5882: if (preg->fastmap != NULL)
! 5883: free (preg->fastmap);
! 5884: preg->fastmap = NULL;
! 5885: preg->fastmap_accurate = 0;
! 5886:
! 5887: if (preg->translate != NULL)
! 5888: free (preg->translate);
! 5889: preg->translate = NULL;
! 5890: }
! 5891: #ifdef _LIBC
! 5892: weak_alias (__regfree, regfree)
! 5893: #endif
! 5894:
! 5895: #endif /* not emacs */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>