File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / quagga / lib / regex.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Nov 2 10:09:11 2016 UTC (7 years, 7 months ago) by misho
Branches: quagga, MAIN
CVS tags: v1_0_20160315, HEAD
quagga 1.0.20160315

    1: /* Extended regular expression matching and search library,
    2:    version 0.12.
    3:    (Implements POSIX draft P1003.2/D11.2, except for some of the
    4:    internationalization features.)
    5:    Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
    6: 
    7:    The GNU C Library is free software; you can redistribute it and/or
    8:    modify it under the terms of the GNU Library General Public License as
    9:    published by the Free Software Foundation; either version 2 of the
   10:    License, or (at your option) any later version.
   11: 
   12:    The GNU C Library is distributed in the hope that it will be useful,
   13:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   14:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   15:    Library General Public License for more details.
   16: 
   17:    You should have received a copy of the GNU Library General Public
   18:    License along with the GNU C Library; see the file COPYING.LIB.  If not,
   19:    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   20:    Boston, MA 02111-1307, USA.  */
   21: 
   22: /* AIX requires this to be the first thing in the file. */
   23: #if defined _AIX && !defined REGEX_MALLOC
   24:   #pragma alloca
   25: #endif
   26: 
   27: #undef	_GNU_SOURCE
   28: #define _GNU_SOURCE
   29: 
   30: #ifdef HAVE_CONFIG_H
   31: # include <config.h>
   32: #endif
   33: #ifdef _WIN32
   34: /* Windows does not provide unistd.h, which is required for abort() */
   35: #include <process.h>
   36: #endif /* _WIN32 */
   37: 
   38: #ifndef PARAMS
   39: # if defined __GNUC__ || (defined __STDC__ && __STDC__)
   40: #  define PARAMS(args) args
   41: # else
   42: #  define PARAMS(args) ()
   43: # endif  /* GCC.  */
   44: #endif  /* Not PARAMS.  */
   45: 
   46: #if defined STDC_HEADERS && !defined emacs
   47: # include <stddef.h>
   48: #else
   49: /* We need this for `regex.h', and perhaps for the Emacs include files.  */
   50: # include <sys/types.h>
   51: #endif
   52: 
   53: #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
   54: 
   55: /* For platforms which support the ISO C Amendment 1 functionality we
   56:    support user-defined character classes.  */
   57: #if defined _LIBC || WIDE_CHAR_SUPPORT
   58: /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
   59: # include <wchar.h>
   60: # include <wctype.h>
   61: #endif
   62: 
   63: #ifdef _LIBC
   64: /* We have to keep the namespace clean.  */
   65: # define regfree(preg) __regfree (preg)
   66: # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
   67: # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
   68: # define regerror(errcode, preg, errbuf, errbuf_size) \
   69: 	__regerror(errcode, preg, errbuf, errbuf_size)
   70: # define re_set_registers(bu, re, nu, st, en) \
   71: 	__re_set_registers (bu, re, nu, st, en)
   72: # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
   73: 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
   74: # define re_match(bufp, string, size, pos, regs) \
   75: 	__re_match (bufp, string, size, pos, regs)
   76: # define re_search(bufp, string, size, startpos, range, regs) \
   77: 	__re_search (bufp, string, size, startpos, range, regs)
   78: # define re_compile_pattern(pattern, length, bufp) \
   79: 	__re_compile_pattern (pattern, length, bufp)
   80: # define re_set_syntax(syntax) __re_set_syntax (syntax)
   81: # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
   82: 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
   83: # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
   84: 
   85: #define btowc __btowc
   86: #endif
   87: 
   88: /* This is for other GNU distributions with internationalized messages.  */
   89: #if HAVE_LIBINTL_H || defined _LIBC
   90: # include <libintl.h>
   91: #else
   92: # define gettext(msgid) (msgid)
   93: #endif
   94: 
   95: #ifndef gettext_noop
   96: /* This define is so xgettext can find the internationalizable
   97:    strings.  */
   98: # define gettext_noop(String) String
   99: #endif
  100: 
  101: /* The `emacs' switch turns on certain matching commands
  102:    that make sense only in Emacs. */
  103: #ifdef emacs
  104: 
  105: # include "lisp.h"
  106: # include "buffer.h"
  107: # include "syntax.h"
  108: 
  109: #else  /* not emacs */
  110: 
  111: /* If we are not linking with Emacs proper,
  112:    we can't use the relocating allocator
  113:    even if config.h says that we can.  */
  114: # undef REL_ALLOC
  115: 
  116: # if defined STDC_HEADERS || defined _LIBC
  117: #  include <stdlib.h>
  118: # else
  119: char *malloc ();
  120: char *realloc ();
  121: # endif
  122: 
  123: /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
  124:    If nothing else has been done, use the method below.  */
  125: # ifdef INHIBIT_STRING_HEADER
  126: #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
  127: #   if !defined bzero && !defined bcopy
  128: #    undef INHIBIT_STRING_HEADER
  129: #   endif
  130: #  endif
  131: # endif
  132: 
  133: /* This is the normal way of making sure we have a bcopy and a bzero.
  134:    This is used in most programs--a few other programs avoid this
  135:    by defining INHIBIT_STRING_HEADER.  */
  136: # ifndef INHIBIT_STRING_HEADER
  137: #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
  138: #   include <string.h>
  139: #   ifndef bzero
  140: #    ifndef _LIBC
  141: #     define bzero(s, n)	(memset (s, '\0', n), (s))
  142: #    else
  143: #     define bzero(s, n)	__bzero (s, n)
  144: #    endif
  145: #   endif
  146: #  else
  147: #   include <strings.h>
  148: #   ifndef memcmp
  149: #    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
  150: #   endif
  151: #   ifndef memcpy
  152: #    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
  153: #   endif
  154: #  endif
  155: # endif
  156: 
  157: /* Define the syntax stuff for \<, \>, etc.  */
  158: 
  159: /* This must be nonzero for the wordchar and notwordchar pattern
  160:    commands in re_match_2.  */
  161: # ifndef Sword
  162: #  define Sword 1
  163: # endif
  164: 
  165: # ifdef SWITCH_ENUM_BUG
  166: #  define SWITCH_ENUM_CAST(x) ((int)(x))
  167: # else
  168: #  define SWITCH_ENUM_CAST(x) (x)
  169: # endif
  170: 
  171: /* How many characters in the character set.  */
  172: # define CHAR_SET_SIZE 256
  173: 
  174: # ifdef SYNTAX_TABLE
  175: 
  176: extern char *re_syntax_table;
  177: 
  178: # else /* not SYNTAX_TABLE */
  179: 
  180: static char re_syntax_table[CHAR_SET_SIZE];
  181: 
  182: static void
  183: init_syntax_once ()
  184: {
  185:    register int c;
  186:    static int done;
  187: 
  188:    if (done)
  189:      return;
  190: 
  191:    memset (re_syntax_table, 0, sizeof re_syntax_table);
  192: 
  193:    for (c = 'a'; c <= 'z'; c++)
  194:      re_syntax_table[c] = Sword;
  195: 
  196:    for (c = 'A'; c <= 'Z'; c++)
  197:      re_syntax_table[c] = Sword;
  198: 
  199:    for (c = '0'; c <= '9'; c++)
  200:      re_syntax_table[c] = Sword;
  201: 
  202:    re_syntax_table['_'] = Sword;
  203: 
  204:    done = 1;
  205: }
  206: 
  207: # endif /* not SYNTAX_TABLE */
  208: 
  209: # define SYNTAX(c) re_syntax_table[c]
  210: 
  211: #endif /* not emacs */
  212: 
  213: /* Get the interface, including the syntax bits.  */
  214: #include <regex-gnu.h>
  215: 
  216: /* isalpha etc. are used for the character classes.  */
  217: #include <ctype.h>
  218: 
  219: /* Jim Meyering writes:
  220: 
  221:    "... Some ctype macros are valid only for character codes that
  222:    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
  223:    using /bin/cc or gcc but without giving an ansi option).  So, all
  224:    ctype uses should be through macros like ISPRINT...  If
  225:    STDC_HEADERS is defined, then autoconf has verified that the ctype
  226:    macros don't need to be guarded with references to isascii. ...
  227:    Defining isascii to 1 should let any compiler worth its salt
  228:    eliminate the && through constant folding."
  229:    Solaris defines some of these symbols so we must undefine them first.  */
  230: 
  231: #undef ISASCII
  232: #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
  233: # define ISASCII(c) 1
  234: #else
  235: # define ISASCII(c) isascii(c)
  236: #endif
  237: 
  238: #ifdef isblank
  239: # define ISBLANK(c) (ISASCII (c) && isblank (c))
  240: #else
  241: # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
  242: #endif
  243: #ifdef isgraph
  244: # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
  245: #else
  246: # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
  247: #endif
  248: 
  249: #undef ISPRINT
  250: #define ISPRINT(c) (ISASCII (c) && isprint (c))
  251: #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
  252: #define ISALNUM(c) (ISASCII (c) && isalnum (c))
  253: #define ISALPHA(c) (ISASCII (c) && isalpha (c))
  254: #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
  255: #define ISLOWER(c) (ISASCII (c) && islower (c))
  256: #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
  257: #define ISSPACE(c) (ISASCII (c) && isspace (c))
  258: #define ISUPPER(c) (ISASCII (c) && isupper (c))
  259: #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
  260: 
  261: #ifdef _tolower
  262: # define TOLOWER(c) _tolower(c)
  263: #else
  264: # define TOLOWER(c) tolower(c)
  265: #endif
  266: 
  267: #ifndef NULL
  268: # define NULL (void *)0
  269: #endif
  270: 
  271: /* We remove any previous definition of `SIGN_EXTEND_CHAR',
  272:    since ours (we hope) works properly with all combinations of
  273:    machines, compilers, `char' and `unsigned char' argument types.
  274:    (Per Bothner suggested the basic approach.)  */
  275: #undef SIGN_EXTEND_CHAR
  276: #if __STDC__
  277: # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
  278: #else  /* not __STDC__ */
  279: /* As in Harbison and Steele.  */
  280: # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
  281: #endif
  282: 
  283: /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
  284:    use `alloca' instead of `malloc'.  This is because using malloc in
  285:    re_search* or re_match* could cause memory leaks when C-g is used in
  286:    Emacs; also, malloc is slower and causes storage fragmentation.  On
  287:    the other hand, malloc is more portable, and easier to debug.
  288: 
  289:    Because we sometimes use alloca, some routines have to be macros,
  290:    not functions -- `alloca'-allocated space disappears at the end of the
  291:    function it is called in.  */
  292: 
  293: #ifdef REGEX_MALLOC
  294: 
  295: # define REGEX_ALLOCATE malloc
  296: # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
  297: # define REGEX_FREE free
  298: 
  299: #else /* not REGEX_MALLOC  */
  300: 
  301: /* Emacs already defines alloca, sometimes.  */
  302: # ifndef alloca
  303: 
  304: /* Make alloca work the best possible way.  */
  305: #  ifdef __GNUC__
  306: #   define alloca __builtin_alloca
  307: #  else /* not __GNUC__ */
  308: #   if HAVE_ALLOCA_H
  309: #    include <alloca.h>
  310: #   endif /* HAVE_ALLOCA_H */
  311: #  endif /* not __GNUC__ */
  312: 
  313: # endif /* not alloca */
  314: 
  315: # define REGEX_ALLOCATE alloca
  316: 
  317: /* Assumes a `char *destination' variable.  */
  318: # define REGEX_REALLOCATE(source, osize, nsize)				\
  319:   (destination = (char *) alloca (nsize),				\
  320:    memcpy (destination, source, osize))
  321: 
  322: /* No need to do anything to free, after alloca.  */
  323: # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
  324: 
  325: #endif /* not REGEX_MALLOC */
  326: 
  327: /* Define how to allocate the failure stack.  */
  328: 
  329: #if defined REL_ALLOC && defined REGEX_MALLOC
  330: 
  331: # define REGEX_ALLOCATE_STACK(size)				\
  332:   r_alloc (&failure_stack_ptr, (size))
  333: # define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
  334:   r_re_alloc (&failure_stack_ptr, (nsize))
  335: # define REGEX_FREE_STACK(ptr)					\
  336:   r_alloc_free (&failure_stack_ptr)
  337: 
  338: #else /* not using relocating allocator */
  339: 
  340: # ifdef REGEX_MALLOC
  341: 
  342: #  define REGEX_ALLOCATE_STACK malloc
  343: #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
  344: #  define REGEX_FREE_STACK free
  345: 
  346: # else /* not REGEX_MALLOC */
  347: 
  348: #  define REGEX_ALLOCATE_STACK alloca
  349: 
  350: #  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
  351:    REGEX_REALLOCATE (source, osize, nsize)
  352: /* No need to explicitly free anything.  */
  353: #  define REGEX_FREE_STACK(arg)
  354: 
  355: # endif /* not REGEX_MALLOC */
  356: #endif /* not using relocating allocator */
  357: 
  358: 
  359: /* True if `size1' is non-NULL and PTR is pointing anywhere inside
  360:    `string1' or just past its end.  This works if PTR is NULL, which is
  361:    a good thing.  */
  362: #define FIRST_STRING_P(ptr) 					\
  363:   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
  364: 
  365: /* (Re)Allocate N items of type T using malloc, or fail.  */
  366: #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
  367: #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
  368: #define RETALLOC_IF(addr, n, t) \
  369:   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
  370: #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
  371: 
  372: #define BYTEWIDTH 8 /* In bits.  */
  373: 
  374: #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
  375: 
  376: #undef MAX
  377: #undef MIN
  378: #define MAX(a, b) ((a) > (b) ? (a) : (b))
  379: #define MIN(a, b) ((a) < (b) ? (a) : (b))
  380: 
  381: typedef char boolean;
  382: #define false 0
  383: #define true 1
  384: 
  385: static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
  386: 					const char *string1, int size1,
  387: 					const char *string2, int size2,
  388: 					int pos,
  389: 					struct re_registers *regs,
  390: 					int stop));
  391: 
  392: /* These are the command codes that appear in compiled regular
  393:    expressions.  Some opcodes are followed by argument bytes.  A
  394:    command code can specify any interpretation whatsoever for its
  395:    arguments.  Zero bytes may appear in the compiled regular expression.  */
  396: 
  397: typedef enum
  398: {
  399:   no_op = 0,
  400: 
  401:   /* Succeed right away--no more backtracking.  */
  402:   succeed,
  403: 
  404:         /* Followed by one byte giving n, then by n literal bytes.  */
  405:   exactn,
  406: 
  407:         /* Matches any (more or less) character.  */
  408:   anychar,
  409: 
  410:         /* Matches any one char belonging to specified set.  First
  411:            following byte is number of bitmap bytes.  Then come bytes
  412:            for a bitmap saying which chars are in.  Bits in each byte
  413:            are ordered low-bit-first.  A character is in the set if its
  414:            bit is 1.  A character too large to have a bit in the map is
  415:            automatically not in the set.  */
  416:   charset,
  417: 
  418:         /* Same parameters as charset, but match any character that is
  419:            not one of those specified.  */
  420:   charset_not,
  421: 
  422:         /* Start remembering the text that is matched, for storing in a
  423:            register.  Followed by one byte with the register number, in
  424:            the range 0 to one less than the pattern buffer's re_nsub
  425:            field.  Then followed by one byte with the number of groups
  426:            inner to this one.  (This last has to be part of the
  427:            start_memory only because we need it in the on_failure_jump
  428:            of re_match_2.)  */
  429:   start_memory,
  430: 
  431:         /* Stop remembering the text that is matched and store it in a
  432:            memory register.  Followed by one byte with the register
  433:            number, in the range 0 to one less than `re_nsub' in the
  434:            pattern buffer, and one byte with the number of inner groups,
  435:            just like `start_memory'.  (We need the number of inner
  436:            groups here because we don't have any easy way of finding the
  437:            corresponding start_memory when we're at a stop_memory.)  */
  438:   stop_memory,
  439: 
  440:         /* Match a duplicate of something remembered. Followed by one
  441:            byte containing the register number.  */
  442:   duplicate,
  443: 
  444:         /* Fail unless at beginning of line.  */
  445:   begline,
  446: 
  447:         /* Fail unless at end of line.  */
  448:   endline,
  449: 
  450:         /* Succeeds if at beginning of buffer (if emacs) or at beginning
  451:            of string to be matched (if not).  */
  452:   begbuf,
  453: 
  454:         /* Analogously, for end of buffer/string.  */
  455:   endbuf,
  456: 
  457:         /* Followed by two byte relative address to which to jump.  */
  458:   jump,
  459: 
  460: 	/* Same as jump, but marks the end of an alternative.  */
  461:   jump_past_alt,
  462: 
  463:         /* Followed by two-byte relative address of place to resume at
  464:            in case of failure.  */
  465:   on_failure_jump,
  466: 
  467:         /* Like on_failure_jump, but pushes a placeholder instead of the
  468:            current string position when executed.  */
  469:   on_failure_keep_string_jump,
  470: 
  471:         /* Throw away latest failure point and then jump to following
  472:            two-byte relative address.  */
  473:   pop_failure_jump,
  474: 
  475:         /* Change to pop_failure_jump if know won't have to backtrack to
  476:            match; otherwise change to jump.  This is used to jump
  477:            back to the beginning of a repeat.  If what follows this jump
  478:            clearly won't match what the repeat does, such that we can be
  479:            sure that there is no use backtracking out of repetitions
  480:            already matched, then we change it to a pop_failure_jump.
  481:            Followed by two-byte address.  */
  482:   maybe_pop_jump,
  483: 
  484:         /* Jump to following two-byte address, and push a dummy failure
  485:            point. This failure point will be thrown away if an attempt
  486:            is made to use it for a failure.  A `+' construct makes this
  487:            before the first repeat.  Also used as an intermediary kind
  488:            of jump when compiling an alternative.  */
  489:   dummy_failure_jump,
  490: 
  491: 	/* Push a dummy failure point and continue.  Used at the end of
  492: 	   alternatives.  */
  493:   push_dummy_failure,
  494: 
  495:         /* Followed by two-byte relative address and two-byte number n.
  496:            After matching N times, jump to the address upon failure.  */
  497:   succeed_n,
  498: 
  499:         /* Followed by two-byte relative address, and two-byte number n.
  500:            Jump to the address N times, then fail.  */
  501:   jump_n,
  502: 
  503:         /* Set the following two-byte relative address to the
  504:            subsequent two-byte number.  The address *includes* the two
  505:            bytes of number.  */
  506:   set_number_at,
  507: 
  508:   wordchar,	/* Matches any word-constituent character.  */
  509:   notwordchar,	/* Matches any char that is not a word-constituent.  */
  510: 
  511:   wordbeg,	/* Succeeds if at word beginning.  */
  512:   wordend,	/* Succeeds if at word end.  */
  513: 
  514:   wordbound,	/* Succeeds if at a word boundary.  */
  515:   notwordbound	/* Succeeds if not at a word boundary.  */
  516: 
  517: #ifdef emacs
  518:   ,before_dot,	/* Succeeds if before point.  */
  519:   at_dot,	/* Succeeds if at point.  */
  520:   after_dot,	/* Succeeds if after point.  */
  521: 
  522: 	/* Matches any character whose syntax is specified.  Followed by
  523:            a byte which contains a syntax code, e.g., Sword.  */
  524:   syntaxspec,
  525: 
  526: 	/* Matches any character whose syntax is not that specified.  */
  527:   notsyntaxspec
  528: #endif /* emacs */
  529: } re_opcode_t;
  530: 
  531: /* Common operations on the compiled pattern.  */
  532: 
  533: /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
  534: 
  535: #define STORE_NUMBER(destination, number)				\
  536:   do {									\
  537:     (destination)[0] = (number) & 0377;					\
  538:     (destination)[1] = (number) >> 8;					\
  539:   } while (0)
  540: 
  541: /* Same as STORE_NUMBER, except increment DESTINATION to
  542:    the byte after where the number is stored.  Therefore, DESTINATION
  543:    must be an lvalue.  */
  544: 
  545: #define STORE_NUMBER_AND_INCR(destination, number)			\
  546:   do {									\
  547:     STORE_NUMBER (destination, number);					\
  548:     (destination) += 2;							\
  549:   } while (0)
  550: 
  551: /* Put into DESTINATION a number stored in two contiguous bytes starting
  552:    at SOURCE.  */
  553: 
  554: #define EXTRACT_NUMBER(destination, source)				\
  555:   do {									\
  556:     (destination) = *(source) & 0377;					\
  557:     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
  558:   } while (0)
  559: 
  560: #ifdef DEBUG
  561: static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
  562: static void
  563: extract_number (dest, source)
  564:     int *dest;
  565:     unsigned char *source;
  566: {
  567:   int temp = SIGN_EXTEND_CHAR (*(source + 1));
  568:   *dest = *source & 0377;
  569:   *dest += temp << 8;
  570: }
  571: 
  572: # ifndef EXTRACT_MACROS /* To debug the macros.  */
  573: #  undef EXTRACT_NUMBER
  574: #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
  575: # endif /* not EXTRACT_MACROS */
  576: 
  577: #endif /* DEBUG */
  578: 
  579: /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
  580:    SOURCE must be an lvalue.  */
  581: 
  582: #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  583:   do {									\
  584:     EXTRACT_NUMBER (destination, source);				\
  585:     (source) += 2; 							\
  586:   } while (0)
  587: 
  588: #ifdef DEBUG
  589: static void extract_number_and_incr _RE_ARGS ((int *destination,
  590: 					       unsigned char **source));
  591: static void
  592: extract_number_and_incr (destination, source)
  593:     int *destination;
  594:     unsigned char **source;
  595: {
  596:   extract_number (destination, *source);
  597:   *source += 2;
  598: }
  599: 
  600: # ifndef EXTRACT_MACROS
  601: #  undef EXTRACT_NUMBER_AND_INCR
  602: #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
  603:   extract_number_and_incr (&dest, &src)
  604: # endif /* not EXTRACT_MACROS */
  605: 
  606: #endif /* DEBUG */
  607: 
  608: /* If DEBUG is defined, Regex prints many voluminous messages about what
  609:    it is doing (if the variable `debug' is nonzero).  If linked with the
  610:    main program in `iregex.c', you can enter patterns and strings
  611:    interactively.  And if linked with the main program in `main.c' and
  612:    the other test files, you can run the already-written tests.  */
  613: 
  614: #ifdef DEBUG
  615: 
  616: /* We use standard I/O for debugging.  */
  617: # include <stdio.h>
  618: 
  619: /* It is useful to test things that ``must'' be true when debugging.  */
  620: # include "zassert.h"
  621: 
  622: static int debug;
  623: 
  624: # define DEBUG_STATEMENT(e) e
  625: # define DEBUG_PRINT1(x) if (debug) printf (x)
  626: # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
  627: # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
  628: # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
  629: # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
  630:   if (debug) print_partial_compiled_pattern (s, e)
  631: # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
  632:   if (debug) print_double_string (w, s1, sz1, s2, sz2)
  633: 
  634: 
  635: /* Print the fastmap in human-readable form.  */
  636: 
  637: void
  638: print_fastmap (fastmap)
  639:     char *fastmap;
  640: {
  641:   unsigned was_a_range = 0;
  642:   unsigned i = 0;
  643: 
  644:   while (i < (1 << BYTEWIDTH))
  645:     {
  646:       if (fastmap[i++])
  647: 	{
  648: 	  was_a_range = 0;
  649:           putchar (i - 1);
  650:           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
  651:             {
  652:               was_a_range = 1;
  653:               i++;
  654:             }
  655: 	  if (was_a_range)
  656:             {
  657:               printf ("-");
  658:               putchar (i - 1);
  659:             }
  660:         }
  661:     }
  662:   putchar ('\n');
  663: }
  664: 
  665: 
  666: /* Print a compiled pattern string in human-readable form, starting at
  667:    the START pointer into it and ending just before the pointer END.  */
  668: 
  669: void
  670: print_partial_compiled_pattern (start, end)
  671:     unsigned char *start;
  672:     unsigned char *end;
  673: {
  674:   int mcnt, mcnt2;
  675:   unsigned char *p1;
  676:   unsigned char *p = start;
  677:   unsigned char *pend = end;
  678: 
  679:   if (start == NULL)
  680:     {
  681:       printf ("(null)\n");
  682:       return;
  683:     }
  684: 
  685:   /* Loop over pattern commands.  */
  686:   while (p < pend)
  687:     {
  688:       printf ("%d:\t", p - start);
  689: 
  690:       switch ((re_opcode_t) *p++)
  691: 	{
  692:         case no_op:
  693:           printf ("/no_op");
  694:           break;
  695: 
  696: 	case exactn:
  697: 	  mcnt = *p++;
  698:           printf ("/exactn/%d", mcnt);
  699:           do
  700: 	    {
  701:               putchar ('/');
  702: 	      putchar (*p++);
  703:             }
  704:           while (--mcnt);
  705:           break;
  706: 
  707: 	case start_memory:
  708:           mcnt = *p++;
  709:           printf ("/start_memory/%d/%d", mcnt, *p++);
  710:           break;
  711: 
  712: 	case stop_memory:
  713:           mcnt = *p++;
  714: 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
  715:           break;
  716: 
  717: 	case duplicate:
  718: 	  printf ("/duplicate/%d", *p++);
  719: 	  break;
  720: 
  721: 	case anychar:
  722: 	  printf ("/anychar");
  723: 	  break;
  724: 
  725: 	case charset:
  726:         case charset_not:
  727:           {
  728:             register int c, last = -100;
  729: 	    register int in_range = 0;
  730: 
  731: 	    printf ("/charset [%s",
  732: 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
  733: 
  734:             assert (p + *p < pend);
  735: 
  736:             for (c = 0; c < 256; c++)
  737: 	      if (c / 8 < *p
  738: 		  && (p[1 + (c/8)] & (1 << (c % 8))))
  739: 		{
  740: 		  /* Are we starting a range?  */
  741: 		  if (last + 1 == c && ! in_range)
  742: 		    {
  743: 		      putchar ('-');
  744: 		      in_range = 1;
  745: 		    }
  746: 		  /* Have we broken a range?  */
  747: 		  else if (last + 1 != c && in_range)
  748:               {
  749: 		      putchar (last);
  750: 		      in_range = 0;
  751: 		    }
  752: 
  753: 		  if (! in_range)
  754: 		    putchar (c);
  755: 
  756: 		  last = c;
  757:               }
  758: 
  759: 	    if (in_range)
  760: 	      putchar (last);
  761: 
  762: 	    putchar (']');
  763: 
  764: 	    p += 1 + *p;
  765: 	  }
  766: 	  break;
  767: 
  768: 	case begline:
  769: 	  printf ("/begline");
  770:           break;
  771: 
  772: 	case endline:
  773:           printf ("/endline");
  774:           break;
  775: 
  776: 	case on_failure_jump:
  777:           extract_number_and_incr (&mcnt, &p);
  778:   	  printf ("/on_failure_jump to %d", p + mcnt - start);
  779:           break;
  780: 
  781: 	case on_failure_keep_string_jump:
  782:           extract_number_and_incr (&mcnt, &p);
  783:   	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
  784:           break;
  785: 
  786: 	case dummy_failure_jump:
  787:           extract_number_and_incr (&mcnt, &p);
  788:   	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
  789:           break;
  790: 
  791: 	case push_dummy_failure:
  792:           printf ("/push_dummy_failure");
  793:           break;
  794: 
  795:         case maybe_pop_jump:
  796:           extract_number_and_incr (&mcnt, &p);
  797:   	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
  798: 	  break;
  799: 
  800:         case pop_failure_jump:
  801: 	  extract_number_and_incr (&mcnt, &p);
  802:   	  printf ("/pop_failure_jump to %d", p + mcnt - start);
  803: 	  break;
  804: 
  805:         case jump_past_alt:
  806: 	  extract_number_and_incr (&mcnt, &p);
  807:   	  printf ("/jump_past_alt to %d", p + mcnt - start);
  808: 	  break;
  809: 
  810:         case jump:
  811: 	  extract_number_and_incr (&mcnt, &p);
  812:   	  printf ("/jump to %d", p + mcnt - start);
  813: 	  break;
  814: 
  815:         case succeed_n:
  816:           extract_number_and_incr (&mcnt, &p);
  817: 	  p1 = p + mcnt;
  818:           extract_number_and_incr (&mcnt2, &p);
  819: 	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
  820:           break;
  821: 
  822:         case jump_n:
  823:           extract_number_and_incr (&mcnt, &p);
  824: 	  p1 = p + mcnt;
  825:           extract_number_and_incr (&mcnt2, &p);
  826: 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
  827:           break;
  828: 
  829:         case set_number_at:
  830:           extract_number_and_incr (&mcnt, &p);
  831: 	  p1 = p + mcnt;
  832:           extract_number_and_incr (&mcnt2, &p);
  833: 	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
  834:           break;
  835: 
  836:         case wordbound:
  837: 	  printf ("/wordbound");
  838: 	  break;
  839: 
  840: 	case notwordbound:
  841: 	  printf ("/notwordbound");
  842:           break;
  843: 
  844: 	case wordbeg:
  845: 	  printf ("/wordbeg");
  846: 	  break;
  847: 
  848: 	case wordend:
  849: 	  printf ("/wordend");
  850: 
  851: # ifdef emacs
  852: 	case before_dot:
  853: 	  printf ("/before_dot");
  854:           break;
  855: 
  856: 	case at_dot:
  857: 	  printf ("/at_dot");
  858:           break;
  859: 
  860: 	case after_dot:
  861: 	  printf ("/after_dot");
  862:           break;
  863: 
  864: 	case syntaxspec:
  865:           printf ("/syntaxspec");
  866: 	  mcnt = *p++;
  867: 	  printf ("/%d", mcnt);
  868:           break;
  869: 
  870: 	case notsyntaxspec:
  871:           printf ("/notsyntaxspec");
  872: 	  mcnt = *p++;
  873: 	  printf ("/%d", mcnt);
  874: 	  break;
  875: # endif /* emacs */
  876: 
  877: 	case wordchar:
  878: 	  printf ("/wordchar");
  879:           break;
  880: 
  881: 	case notwordchar:
  882: 	  printf ("/notwordchar");
  883:           break;
  884: 
  885: 	case begbuf:
  886: 	  printf ("/begbuf");
  887:           break;
  888: 
  889: 	case endbuf:
  890: 	  printf ("/endbuf");
  891:           break;
  892: 
  893:         default:
  894:           printf ("?%d", *(p-1));
  895: 	}
  896: 
  897:       putchar ('\n');
  898:     }
  899: 
  900:   printf ("%d:\tend of pattern.\n", p - start);
  901: }
  902: 
  903: 
  904: void
  905: print_compiled_pattern (bufp)
  906:     struct re_pattern_buffer *bufp;
  907: {
  908:   unsigned char *buffer = bufp->buffer;
  909: 
  910:   print_partial_compiled_pattern (buffer, buffer + bufp->used);
  911:   printf ("%ld bytes used/%ld bytes allocated.\n",
  912: 	  bufp->used, bufp->allocated);
  913: 
  914:   if (bufp->fastmap_accurate && bufp->fastmap)
  915:     {
  916:       printf ("fastmap: ");
  917:       print_fastmap (bufp->fastmap);
  918:     }
  919: 
  920:   printf ("re_nsub: %d\t", bufp->re_nsub);
  921:   printf ("regs_alloc: %d\t", bufp->regs_allocated);
  922:   printf ("can_be_null: %d\t", bufp->can_be_null);
  923:   printf ("newline_anchor: %d\n", bufp->newline_anchor);
  924:   printf ("no_sub: %d\t", bufp->no_sub);
  925:   printf ("not_bol: %d\t", bufp->not_bol);
  926:   printf ("not_eol: %d\t", bufp->not_eol);
  927:   printf ("syntax: %lx\n", bufp->syntax);
  928:   /* Perhaps we should print the translate table?  */
  929: }
  930: 
  931: 
  932: void
  933: print_double_string (where, string1, size1, string2, size2)
  934:     const char *where;
  935:     const char *string1;
  936:     const char *string2;
  937:     int size1;
  938:     int size2;
  939: {
  940:   int this_char;
  941: 
  942:   if (where == NULL)
  943:     printf ("(null)");
  944:   else
  945:     {
  946:       if (FIRST_STRING_P (where))
  947:         {
  948:           for (this_char = where - string1; this_char < size1; this_char++)
  949:             putchar (string1[this_char]);
  950: 
  951:           where = string2;
  952:         }
  953: 
  954:       for (this_char = where - string2; this_char < size2; this_char++)
  955:         putchar (string2[this_char]);
  956:     }
  957: }
  958: 
  959: void
  960: printchar (c)
  961:      int c;
  962: {
  963:   putc (c, stderr);
  964: }
  965: 
  966: #else /* not DEBUG */
  967: 
/* Debugging is compiled out: `assert' and every DEBUG_* helper below are
   defined with an empty expansion, so their uses elsewhere in this file
   produce no code.  */
# undef assert
# define assert(e)

# define DEBUG_STATEMENT(e)
# define DEBUG_PRINT1(x)
# define DEBUG_PRINT2(x1, x2)
# define DEBUG_PRINT3(x1, x2, x3)
# define DEBUG_PRINT4(x1, x2, x3, x4)
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
  978: 
  979: #endif /* not DEBUG */
  980: 
/* The regexp syntax currently recognized.  Set by `re_set_syntax'; it can
   also be assigned to directly, because each pattern buffer stores its
   own syntax and so the value may change between regex compilations.

   This has no initializer because initialized variables in Emacs
   become read-only after dumping.  */
reg_syntax_t re_syntax_options;
  987: 
  988: 
  989: /* Specify the precise syntax of regexps for compilation.  This provides
  990:    for compatibility for various utilities which historically have
  991:    different, incompatible syntaxes.
  992: 
  993:    The argument SYNTAX is a bit mask comprised of the various bits
  994:    defined in regex.h.  We return the old syntax.  */
  995: 
  996: reg_syntax_t
  997: re_set_syntax (syntax)
  998:     reg_syntax_t syntax;
  999: {
 1000:   reg_syntax_t ret = re_syntax_options;
 1001: 
 1002:   re_syntax_options = syntax;
 1003: #ifdef DEBUG
 1004:   if (syntax & RE_DEBUG)
 1005:     debug = 1;
 1006:   else if (debug) /* was on but now is not */
 1007:     debug = 0;
 1008: #endif /* DEBUG */
 1009:   return ret;
 1010: }
#ifdef _LIBC
/* Only when _LIBC is defined: tie the names `__re_set_syntax' and
   `re_set_syntax' together through the weak_alias macro.
   NOTE(review): weak_alias is defined outside this file -- confirm its
   argument order there.  */
weak_alias (__re_set_syntax, re_set_syntax)
#endif
 1014: 
/* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?

   Layout: all messages live in one char array.  Each message literal is
   followed by an explicit "\0" and the adjacent literals are joined by
   string concatenation.  Every REG_*_IDX macro is the byte offset of its
   message: the previous offset plus `sizeof' of the previous message
   text, which counts that message's terminating NUL.  The literal inside
   each `sizeof' must therefore repeat the preceding message exactly.  */

static const char re_error_msgid[] =
  {
#define REG_NOERROR_IDX	0
    gettext_noop ("Success")	/* REG_NOERROR */
    "\0"
#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
    gettext_noop ("No match")	/* REG_NOMATCH */
    "\0"
#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
    "\0"
#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
    "\0"
#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
    "\0"
#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
    "\0"
#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
    "\0"
#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
    "\0"
#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
    "\0"
#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
    "\0"
#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
    "\0"
#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
    gettext_noop ("Invalid range end")	/* REG_ERANGE */
    "\0"
#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
    gettext_noop ("Memory exhausted") /* REG_ESPACE */
    "\0"
#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
    "\0"
#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
    gettext_noop ("Premature end of regular expression") /* REG_EEND */
    "\0"
#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
    gettext_noop ("Regular expression too big") /* REG_ESIZE */
    "\0"
#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
  };
 1073: 
/* Byte offsets into `re_error_msgid', one per message and listed in the
   same order as the messages appear in that array.  The REG_*_IDX values
   are the macros defined alongside the message text.  */
static const size_t re_error_msgid_idx[] =
  {
    REG_NOERROR_IDX,
    REG_NOMATCH_IDX,
    REG_BADPAT_IDX,
    REG_ECOLLATE_IDX,
    REG_ECTYPE_IDX,
    REG_EESCAPE_IDX,
    REG_ESUBREG_IDX,
    REG_EBRACK_IDX,
    REG_EPAREN_IDX,
    REG_EBRACE_IDX,
    REG_BADBR_IDX,
    REG_ERANGE_IDX,
    REG_ESPACE_IDX,
    REG_BADRPT_IDX,
    REG_EEND_IDX,
    REG_ESIZE_IDX,
    REG_ERPAREN_IDX
  };
 1094: 
 1095: /* Avoiding alloca during matching, to placate r_alloc.  */
 1096: 
 1097: /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
 1098:    searching and matching functions should not call alloca.  On some
 1099:    systems, alloca is implemented in terms of malloc, and if we're
 1100:    using the relocating allocator routines, then malloc could cause a
 1101:    relocation, which might (if the strings being searched are in the
 1102:    ralloc heap) shift the data out from underneath the regexp
 1103:    routines.
 1104: 
 1105:    Here's another reason to avoid allocation: Emacs
 1106:    processes input from X in a signal handler; processing X input may
 1107:    call malloc; if input arrives while a matching routine is calling
 1108:    malloc, then we're scrod.  But Emacs can't just block input while
 1109:    calling matching routines; then we don't notice interrupts when
 1110:    they come in.  So, Emacs blocks input around all regexp calls
 1111:    except the matching calls, which it leaves unprotected, in the
 1112:    faith that they will not malloc.  */
 1113: 
/* By default the match routines are allowed to allocate; the test
   further down takes the permission away again when that is unsafe.  */
#define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
#ifdef __GNUC__
# undef C_ALLOCA
#endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.
   Condition (1) is tested as C_ALLOCA or REGEX_MALLOC being defined,
   condition (2) as `emacs' being defined.  */
#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
# undef MATCH_MAY_ALLOCATE
#endif
 1131: 
 1132: 
 1133: /* Failure stack declarations and macros; both re_compile_fastmap and
 1134:    re_match_2 use a failure stack.  These have to be macros because of
 1135:    REGEX_ALLOCATE_STACK.  */
 1136: 
 1137: 
/* Number of failure-stack elements (fail_stack_elt_t items) for which
   INIT_FAIL_STACK initially allocates space when matching.  If this
   number is exceeded, we allocate more space, so it is not a hard limit.
   May be overridden by defining INIT_FAILURE_ALLOC before this point.  */
#ifndef INIT_FAILURE_ALLOC
# define INIT_FAILURE_ALLOC 5
#endif
 1144: 
/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.

   The two branches below declare the same things -- `re_max_failures',
   the stack element union and the stack descriptor -- and differ only in
   the integer types (long-based when INT_IS_16BIT is defined, plain
   int/unsigned otherwise) and in the default limit.  */

#ifdef INT_IS_16BIT

# if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
long int re_max_failures = 4000;
# else
long int re_max_failures = 2000;
# endif

/* One slot of the failure stack: holds either a pointer or an integer.  */
union fail_stack_elt
{
  unsigned char *pointer;
  long int integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

/* The failure stack itself: element storage plus its capacity (`size')
   and fill level (`avail'), both counted in elements.  */
typedef struct
{
  fail_stack_elt_t *stack;
  unsigned long int size;
  unsigned long int avail;		/* Offset of next open position.  */
} fail_stack_type;

#else /* not INT_IS_16BIT */

# if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.
   NOTE(review): the default below is larger than the figure quoted
   here -- confirm the remark still applies.  */
int re_max_failures = 20000;
# else
int re_max_failures = 2000;
# endif

/* One slot of the failure stack: holds either a pointer or an integer.  */
union fail_stack_elt
{
  unsigned char *pointer;
  int integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

/* The failure stack itself: element storage plus its capacity (`size')
   and fill level (`avail'), both counted in elements.  */
typedef struct
{
  fail_stack_elt_t *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} fail_stack_type;

#endif /* INT_IS_16BIT */
 1201: 
/* Fill-level predicates.  FAIL_STACK_EMPTY and FAIL_STACK_FULL look at a
   variable named `fail_stack' in the invoking scope;
   FAIL_STACK_PTR_EMPTY looks through a pointer named `fail_stack_ptr'.  */
#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
 1205: 
 1206: 
/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   Both macros act on a variable named `fail_stack' in the invoking
   scope.  When MATCH_MAY_ALLOCATE is defined, INIT_FAIL_STACK obtains
   room for INIT_FAILURE_ALLOC elements through REGEX_ALLOCATE_STACK and
   RESET_FAIL_STACK hands the storage to REGEX_FREE_STACK.  Otherwise
   INIT_FAIL_STACK only resets the fill level and RESET_FAIL_STACK
   expands to nothing.  */

#ifdef MATCH_MAY_ALLOCATE
# define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (fail_stack_elt_t *)				\
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
									\
    if (fail_stack.stack == NULL)					\
      return -2;							\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;				\
    fail_stack.avail = 0;						\
  } while (0)

# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
#else
# define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;						\
  } while (0)

# define RESET_FAIL_STACK()
#endif
 1232: 
 1233: 
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   REGEX_REALLOCATE_STACK requires `destination' be declared.

   "Too large" means the current size already exceeds
   re_max_failures * MAX_FAILURE_ITEMS elements; a stack at or below that
   figure is still doubled, so the limit can be overshot.

   NOTE(review): the result of REGEX_REALLOCATE_STACK is stored into
   (fail_stack).stack before it is tested for NULL, so after a failed
   reallocation the previous pointer is no longer held here.  Whether
   that loses memory depends on how REGEX_REALLOCATE_STACK is defined --
   confirm.  The argument is also evaluated several times.  */

#define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   ? 0									\
   : ((fail_stack).stack = (fail_stack_elt_t *)				\
        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
          (fail_stack).size * sizeof (fail_stack_elt_t),		\
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1, 					\
         1)))
 1253: 
 1254: 
 1255: /* Push pointer POINTER on FAIL_STACK.
 1256:    Return 1 if was able to do so and 0 if ran out of memory allocating
 1257:    space to do so.  */
 1258: #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
 1259:   ((FAIL_STACK_FULL ()							\
 1260:     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
 1261:    ? 0									\
 1262:    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
 1263:       1))
 1264: 
/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   No room check is made here; the caller must have ensured space.  */
#define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   No room check is made here; the caller must have ensured space.  */
#define PUSH_FAILURE_INT(item)					\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   No room check is made here; the caller must have ensured space.  */
#define PUSH_FAILURE_ELT(item)					\
  fail_stack.stack[fail_stack.avail++] =  (item)

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  Each one decrements
   `avail' and yields the element that was on top.  */
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
 1288: 
/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP
   stores a popped integer through ITEM_ADDR; without it both expand to
   nothing, so no stack slot is used for the id.  */
#ifdef DEBUG
# define DEBUG_PUSH PUSH_FAILURE_INT
# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
#else
# define DEBUG_PUSH(item)
# define DEBUG_POP(item_addr)
#endif
 1297: 
 1298: 
/* Push the information about the state we will need
   if we ever fail back to it.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
   be declared.  The macro also reads lowest_active_reg and
   highest_active_reg from the invoking scope.

   Does `return FAILURE_CODE' if runs out of memory.

   Push order: for each register from lowest_active_reg up to
   highest_active_reg its start, end and info word; then the lowest and
   highest active register numbers, PATTERN_PLACE, STRING_PLACE and, in
   DEBUG builds only, the failure id.  Before anything is pushed the
   stack is doubled until NUM_FAILURE_ITEMS slots are free.  */

#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do {									\
    char *destination;							\
    /* Must be int, so when we don't save any registers, the arithmetic	\
       of 0 + -1 isn't done as unsigned.  */				\
    /* Can't be int, since there is not a shred of a guarantee that int	\
       is wide enough to hold a value of something to which pointer can	\
       be assigned */							\
    active_reg_t this_reg;						\
    									\
    DEBUG_STATEMENT (failure_id++);					\
    DEBUG_STATEMENT (nfailure_points_pushed++);				\
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
									\
    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
									\
    /* Ensure we have enough space allocated for what we will push.  */	\
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
      {									\
        if (!DOUBLE_FAIL_STACK (fail_stack))				\
          return failure_code;						\
									\
        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
		       (fail_stack).size);				\
        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      }									\
									\
    /* Push the info, starting with the registers.  */			\
    DEBUG_PRINT1 ("\n");						\
									\
    if (1)								\
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
	   this_reg++)							\
	{								\
	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
	  DEBUG_STATEMENT (num_regs_pushed++);				\
									\
	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    info: %p\n      ",				\
			reg_info[this_reg].word.pointer);		\
	  DEBUG_PRINT2 (" match_null=%d",				\
			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" matched_something=%d",			\
			MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" ever_matched=%d",				\
			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT1 ("\n");						\
	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
	}								\
									\
    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
    PUSH_FAILURE_POINTER (pattern_place);				\
									\
    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
				 size2);				\
    DEBUG_PRINT1 ("'\n");						\
    PUSH_FAILURE_POINTER (string_place);				\
									\
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
    DEBUG_PUSH (failure_id);						\
  } while (0)
 1386: 
 1387: /* This is the number of items that are pushed and popped on the stack
 1388:    for each register.  */
 1389: #define NUM_REG_ITEMS  3
 1390: 
 1391: /* Individual items aside from the registers.  */
 1392: #ifdef DEBUG
 1393: # define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
 1394: #else
 1395: # define NUM_NONREG_ITEMS 4
 1396: #endif
 1397: 
 1398: /* We push at most this many items on the stack.  */
 1399: /* We used to use (num_regs - 1), which is the number of registers
 1400:    this regexp will save; but that was changed to 5
 1401:    to avoid stack overflow for a regexp with lots of parens.  */
 1402: #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
 1403: 
 1404: /* We actually push this many items.  */
 1405: #define NUM_FAILURE_ITEMS				\
 1406:   (((0							\
 1407:      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
 1408:     * NUM_REG_ITEMS)					\
 1409:    + NUM_NONREG_ITEMS)
 1410: 
 1411: /* How many items can still be added to the stack without overflowing it.  */
 1412: #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
 1413: 
 1414: 
/* Pops what PUSH_FAILURE_POINT pushes, in the reverse order.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.

   STR is left untouched when the saved string position is NULL.  The
   macro also clears `set_regs_matched_done' in the invoking scope.  The
   `else' arm below belongs to `if (1)' and is therefore never run.
   The expansion is a bare brace block, not a do-while statement.  */

#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{									\
  DEBUG_STATEMENT (unsigned failure_id;)				\
  active_reg_t this_reg;						\
  const unsigned char *string_temp;					\
									\
  assert (!FAIL_STACK_EMPTY ());					\
									\
  /* Remove failure points and point to how many regs pushed.  */	\
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
									\
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
									\
  DEBUG_POP (&failure_id);						\
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
									\
  /* If the saved string location is NULL, it came from an		\
     on_failure_keep_string_jump opcode, and we want to throw away the	\
     saved NULL, thus retaining our current position in the string.  */	\
  string_temp = POP_FAILURE_POINTER ();					\
  if (string_temp != NULL)						\
    str = (const char *) string_temp;					\
									\
  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
  DEBUG_PRINT1 ("'\n");							\
									\
  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
									\
  /* Restore register info.  */						\
  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
									\
  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
									\
  if (1)								\
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
      {									\
	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
									\
	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
	DEBUG_PRINT2 ("      info: %p\n",				\
		      reg_info[this_reg].word.pointer);			\
									\
	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
									\
	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
      }									\
  else									\
    {									\
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
	{								\
	  reg_info[this_reg].word.integer = 0;				\
	  regend[this_reg] = 0;						\
	  regstart[this_reg] = 0;					\
	}								\
      highest_active_reg = high_reg;					\
    }									\
									\
  set_regs_matched_done = 0;						\
  DEBUG_STATEMENT (nfailure_points_popped++);				\
} /* POP_FAILURE_POINT */
 1496: 
 1497: 
 1498: 
/* Structure for per-register (a.k.a. per-group) information.
   Other register information, such as the
   starting and ending positions (which are addresses), and the list of
   inner groups (which is a bits list) are maintained in separate
   variables.

   We are making a (strictly speaking) nonportable assumption here: that
   the compiler will pack our bit fields into something that fits into
   the type of `word', i.e., is something that fits into one item on the
   failure stack.  */


/* Declarations and macros for re_match_2.  */

/* `word' views the whole record as a single failure-stack element, which
   is how it is pushed and popped; `bits' gives access to the individual
   flags.  */
typedef union
{
  fail_stack_elt_t word;
  struct
  {
      /* This field is one if this group can match the empty string,
         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
#define MATCH_NULL_UNSET_VALUE 3
    unsigned match_null_string_p : 2;
    unsigned is_active : 1;
    unsigned matched_something : 1;
    unsigned ever_matched_something : 1;
  } bits;
} register_info_type;

/* Accessors for the flag bits of a register_info_type R; each expands to
   an lvalue, so it can be both read and assigned.  */
#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
#define IS_ACTIVE(R)  ((R).bits.is_active)
#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
 1532: 
 1533: 
 1534: /* Call this when have matched a real character; it sets `matched' flags
 1535:    for the subexpressions which we are currently inside.  Also records
 1536:    that those subexprs have matched.  */
 1537: #define SET_REGS_MATCHED()						\
 1538:   do									\
 1539:     {									\
 1540:       if (!set_regs_matched_done)					\
 1541: 	{								\
 1542: 	  active_reg_t r;						\
 1543: 	  set_regs_matched_done = 1;					\
 1544: 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
 1545: 	    {								\
 1546: 	      MATCHED_SOMETHING (reg_info[r])				\
 1547: 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
 1548: 		= 1;							\
 1549: 	    }								\
 1550: 	}								\
 1551:     }									\
 1552:   while (0)
 1553: 
/* Registers are set to a sentinel when they haven't yet matched.  The
   sentinel is the address of a private dummy object; REG_UNSET (e)
   tests whether E equals that address.  */
static char reg_unset_dummy;
#define REG_UNSET_VALUE (&reg_unset_dummy)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
 1558: 
/* Subroutine declarations and macros for regex_compile.

   Every parameter list is wrapped in _RE_ARGS.
   NOTE(review): _RE_ARGS is defined outside this file; presumably it
   drops the parameter list for pre-ANSI compilers -- confirm in
   regex.h.  */

static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
					      reg_syntax_t syntax,
					      struct re_pattern_buffer *bufp));
/* store_op1/store_op2 and insert_op1/insert_op2 are what the STORE_JUMP,
   STORE_JUMP2, INSERT_JUMP and INSERT_JUMP2 macros expand to.  */
static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
				 int arg1, int arg2));
static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
				  int arg, unsigned char *end));
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
				  int arg1, int arg2, unsigned char *end));
static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
					   reg_syntax_t syntax));
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
					   reg_syntax_t syntax));
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
					      const char *pend,
					      char *translate,
					      reg_syntax_t syntax,
					      unsigned char *b));
 1580: 
/* Fetch the next character in the uncompiled pattern---translating it
   if necessary.  Also cast from a signed character in the constant
   string passed to us by the user to an unsigned char that we can use
   as an array index (in, e.g., `translate').

   Uses `p', `pend' and `translate' from the invoking scope and advances
   `p'.  Does `return REG_EEND' from the invoking function when the
   pattern is already exhausted (p == pend).  */
#ifndef PATFETCH
# define PATFETCH(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++;						\
    if (translate) c = (unsigned char) translate[c];			\
  } while (0)
#endif

/* Fetch the next character in the uncompiled pattern, with no
   translation.  Same end-of-pattern `return REG_EEND' as PATFETCH.  */
#define PATFETCH_RAW(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++; 						\
  } while (0)

/* Go backwards one character in the pattern.  */
#define PATUNFETCH p--
 1602: 
 1603: 
/* If `translate' is non-null, return translate[D], else just D.  We
   cast the subscript to translate because some data is declared as
   `char *', to avoid warnings when a string constant is passed.  But
   when we use a character as a subscript we must make it unsigned.
   `translate' is taken from the invoking scope; the looked-up value is
   cast to `char'.  May be predefined to override this definition.  */
#ifndef TRANSLATE
# define TRANSLATE(d) \
  (translate ? (char) translate[(unsigned char) (d)] : (d))
#endif
 1612: 
 1613: 
/* Macros for outputting the compiled pattern into `buffer'.  They use
   `b' (the current write position) and `bufp' from the invoking scope.  */

/* If the buffer isn't allocated when it comes in, use this.  */
#define INIT_BUF_SIZE  32

/* Make sure we have at least N more bytes of space in buffer.
   Repeats EXTEND_BUFFER until the bytes written so far plus N fit in
   bufp->allocated.  Expands to a bare `while' statement.  */
#define GET_BUFFER_SPACE(n)						\
    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
      EXTEND_BUFFER ()

/* Make sure we have one more byte of buffer space and then add C to it.  */
#define BUF_PUSH(c)							\
  do {									\
    GET_BUFFER_SPACE (1);						\
    *b++ = (unsigned char) (c);						\
  } while (0)


/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
#define BUF_PUSH_2(c1, c2)						\
  do {									\
    GET_BUFFER_SPACE (2);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
  } while (0)


/* As with BUF_PUSH_2, except for three bytes.  */
#define BUF_PUSH_3(c1, c2, c3)						\
  do {									\
    GET_BUFFER_SPACE (3);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
    *b++ = (unsigned char) (c3);					\
  } while (0)
 1649: 
 1650: 
/* Store a jump with opcode OP at LOC to location TO.  We store a
   relative address offset by the three bytes the jump itself occupies.
   The offset is computed as TO - LOC - 3 and passed on as an int.  */
#define STORE_JUMP(op, loc, to) \
  store_op1 (op, loc, (int) ((to) - (loc) - 3))

/* Likewise, for a two-argument jump.  ARG is passed through unchanged
   as the second argument.  */
#define STORE_JUMP2(op, loc, to, arg) \
  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)

/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
#define INSERT_JUMP(op, loc, to) \
  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)

/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
#define INSERT_JUMP2(op, loc, to, arg) \
  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
 1667: 
 1668: 
/* This is not an arbitrary limit: the arguments which represent offsets
   into the pattern are two bytes long.  So if 2^16 bytes turns out to
   be too small, many things would have to change.  */
/* Any other compiler which, like MSC, has allocation limit below 2^16
   bytes will have to use approach similar to what was done below for
   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   reallocating to 0 bytes.  Such thing is not going to work too well.
   You have been warned!!  */
/* MAX_BUF_SIZE is the largest size the compiled-pattern buffer may
   reach; REALLOC is the reallocation call used to grow it.  */
#if defined _MSC_VER  && !defined _WIN32
/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   The REALLOC define eliminates a flurry of conversion warnings,
   but is not required. */
# define MAX_BUF_SIZE  65500L
# define REALLOC(p,s) realloc ((p), (size_t) (s))
#else
# define MAX_BUF_SIZE (1L << 16)
# define REALLOC(p,s) realloc ((p), (s))
#endif
 1687: 
 1688: /* Extend the buffer by twice its current size via realloc and
 1689:    reset the pointers that pointed into the old block to point to the
 1690:    correct places in the new one.  If extending the buffer results in it
 1691:    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
 1692: #define EXTEND_BUFFER()							\
 1693:   do { 									\
 1694:     unsigned char *old_buffer = bufp->buffer;				\
 1695:     if (bufp->allocated == MAX_BUF_SIZE) 				\
 1696:       return REG_ESIZE;							\
 1697:     bufp->allocated <<= 1;						\
 1698:     if (bufp->allocated > MAX_BUF_SIZE)					\
 1699:       bufp->allocated = MAX_BUF_SIZE; 					\
 1700:     bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
 1701:     if (bufp->buffer == NULL)						\
 1702:       return REG_ESPACE;						\
 1703:     /* If the buffer moved, move all the pointers into it.  */		\
 1704:     if (old_buffer != bufp->buffer)					\
 1705:       {									\
 1706:         b = (b - old_buffer) + bufp->buffer;				\
 1707:         begalt = (begalt - old_buffer) + bufp->buffer;			\
 1708:         if (fixup_alt_jump)						\
 1709:           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
 1710:         if (laststart)							\
 1711:           laststart = (laststart - old_buffer) + bufp->buffer;		\
 1712:         if (pending_exact)						\
 1713:           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
 1714:       }									\
 1715:   } while (0)
 1716: 
 1717: 
 1718: /* Since we have one byte reserved for the register number argument to
 1719:    {start,stop}_memory, the maximum number of groups we can report
 1720:    things about is what fits in that byte.  */
 1721: #define MAX_REGNUM 255
 1722: 
 1723: /* But patterns can have more than `MAX_REGNUM' registers.  We just
 1724:    ignore the excess.  */
 1725: typedef unsigned regnum_t;
 1726: 
 1727: 
 1728: /* Macros for the compile stack.  */
 1729: 
 1730: /* Since offsets can go either forwards or backwards, this type needs to
 1731:    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
 1732: /* int may be not enough when sizeof(int) == 2.  */
 1733: typedef long pattern_offset_t;
 1734: 
 1735: typedef struct
 1736: {
 1737:   pattern_offset_t begalt_offset;
 1738:   pattern_offset_t fixup_alt_jump;
 1739:   pattern_offset_t inner_group_offset;
 1740:   pattern_offset_t laststart_offset;
 1741:   regnum_t regnum;
 1742: } compile_stack_elt_t;
 1743: 
 1744: 
 1745: typedef struct
 1746: {
 1747:   compile_stack_elt_t *stack;
 1748:   unsigned size;
 1749:   unsigned avail;			/* Offset of next open position.  */
 1750: } compile_stack_type;
 1751: 
 1752: 
 1753: #define INIT_COMPILE_STACK_SIZE 32
 1754: 
 1755: #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
 1756: #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
 1757: 
 1758: /* The next available element.  */
 1759: #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
 1760: 
 1761: 
 1762: /* Set the bit for character C in a list.  */
 1763: #define SET_LIST_BIT(c)                               \
 1764:   (b[((unsigned char) (c)) / BYTEWIDTH]               \
 1765:    |= 1 << (((unsigned char) c) % BYTEWIDTH))
 1766: 
 1767: 
 1768: /* Get the next unsigned number in the uncompiled pattern.  */
 1769: #define GET_UNSIGNED_NUMBER(num) 					\
 1770:   { if (p != pend)							\
 1771:      {									\
 1772:        PATFETCH (c); 							\
 1773:        while (ISDIGIT (c)) 						\
 1774:          { 								\
 1775:            if (num < 0)							\
 1776:               num = 0;							\
 1777:            num = num * 10 + c - '0'; 					\
 1778:            if (p == pend) 						\
 1779:               break; 							\
 1780:            PATFETCH (c);						\
 1781:          } 								\
 1782:        } 								\
 1783:     }
 1784: 
 1785: #if defined _LIBC || WIDE_CHAR_SUPPORT
 1786: /* The GNU C library provides support for user-defined character classes
 1787:    and the functions from ISO C amendement 1.  */
 1788: # ifdef CHARCLASS_NAME_MAX
 1789: #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
 1790: # else
 1791: /* This shouldn't happen but some implementation might still have this
 1792:    problem.  Use a reasonable default value.  */
 1793: #  define CHAR_CLASS_MAX_LENGTH 256
 1794: # endif
 1795: 
 1796: # ifdef _LIBC
 1797: #  define IS_CHAR_CLASS(string) __wctype (string)
 1798: # else
 1799: #  define IS_CHAR_CLASS(string) wctype (string)
 1800: # endif
 1801: #else
 1802: # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
 1803: 
 1804: # define IS_CHAR_CLASS(string)						\
 1805:    (STREQ (string, "alpha") || STREQ (string, "upper")			\
 1806:     || STREQ (string, "lower") || STREQ (string, "digit")		\
 1807:     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
 1808:     || STREQ (string, "space") || STREQ (string, "print")		\
 1809:     || STREQ (string, "punct") || STREQ (string, "graph")		\
 1810:     || STREQ (string, "cntrl") || STREQ (string, "blank"))
 1811: #endif
 1812: 
 1813: #ifndef MATCH_MAY_ALLOCATE
 1814: 
 1815: /* If we cannot allocate large objects within re_match_2_internal,
 1816:    we make the fail stack and register vectors global.
 1817:    The fail stack, we grow to the maximum size when a regexp
 1818:    is compiled.
 1819:    The register vectors, we adjust in size each time we
 1820:    compile a regexp, according to the number of registers it needs.  */
 1821: 
 1822: static fail_stack_type fail_stack;
 1823: 
 1824: /* Size with which the following vectors are currently allocated.
 1825:    That is so we can make them bigger as needed,
 1826:    but never make them smaller.  */
 1827: static int regs_allocated_size;
 1828: 
 1829: static const char **     regstart, **     regend;
 1830: static const char ** old_regstart, ** old_regend;
 1831: static const char **best_regstart, **best_regend;
 1832: static register_info_type *reg_info;
 1833: static const char **reg_dummy;
 1834: static register_info_type *reg_info_dummy;
 1835: 
 1836: /* Make the register vectors big enough for NUM_REGS registers,
 1837:    but don't make them smaller.  */
 1838: 
 1839: static
 1840: regex_grow_registers (num_regs)
 1841:      int num_regs;
 1842: {
 1843:   if (num_regs > regs_allocated_size)
 1844:     {
 1845:       RETALLOC_IF (regstart,	 num_regs, const char *);
 1846:       RETALLOC_IF (regend,	 num_regs, const char *);
 1847:       RETALLOC_IF (old_regstart, num_regs, const char *);
 1848:       RETALLOC_IF (old_regend,	 num_regs, const char *);
 1849:       RETALLOC_IF (best_regstart, num_regs, const char *);
 1850:       RETALLOC_IF (best_regend,	 num_regs, const char *);
 1851:       RETALLOC_IF (reg_info,	 num_regs, register_info_type);
 1852:       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
 1853:       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
 1854: 
 1855:       regs_allocated_size = num_regs;
 1856:     }
 1857: }
 1858: 
 1859: #endif /* not MATCH_MAY_ALLOCATE */
 1860: 
 1861: static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
 1862: 						 compile_stack,
 1863: 						 regnum_t regnum));
 1864: 
 1865: /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
 1866:    Returns one of error codes defined in `regex.h', or zero for success.
 1867: 
 1868:    Assumes the `allocated' (and perhaps `buffer') and `translate'
 1869:    fields are set in BUFP on entry.
 1870: 
 1871:    If it succeeds, results are put in BUFP (if it returns an error, the
 1872:    contents of BUFP are undefined):
 1873:      `buffer' is the compiled pattern;
 1874:      `syntax' is set to SYNTAX;
 1875:      `used' is set to the length of the compiled pattern;
 1876:      `fastmap_accurate' is zero;
 1877:      `re_nsub' is the number of subexpressions in PATTERN;
 1878:      `not_bol' and `not_eol' are zero;
 1879: 
 1880:    The `fastmap' and `newline_anchor' fields are neither
 1881:    examined nor set.  */
 1882: 
 1883: /* Return, freeing storage we allocated.  */
 1884: #define FREE_STACK_RETURN(value)		\
 1885:   return (free (compile_stack.stack), value)
 1886: 
 1887: static reg_errcode_t
 1888: regex_compile (pattern, size, syntax, bufp)
 1889:      const char *pattern;
 1890:      size_t size;
 1891:      reg_syntax_t syntax;
 1892:      struct re_pattern_buffer *bufp;
 1893: {
 1894:   /* We fetch characters from PATTERN here.  Even though PATTERN is
 1895:      `char *' (i.e., signed), we declare these variables as unsigned, so
 1896:      they can be reliably used as array indices.  */
 1897:   register unsigned char c, c1;
 1898: 
 1899:   /* A random temporary spot in PATTERN.  */
 1900:   const char *p1;
 1901: 
 1902:   /* Points to the end of the buffer, where we should append.  */
 1903:   register unsigned char *b;
 1904: 
 1905:   /* Keeps track of unclosed groups.  */
 1906:   compile_stack_type compile_stack;
 1907: 
 1908:   /* Points to the current (ending) position in the pattern.  */
 1909:   const char *p = pattern;
 1910:   const char *pend = pattern + size;
 1911: 
 1912:   /* How to translate the characters in the pattern.  */
 1913:   RE_TRANSLATE_TYPE translate = bufp->translate;
 1914: 
 1915:   /* Address of the count-byte of the most recently inserted `exactn'
 1916:      command.  This makes it possible to tell if a new exact-match
 1917:      character can be added to that command or if the character requires
 1918:      a new `exactn' command.  */
 1919:   unsigned char *pending_exact = 0;
 1920: 
 1921:   /* Address of start of the most recently finished expression.
 1922:      This tells, e.g., postfix * where to find the start of its
 1923:      operand.  Reset at the beginning of groups and alternatives.  */
 1924:   unsigned char *laststart = 0;
 1925: 
 1926:   /* Address of beginning of regexp, or inside of last group.  */
 1927:   unsigned char *begalt;
 1928: 
 1929:   /* Place in the uncompiled pattern (i.e., the {) to
 1930:      which to go back if the interval is invalid.  */
 1931:   const char *beg_interval;
 1932: 
 1933:   /* Address of the place where a forward jump should go to the end of
 1934:      the containing expression.  Each alternative of an `or' -- except the
 1935:      last -- ends with a forward jump of this sort.  */
 1936:   unsigned char *fixup_alt_jump = 0;
 1937: 
 1938:   /* Counts open-groups as they are encountered.  Remembered for the
 1939:      matching close-group on the compile stack, so the same register
 1940:      number is put in the stop_memory as the start_memory.  */
 1941:   regnum_t regnum = 0;
 1942: 
 1943: #ifdef DEBUG
 1944:   DEBUG_PRINT1 ("\nCompiling pattern: ");
 1945:   if (debug)
 1946:     {
 1947:       unsigned debug_count;
 1948: 
 1949:       for (debug_count = 0; debug_count < size; debug_count++)
 1950:         putchar (pattern[debug_count]);
 1951:       putchar ('\n');
 1952:     }
 1953: #endif /* DEBUG */
 1954: 
 1955:   /* Initialize the compile stack.  */
 1956:   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
 1957:   if (compile_stack.stack == NULL)
 1958:     return REG_ESPACE;
 1959: 
 1960:   compile_stack.size = INIT_COMPILE_STACK_SIZE;
 1961:   compile_stack.avail = 0;
 1962: 
 1963:   /* Initialize the pattern buffer.  */
 1964:   bufp->syntax = syntax;
 1965:   bufp->fastmap_accurate = 0;
 1966:   bufp->not_bol = bufp->not_eol = 0;
 1967: 
 1968:   /* Set `used' to zero, so that if we return an error, the pattern
 1969:      printer (for debugging) will think there's no pattern.  We reset it
 1970:      at the end.  */
 1971:   bufp->used = 0;
 1972: 
 1973:   /* Always count groups, whether or not bufp->no_sub is set.  */
 1974:   bufp->re_nsub = 0;
 1975: 
 1976: #if !defined emacs && !defined SYNTAX_TABLE
 1977:   /* Initialize the syntax table.  */
 1978:    init_syntax_once ();
 1979: #endif
 1980: 
 1981:   if (bufp->allocated == 0)
 1982:     {
 1983:       if (bufp->buffer)
 1984: 	{ /* If zero allocated, but buffer is non-null, try to realloc
 1985:              enough space.  This loses if buffer's address is bogus, but
 1986:              that is the user's responsibility.  */
 1987:           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
 1988:         }
 1989:       else
 1990:         { /* Caller did not allocate a buffer.  Do it for them.  */
 1991:           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
 1992:         }
 1993:       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
 1994: 
 1995:       bufp->allocated = INIT_BUF_SIZE;
 1996:     }
 1997: 
 1998:   begalt = b = bufp->buffer;
 1999: 
 2000:   /* Loop through the uncompiled pattern until we're at the end.  */
 2001:   while (p != pend)
 2002:     {
 2003:       PATFETCH (c);
 2004: 
 2005:       switch (c)
 2006:         {
 2007:         case '^':
 2008:           {
 2009:             if (   /* If at start of pattern, it's an operator.  */
 2010:                    p == pattern + 1
 2011:                    /* If context independent, it's an operator.  */
 2012:                 || syntax & RE_CONTEXT_INDEP_ANCHORS
 2013:                    /* Otherwise, depends on what's come before.  */
 2014:                 || at_begline_loc_p (pattern, p, syntax))
 2015:               BUF_PUSH (begline);
 2016:             else
 2017:               goto normal_char;
 2018:           }
 2019:           break;
 2020: 
 2021: 
 2022:         case '$':
 2023:           {
 2024:             if (   /* If at end of pattern, it's an operator.  */
 2025:                    p == pend
 2026:                    /* If context independent, it's an operator.  */
 2027:                 || syntax & RE_CONTEXT_INDEP_ANCHORS
 2028:                    /* Otherwise, depends on what's next.  */
 2029:                 || at_endline_loc_p (p, pend, syntax))
 2030:                BUF_PUSH (endline);
 2031:              else
 2032:                goto normal_char;
 2033:            }
 2034:            break;
 2035: 
 2036: 
 2037: 	case '+':
 2038:         case '?':
 2039:           if ((syntax & RE_BK_PLUS_QM)
 2040:               || (syntax & RE_LIMITED_OPS))
 2041:             goto normal_char;
 2042:         handle_plus:
 2043:         case '*':
 2044:           /* If there is no previous pattern... */
 2045:           if (!laststart)
 2046:             {
 2047:               if (syntax & RE_CONTEXT_INVALID_OPS)
 2048:                 FREE_STACK_RETURN (REG_BADRPT);
 2049:               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
 2050:                 goto normal_char;
 2051:             }
 2052: 
 2053:           {
 2054:             /* Are we optimizing this jump?  */
 2055:             boolean keep_string_p = false;
 2056: 
 2057:             /* 1 means zero (many) matches is allowed.  */
 2058:             char zero_times_ok = 0, many_times_ok = 0;
 2059: 
 2060:             /* If there is a sequence of repetition chars, collapse it
 2061:                down to just one (the right one).  We can't combine
 2062:                interval operators with these because of, e.g., `a{2}*',
 2063:                which should only match an even number of `a's.  */
 2064: 
 2065:             for (;;)
 2066:               {
 2067:                 zero_times_ok |= c != '+';
 2068:                 many_times_ok |= c != '?';
 2069: 
 2070:                 if (p == pend)
 2071:                   break;
 2072: 
 2073:                 PATFETCH (c);
 2074: 
 2075:                 if (c == '*'
 2076:                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
 2077:                   ;
 2078: 
 2079:                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
 2080:                   {
 2081:                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
 2082: 
 2083:                     PATFETCH (c1);
 2084:                     if (!(c1 == '+' || c1 == '?'))
 2085:                       {
 2086:                         PATUNFETCH;
 2087:                         PATUNFETCH;
 2088:                         break;
 2089:                       }
 2090: 
 2091:                     c = c1;
 2092:                   }
 2093:                 else
 2094:                   {
 2095:                     PATUNFETCH;
 2096:                     break;
 2097:                   }
 2098: 
 2099:                 /* If we get here, we found another repeat character.  */
 2100:                }
 2101: 
 2102:             /* Star, etc. applied to an empty pattern is equivalent
 2103:                to an empty pattern.  */
 2104:             if (!laststart)
 2105:               break;
 2106: 
 2107:             /* Now we know whether or not zero matches is allowed
 2108:                and also whether or not two or more matches is allowed.  */
 2109:             if (many_times_ok)
 2110:               { /* More than one repetition is allowed, so put in at the
 2111:                    end a backward relative jump from `b' to before the next
 2112:                    jump we're going to put in below (which jumps from
 2113:                    laststart to after this jump).
 2114: 
 2115:                    But if we are at the `*' in the exact sequence `.*\n',
 2116:                    insert an unconditional jump backwards to the .,
 2117:                    instead of the beginning of the loop.  This way we only
 2118:                    push a failure point once, instead of every time
 2119:                    through the loop.  */
 2120:                 assert (p - 1 > pattern);
 2121: 
 2122:                 /* Allocate the space for the jump.  */
 2123:                 GET_BUFFER_SPACE (3);
 2124: 
 2125:                 /* We know we are not at the first character of the pattern,
 2126:                    because laststart was nonzero.  And we've already
 2127:                    incremented `p', by the way, to be the character after
 2128:                    the `*'.  Do we have to do something analogous here
 2129:                    for null bytes, because of RE_DOT_NOT_NULL?  */
 2130:                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
 2131: 		    && zero_times_ok
 2132:                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
 2133:                     && !(syntax & RE_DOT_NEWLINE))
 2134:                   { /* We have .*\n.  */
 2135:                     STORE_JUMP (jump, b, laststart);
 2136:                     keep_string_p = true;
 2137:                   }
 2138:                 else
 2139:                   /* Anything else.  */
 2140:                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
 2141: 
 2142:                 /* We've added more stuff to the buffer.  */
 2143:                 b += 3;
 2144:               }
 2145: 
 2146:             /* On failure, jump from laststart to b + 3, which will be the
 2147:                end of the buffer after this jump is inserted.  */
 2148:             GET_BUFFER_SPACE (3);
 2149:             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
 2150:                                        : on_failure_jump,
 2151:                          laststart, b + 3);
 2152:             pending_exact = 0;
 2153:             b += 3;
 2154: 
 2155:             if (!zero_times_ok)
 2156:               {
 2157:                 /* At least one repetition is required, so insert a
 2158:                    `dummy_failure_jump' before the initial
 2159:                    `on_failure_jump' instruction of the loop. This
 2160:                    effects a skip over that instruction the first time
 2161:                    we hit that loop.  */
 2162:                 GET_BUFFER_SPACE (3);
 2163:                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
 2164:                 b += 3;
 2165:               }
 2166:             }
 2167: 	  break;
 2168: 
 2169: 
 2170: 	case '.':
 2171:           laststart = b;
 2172:           BUF_PUSH (anychar);
 2173:           break;
 2174: 
 2175: 
 2176:         case '[':
 2177:           {
 2178:             boolean had_char_class = false;
 2179: 
 2180:             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 2181: 
 2182:             /* Ensure that we have enough space to push a charset: the
 2183:                opcode, the length count, and the bitset; 34 bytes in all.  */
 2184: 	    GET_BUFFER_SPACE (34);
 2185: 
 2186:             laststart = b;
 2187: 
 2188:             /* We test `*p == '^' twice, instead of using an if
 2189:                statement, so we only need one BUF_PUSH.  */
 2190:             BUF_PUSH (*p == '^' ? charset_not : charset);
 2191:             if (*p == '^')
 2192:               p++;
 2193: 
 2194:             /* Remember the first position in the bracket expression.  */
 2195:             p1 = p;
 2196: 
 2197:             /* Push the number of bytes in the bitmap.  */
 2198:             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
 2199: 
 2200:             /* Clear the whole map.  */
 2201:             memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
 2202: 
 2203:             /* charset_not matches newline according to a syntax bit.  */
 2204:             if ((re_opcode_t) b[-2] == charset_not
 2205:                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
 2206:               SET_LIST_BIT ('\n');
 2207: 
 2208:             /* Read in characters and ranges, setting map bits.  */
 2209:             for (;;)
 2210:               {
 2211:                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 2212: 
 2213:                 PATFETCH (c);
 2214: 
 2215:                 /* \ might escape characters inside [...] and [^...].  */
 2216:                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
 2217:                   {
 2218:                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
 2219: 
 2220:                     PATFETCH (c1);
 2221:                     SET_LIST_BIT (c1);
 2222:                     continue;
 2223:                   }
 2224: 
 2225:                 /* Could be the end of the bracket expression.  If it's
 2226:                    not (i.e., when the bracket expression is `[]' so
 2227:                    far), the ']' character bit gets set way below.  */
 2228:                 if (c == ']' && p != p1 + 1)
 2229:                   break;
 2230: 
 2231:                 /* Look ahead to see if it's a range when the last thing
 2232:                    was a character class.  */
 2233:                 if (had_char_class && c == '-' && *p != ']')
 2234:                   FREE_STACK_RETURN (REG_ERANGE);
 2235: 
 2236:                 /* Look ahead to see if it's a range when the last thing
 2237:                    was a character: if this is a hyphen not at the
 2238:                    beginning or the end of a list, then it's the range
 2239:                    operator.  */
 2240:                 if (c == '-'
 2241:                     && !(p - 2 >= pattern && p[-2] == '[')
 2242:                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
 2243:                     && *p != ']')
 2244:                   {
 2245:                     reg_errcode_t ret
 2246:                       = compile_range (&p, pend, translate, syntax, b);
 2247:                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
 2248:                   }
 2249: 
 2250:                 else if (p[0] == '-' && p[1] != ']')
 2251:                   { /* This handles ranges made up of characters only.  */
 2252:                     reg_errcode_t ret;
 2253: 
 2254: 		    /* Move past the `-'.  */
 2255:                     PATFETCH (c1);
 2256: 
 2257:                     ret = compile_range (&p, pend, translate, syntax, b);
 2258:                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
 2259:                   }
 2260: 
 2261:                 /* See if we're at the beginning of a possible character
 2262:                    class.  */
 2263: 
 2264:                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
 2265:                   { /* Leave room for the null.  */
 2266:                     char str[CHAR_CLASS_MAX_LENGTH + 1];
 2267: 
 2268:                     PATFETCH (c);
 2269:                     c1 = 0;
 2270: 
 2271:                     /* If pattern is `[[:'.  */
 2272:                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 2273: 
 2274:                     for (;;)
 2275:                       {
 2276:                         PATFETCH (c);
 2277:                         if ((c == ':' && *p == ']') || p == pend)
 2278:                           break;
 2279: 			if (c1 < CHAR_CLASS_MAX_LENGTH)
 2280: 			  str[c1++] = c;
 2281: 			else
 2282: 			  /* This is in any case an invalid class name.  */
 2283: 			  str[0] = '\0';
 2284:                       }
 2285:                     str[c1] = '\0';
 2286: 
 2287:                     /* If isn't a word bracketed by `[:' and `:]':
 2288:                        undo the ending character, the letters, and leave
 2289:                        the leading `:' and `[' (but set bits for them).  */
 2290:                     if (c == ':' && *p == ']')
 2291:                       {
 2292: #if defined _LIBC || WIDE_CHAR_SUPPORT
 2293:                         boolean is_lower = STREQ (str, "lower");
 2294:                         boolean is_upper = STREQ (str, "upper");
 2295: 			wctype_t wt;
 2296:                         int ch;
 2297: 
 2298: 			wt = IS_CHAR_CLASS (str);
 2299: 			if (wt == 0)
 2300: 			  FREE_STACK_RETURN (REG_ECTYPE);
 2301: 
 2302:                         /* Throw away the ] at the end of the character
 2303:                            class.  */
 2304:                         PATFETCH (c);
 2305: 
 2306:                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 2307: 
 2308:                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
 2309: 			  {
 2310: # ifdef _LIBC
 2311: 			    if (__iswctype (__btowc (ch), wt))
 2312: 			      SET_LIST_BIT (ch);
 2313: # else
 2314: 			    if (iswctype (btowc (ch), wt))
 2315: 			      SET_LIST_BIT (ch);
 2316: # endif
 2317: 
 2318: 			    if (translate && (is_upper || is_lower)
 2319: 				&& (ISUPPER (ch) || ISLOWER (ch)))
 2320: 			      SET_LIST_BIT (ch);
 2321: 			  }
 2322: 
 2323:                         had_char_class = true;
 2324: #else
 2325:                         int ch;
 2326:                         boolean is_alnum = STREQ (str, "alnum");
 2327:                         boolean is_alpha = STREQ (str, "alpha");
 2328:                         boolean is_blank = STREQ (str, "blank");
 2329:                         boolean is_cntrl = STREQ (str, "cntrl");
 2330:                         boolean is_digit = STREQ (str, "digit");
 2331:                         boolean is_graph = STREQ (str, "graph");
 2332:                         boolean is_lower = STREQ (str, "lower");
 2333:                         boolean is_print = STREQ (str, "print");
 2334:                         boolean is_punct = STREQ (str, "punct");
 2335:                         boolean is_space = STREQ (str, "space");
 2336:                         boolean is_upper = STREQ (str, "upper");
 2337:                         boolean is_xdigit = STREQ (str, "xdigit");
 2338: 
 2339:                         if (!IS_CHAR_CLASS (str))
 2340: 			  FREE_STACK_RETURN (REG_ECTYPE);
 2341: 
 2342:                         /* Throw away the ] at the end of the character
 2343:                            class.  */
 2344:                         PATFETCH (c);
 2345: 
 2346:                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 2347: 
 2348:                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
 2349:                           {
 2350: 			    /* This was split into 3 if's to
 2351: 			       avoid an arbitrary limit in some compiler.  */
 2352:                             if (   (is_alnum  && ISALNUM (ch))
 2353:                                 || (is_alpha  && ISALPHA (ch))
 2354:                                 || (is_blank  && ISBLANK (ch))
 2355:                                 || (is_cntrl  && ISCNTRL (ch)))
 2356: 			      SET_LIST_BIT (ch);
 2357: 			    if (   (is_digit  && ISDIGIT (ch))
 2358:                                 || (is_graph  && ISGRAPH (ch))
 2359:                                 || (is_lower  && ISLOWER (ch))
 2360:                                 || (is_print  && ISPRINT (ch)))
 2361: 			      SET_LIST_BIT (ch);
 2362: 			    if (   (is_punct  && ISPUNCT (ch))
 2363:                                 || (is_space  && ISSPACE (ch))
 2364:                                 || (is_upper  && ISUPPER (ch))
 2365:                                 || (is_xdigit && ISXDIGIT (ch)))
 2366: 			      SET_LIST_BIT (ch);
 2367: 			    if (   translate && (is_upper || is_lower)
 2368: 				&& (ISUPPER (ch) || ISLOWER (ch)))
 2369: 			      SET_LIST_BIT (ch);
 2370:                           }
 2371:                         had_char_class = true;
 2372: #endif	/* libc || wctype.h */
 2373:                       }
 2374:                     else
 2375:                       {
 2376:                         c1++;
 2377:                         while (c1--)
 2378:                           PATUNFETCH;
 2379:                         SET_LIST_BIT ('[');
 2380:                         SET_LIST_BIT (':');
 2381:                         had_char_class = false;
 2382:                       }
 2383:                   }
 2384:                 else
 2385:                   {
 2386:                     had_char_class = false;
 2387:                     SET_LIST_BIT (c);
 2388:                   }
 2389:               }
 2390: 
 2391:             /* Discard any (non)matching list bytes that are all 0 at the
 2392:                end of the map.  Decrease the map-length byte too.  */
 2393:             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
 2394:               b[-1]--;
 2395:             b += b[-1];
 2396:           }
 2397:           break;
 2398: 
 2399: 
 2400: 	case '(':
 2401:           if (syntax & RE_NO_BK_PARENS)
 2402:             goto handle_open;
 2403:           else
 2404:             goto normal_char;
 2405: 
 2406: 
 2407:         case ')':
 2408:           if (syntax & RE_NO_BK_PARENS)
 2409:             goto handle_close;
 2410:           else
 2411:             goto normal_char;
 2412: 
 2413: 
 2414:         case '\n':
 2415:           if (syntax & RE_NEWLINE_ALT)
 2416:             goto handle_alt;
 2417:           else
 2418:             goto normal_char;
 2419: 
 2420: 
 2421: 	case '|':
 2422:           if (syntax & RE_NO_BK_VBAR)
 2423:             goto handle_alt;
 2424:           else
 2425:             goto normal_char;
 2426: 
 2427: 
 2428:         case '{':
 2429:            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
 2430:              goto handle_interval;
 2431:            else
 2432:              goto normal_char;
 2433: 
 2434: 
 2435:         case '\\':
 2436:           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
 2437: 
 2438:           /* Do not translate the character after the \, so that we can
 2439:              distinguish, e.g., \B from \b, even if we normally would
 2440:              translate, e.g., B to b.  */
 2441:           PATFETCH_RAW (c);
 2442: 
 2443:           switch (c)
 2444:             {
 2445:             case '(':
 2446:               if (syntax & RE_NO_BK_PARENS)
 2447:                 goto normal_backslash;
 2448: 
 2449:             handle_open:
 2450:               bufp->re_nsub++;
 2451:               regnum++;
 2452: 
 2453:               if (COMPILE_STACK_FULL)
 2454:                 {
 2455:                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
 2456:                             compile_stack_elt_t);
 2457:                   if (compile_stack.stack == NULL) return REG_ESPACE;
 2458: 
 2459:                   compile_stack.size <<= 1;
 2460:                 }
 2461: 
 2462:               /* These are the values to restore when we hit end of this
 2463:                  group.  They are all relative offsets, so that if the
 2464:                  whole pattern moves because of realloc, they will still
 2465:                  be valid.  */
 2466:               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
 2467:               COMPILE_STACK_TOP.fixup_alt_jump
 2468:                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
 2469:               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
 2470:               COMPILE_STACK_TOP.regnum = regnum;
 2471: 
 2472:               /* We will eventually replace the 0 with the number of
 2473:                  groups inner to this one.  But do not push a
 2474:                  start_memory for groups beyond the last one we can
 2475:                  represent in the compiled pattern.  */
 2476:               if (regnum <= MAX_REGNUM)
 2477:                 {
 2478:                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
 2479:                   BUF_PUSH_3 (start_memory, regnum, 0);
 2480:                 }
 2481: 
 2482:               compile_stack.avail++;
 2483: 
 2484:               fixup_alt_jump = 0;
 2485:               laststart = 0;
 2486:               begalt = b;
 2487: 	      /* If we've reached MAX_REGNUM groups, then this open
 2488: 		 won't actually generate any code, so we'll have to
 2489: 		 clear pending_exact explicitly.  */
 2490: 	      pending_exact = 0;
 2491:               break;
 2492: 
 2493: 
 2494:             case ')':
 2495:               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
 2496: 
 2497:               if (COMPILE_STACK_EMPTY)
 2498: 		{
 2499: 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
 2500: 		    goto normal_backslash;
 2501: 		  else
 2502: 		    FREE_STACK_RETURN (REG_ERPAREN);
 2503: 		}
 2504: 
 2505:             handle_close:
 2506:               if (fixup_alt_jump)
 2507:                 { /* Push a dummy failure point at the end of the
 2508:                      alternative for a possible future
 2509:                      `pop_failure_jump' to pop.  See comments at
 2510:                      `push_dummy_failure' in `re_match_2'.  */
 2511:                   BUF_PUSH (push_dummy_failure);
 2512: 
 2513:                   /* We allocated space for this jump when we assigned
 2514:                      to `fixup_alt_jump', in the `handle_alt' case below.  */
 2515:                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
 2516:                 }
 2517: 
 2518:               /* See similar code for backslashed left paren above.  */
 2519:               if (COMPILE_STACK_EMPTY)
 2520: 		{
 2521: 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
 2522: 		    goto normal_char;
 2523: 		  else
 2524: 		    FREE_STACK_RETURN (REG_ERPAREN);
 2525: 		}
 2526: 
 2527:               /* Since we just checked for an empty stack above, this
 2528:                  ``can't happen''.  */
 2529:               assert (compile_stack.avail != 0);
 2530:               {
 2531:                 /* We don't just want to restore into `regnum', because
 2532:                    later groups should continue to be numbered higher,
 2533:                    as in `(ab)c(de)' -- the second group is #2.  */
 2534:                 regnum_t this_group_regnum;
 2535: 
 2536:                 compile_stack.avail--;
 2537:                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
 2538:                 fixup_alt_jump
 2539:                   = COMPILE_STACK_TOP.fixup_alt_jump
 2540:                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
 2541:                     : 0;
 2542:                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
 2543:                 this_group_regnum = COMPILE_STACK_TOP.regnum;
 2544: 		/* If we've reached MAX_REGNUM groups, then this open
 2545: 		   won't actually generate any code, so we'll have to
 2546: 		   clear pending_exact explicitly.  */
 2547: 		pending_exact = 0;
 2548: 
 2549:                 /* We're at the end of the group, so now we know how many
 2550:                    groups were inside this one.  */
 2551:                 if (this_group_regnum <= MAX_REGNUM)
 2552:                   {
 2553:                     unsigned char *inner_group_loc
 2554:                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
 2555: 
 2556:                     *inner_group_loc = regnum - this_group_regnum;
 2557:                     BUF_PUSH_3 (stop_memory, this_group_regnum,
 2558:                                 regnum - this_group_regnum);
 2559:                   }
 2560:               }
 2561:               break;
 2562: 
 2563: 
 2564:             case '|':					/* `\|'.  */
 2565:               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
 2566:                 goto normal_backslash;
 2567:             handle_alt:
 2568:               if (syntax & RE_LIMITED_OPS)
 2569:                 goto normal_char;
 2570: 
 2571:               /* Insert before the previous alternative a jump which
 2572:                  jumps to this alternative if the former fails.  */
 2573:               GET_BUFFER_SPACE (3);
 2574:               INSERT_JUMP (on_failure_jump, begalt, b + 6);
 2575:               pending_exact = 0;
 2576:               b += 3;
 2577: 
 2578:               /* The alternative before this one has a jump after it
 2579:                  which gets executed if it gets matched.  Adjust that
 2580:                  jump so it will jump to this alternative's analogous
 2581:                  jump (put in below, which in turn will jump to the next
 2582:                  (if any) alternative's such jump, etc.).  The last such
 2583:                  jump jumps to the correct final destination.  A picture:
 2584:                           _____ _____
 2585:                           |   | |   |
 2586:                           |   v |   v
 2587:                          a | b   | c
 2588: 
 2589:                  If we are at `b', then fixup_alt_jump right now points to a
 2590:                  three-byte space after `a'.  We'll put in the jump, set
 2591:                  fixup_alt_jump to right after `b', and leave behind three
 2592:                  bytes which we'll fill in when we get to after `c'.  */
 2593: 
 2594:               if (fixup_alt_jump)
 2595:                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
 2596: 
 2597:               /* Mark and leave space for a jump after this alternative,
 2598:                  to be filled in later either by next alternative or
 2599:                  when know we're at the end of a series of alternatives.  */
 2600:               fixup_alt_jump = b;
 2601:               GET_BUFFER_SPACE (3);
 2602:               b += 3;
 2603: 
 2604:               laststart = 0;
 2605:               begalt = b;
 2606:               break;
 2607: 
 2608: 
 2609:             case '{':
 2610:               /* If \{ is a literal.  */
 2611:               if (!(syntax & RE_INTERVALS)
 2612:                      /* If we're at `\{' and it's not the open-interval
 2613:                         operator.  */
 2614:                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
 2615:                   || (p - 2 == pattern  &&  p == pend))
 2616:                 goto normal_backslash;
 2617: 
 2618:             handle_interval:
 2619:               {
 2620:                 /* If got here, then the syntax allows intervals.  */
 2621: 
 2622:                 /* At least (most) this many matches must be made.  */
 2623:                 int lower_bound = -1, upper_bound = -1;
 2624: 
 2625:                 beg_interval = p - 1;
 2626: 
 2627:                 if (p == pend)
 2628:                   {
 2629:                     if (syntax & RE_NO_BK_BRACES)
 2630:                       goto unfetch_interval;
 2631:                     else
 2632:                       FREE_STACK_RETURN (REG_EBRACE);
 2633:                   }
 2634: 
 2635:                 GET_UNSIGNED_NUMBER (lower_bound);
 2636: 
 2637:                 if (c == ',')
 2638:                   {
 2639:                     GET_UNSIGNED_NUMBER (upper_bound);
 2640:                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
 2641:                   }
 2642:                 else
 2643:                   /* Interval such as `{1}' => match exactly once. */
 2644:                   upper_bound = lower_bound;
 2645: 
 2646:                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
 2647:                     || lower_bound > upper_bound)
 2648:                   {
 2649:                     if (syntax & RE_NO_BK_BRACES)
 2650:                       goto unfetch_interval;
 2651:                     else
 2652:                       FREE_STACK_RETURN (REG_BADBR);
 2653:                   }
 2654: 
 2655:                 if (!(syntax & RE_NO_BK_BRACES))
 2656:                   {
 2657:                     if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
 2658: 
 2659:                     PATFETCH (c);
 2660:                   }
 2661: 
 2662:                 if (c != '}')
 2663:                   {
 2664:                     if (syntax & RE_NO_BK_BRACES)
 2665:                       goto unfetch_interval;
 2666:                     else
 2667:                       FREE_STACK_RETURN (REG_BADBR);
 2668:                   }
 2669: 
 2670:                 /* We just parsed a valid interval.  */
 2671: 
 2672:                 /* If it's invalid to have no preceding re.  */
 2673:                 if (!laststart)
 2674:                   {
 2675:                     if (syntax & RE_CONTEXT_INVALID_OPS)
 2676:                       FREE_STACK_RETURN (REG_BADRPT);
 2677:                     else if (syntax & RE_CONTEXT_INDEP_OPS)
 2678:                       laststart = b;
 2679:                     else
 2680:                       goto unfetch_interval;
 2681:                   }
 2682: 
 2683:                 /* If the upper bound is zero, don't want to succeed at
 2684:                    all; jump from `laststart' to `b + 3', which will be
 2685:                    the end of the buffer after we insert the jump.  */
 2686:                  if (upper_bound == 0)
 2687:                    {
 2688:                      GET_BUFFER_SPACE (3);
 2689:                      INSERT_JUMP (jump, laststart, b + 3);
 2690:                      b += 3;
 2691:                    }
 2692: 
 2693:                  /* Otherwise, we have a nontrivial interval.  When
 2694:                     we're all done, the pattern will look like:
 2695:                       set_number_at <jump count> <upper bound>
 2696:                       set_number_at <succeed_n count> <lower bound>
 2697:                       succeed_n <after jump addr> <succeed_n count>
 2698:                       <body of loop>
 2699:                       jump_n <succeed_n addr> <jump count>
 2700:                     (The upper bound and `jump_n' are omitted if
 2701:                     `upper_bound' is 1, though.)  */
 2702:                  else
 2703:                    { /* If the upper bound is > 1, we need to insert
 2704:                         more at the end of the loop.  */
 2705:                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
 2706: 
 2707:                      GET_BUFFER_SPACE (nbytes);
 2708: 
 2709:                      /* Initialize lower bound of the `succeed_n', even
 2710:                         though it will be set during matching by its
 2711:                         attendant `set_number_at' (inserted next),
 2712:                         because `re_compile_fastmap' needs to know.
 2713:                         Jump to the `jump_n' we might insert below.  */
 2714:                      INSERT_JUMP2 (succeed_n, laststart,
 2715:                                    b + 5 + (upper_bound > 1) * 5,
 2716:                                    lower_bound);
 2717:                      b += 5;
 2718: 
 2719:                      /* Code to initialize the lower bound.  Insert
 2720:                         before the `succeed_n'.  The `5' is the last two
 2721:                         bytes of this `set_number_at', plus 3 bytes of
 2722:                         the following `succeed_n'.  */
 2723:                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
 2724:                      b += 5;
 2725: 
 2726:                      if (upper_bound > 1)
 2727:                        { /* More than one repetition is allowed, so
 2728:                             append a backward jump to the `succeed_n'
 2729:                             that starts this interval.
 2730: 
 2731:                             When we've reached this during matching,
 2732:                             we'll have matched the interval once, so
 2733:                             jump back only `upper_bound - 1' times.  */
 2734:                          STORE_JUMP2 (jump_n, b, laststart + 5,
 2735:                                       upper_bound - 1);
 2736:                          b += 5;
 2737: 
 2738:                          /* The location we want to set is the second
 2739:                             parameter of the `jump_n'; that is `b-2' as
 2740:                             an absolute address.  `laststart' will be
 2741:                             the `set_number_at' we're about to insert;
 2742:                             `laststart+3' the number to set, the source
 2743:                             for the relative address.  But we are
 2744:                             inserting into the middle of the pattern --
 2745:                             so everything is getting moved up by 5.
 2746:                             Conclusion: (b - 2) - (laststart + 3) + 5,
 2747:                             i.e., b - laststart.
 2748: 
 2749:                             We insert this at the beginning of the loop
 2750:                             so that if we fail during matching, we'll
 2751:                             reinitialize the bounds.  */
 2752:                          insert_op2 (set_number_at, laststart, b - laststart,
 2753:                                      upper_bound - 1, b);
 2754:                          b += 5;
 2755:                        }
 2756:                    }
 2757:                 pending_exact = 0;
 2758:                 beg_interval = NULL;
 2759:               }
 2760:               break;
 2761: 
 2762:             unfetch_interval:
 2763:               /* If an invalid interval, match the characters as literals.  */
 2764:                assert (beg_interval);
 2765:                p = beg_interval;
 2766:                beg_interval = NULL;
 2767: 
 2768:                /* normal_char and normal_backslash need `c'.  */
 2769:                PATFETCH (c);
 2770: 
 2771:                if (!(syntax & RE_NO_BK_BRACES))
 2772:                  {
 2773:                    if (p > pattern  &&  p[-1] == '\\')
 2774:                      goto normal_backslash;
 2775:                  }
 2776:                goto normal_char;
 2777: 
 2778: #ifdef emacs
 2779:             /* There is no way to specify the before_dot and after_dot
 2780:                operators.  rms says this is ok.  --karl  */
 2781:             case '=':
 2782:               BUF_PUSH (at_dot);
 2783:               break;
 2784: 
 2785:             case 's':
 2786:               laststart = b;
 2787:               PATFETCH (c);
 2788:               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
 2789:               break;
 2790: 
 2791:             case 'S':
 2792:               laststart = b;
 2793:               PATFETCH (c);
 2794:               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
 2795:               break;
 2796: #endif /* emacs */
 2797: 
 2798: 
 2799:             case 'w':
 2800: 	      if (syntax & RE_NO_GNU_OPS)
 2801: 		goto normal_char;
 2802:               laststart = b;
 2803:               BUF_PUSH (wordchar);
 2804:               break;
 2805: 
 2806: 
 2807:             case 'W':
 2808: 	      if (syntax & RE_NO_GNU_OPS)
 2809: 		goto normal_char;
 2810:               laststart = b;
 2811:               BUF_PUSH (notwordchar);
 2812:               break;
 2813: 
 2814: 
 2815:             case '<':
 2816: 	      if (syntax & RE_NO_GNU_OPS)
 2817: 		goto normal_char;
 2818:               BUF_PUSH (wordbeg);
 2819:               break;
 2820: 
 2821:             case '>':
 2822: 	      if (syntax & RE_NO_GNU_OPS)
 2823: 		goto normal_char;
 2824:               BUF_PUSH (wordend);
 2825:               break;
 2826: 
 2827:             case 'b':
 2828: 	      if (syntax & RE_NO_GNU_OPS)
 2829: 		goto normal_char;
 2830:               BUF_PUSH (wordbound);
 2831:               break;
 2832: 
 2833:             case 'B':
 2834: 	      if (syntax & RE_NO_GNU_OPS)
 2835: 		goto normal_char;
 2836:               BUF_PUSH (notwordbound);
 2837:               break;
 2838: 
 2839:             case '`':
 2840: 	      if (syntax & RE_NO_GNU_OPS)
 2841: 		goto normal_char;
 2842:               BUF_PUSH (begbuf);
 2843:               break;
 2844: 
 2845:             case '\'':
 2846: 	      if (syntax & RE_NO_GNU_OPS)
 2847: 		goto normal_char;
 2848:               BUF_PUSH (endbuf);
 2849:               break;
 2850: 
 2851:             case '1': case '2': case '3': case '4': case '5':
 2852:             case '6': case '7': case '8': case '9':
 2853:               if (syntax & RE_NO_BK_REFS)
 2854:                 goto normal_char;
 2855: 
 2856:               c1 = c - '0';
 2857: 
 2858:               if (c1 > regnum)
 2859:                 FREE_STACK_RETURN (REG_ESUBREG);
 2860: 
 2861:               /* Can't back reference to a subexpression if inside of it.  */
 2862:               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
 2863:                 goto normal_char;
 2864: 
 2865:               laststart = b;
 2866:               BUF_PUSH_2 (duplicate, c1);
 2867:               break;
 2868: 
 2869: 
 2870:             case '+':
 2871:             case '?':
 2872:               if (syntax & RE_BK_PLUS_QM)
 2873:                 goto handle_plus;
 2874:               else
 2875:                 goto normal_backslash;
 2876: 
 2877:             default:
 2878:             normal_backslash:
 2879:               /* You might think it would be useful for \ to mean
 2880:                  not to translate; but if we don't translate it
 2881:                  it will never match anything.  */
 2882:               c = TRANSLATE (c);
 2883:               goto normal_char;
 2884:             }
 2885:           break;
 2886: 
 2887: 
 2888: 	default:
 2889:         /* Expects the character in `c'.  */
 2890: 	normal_char:
 2891: 	      /* If no exactn currently being built.  */
 2892:           if (!pending_exact
 2893: 
 2894:               /* If last exactn not at current position.  */
 2895:               || pending_exact + *pending_exact + 1 != b
 2896: 
 2897:               /* We have only one byte following the exactn for the count.  */
 2898: 	      || *pending_exact == (1 << BYTEWIDTH) - 1
 2899: 
 2900:               /* If followed by a repetition operator.  */
 2901:               || *p == '*' || *p == '^'
 2902: 	      || ((syntax & RE_BK_PLUS_QM)
 2903: 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
 2904: 		  : (*p == '+' || *p == '?'))
 2905: 	      || ((syntax & RE_INTERVALS)
 2906:                   && ((syntax & RE_NO_BK_BRACES)
 2907: 		      ? *p == '{'
 2908:                       : (p[0] == '\\' && p[1] == '{'))))
 2909: 	    {
 2910: 	      /* Start building a new exactn.  */
 2911: 
 2912:               laststart = b;
 2913: 
 2914: 	      BUF_PUSH_2 (exactn, 0);
 2915: 	      pending_exact = b - 1;
 2916:             }
 2917: 
 2918: 	  BUF_PUSH (c);
 2919:           (*pending_exact)++;
 2920: 	  break;
 2921:         } /* switch (c) */
 2922:     } /* while p != pend */
 2923: 
 2924: 
 2925:   /* Through the pattern now.  */
 2926: 
 2927:   if (fixup_alt_jump)
 2928:     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
 2929: 
 2930:   if (!COMPILE_STACK_EMPTY)
 2931:     FREE_STACK_RETURN (REG_EPAREN);
 2932: 
 2933:   /* If we don't want backtracking, force success
 2934:      the first time we reach the end of the compiled pattern.  */
 2935:   if (syntax & RE_NO_POSIX_BACKTRACKING)
 2936:     BUF_PUSH (succeed);
 2937: 
 2938:   free (compile_stack.stack);
 2939: 
 2940:   /* We have succeeded; set the length of the buffer.  */
 2941:   bufp->used = b - bufp->buffer;
 2942: 
 2943: #ifdef DEBUG
 2944:   if (debug)
 2945:     {
 2946:       DEBUG_PRINT1 ("\nCompiled pattern: \n");
 2947:       print_compiled_pattern (bufp);
 2948:     }
 2949: #endif /* DEBUG */
 2950: 
 2951: #ifndef MATCH_MAY_ALLOCATE
 2952:   /* Initialize the failure stack to the largest possible stack.  This
 2953:      isn't necessary unless we're trying to avoid calling alloca in
 2954:      the search and match routines.  */
 2955:   {
 2956:     int num_regs = bufp->re_nsub + 1;
 2957: 
 2958:     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
 2959:        is strictly greater than re_max_failures, the largest possible stack
 2960:        is 2 * re_max_failures failure points.  */
 2961:     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
 2962:       {
 2963: 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
 2964: 
 2965: # ifdef emacs
 2966: 	if (! fail_stack.stack)
 2967: 	  fail_stack.stack
 2968: 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
 2969: 					    * sizeof (fail_stack_elt_t));
 2970: 	else
 2971: 	  fail_stack.stack
 2972: 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
 2973: 					     (fail_stack.size
 2974: 					      * sizeof (fail_stack_elt_t)));
 2975: # else /* not emacs */
 2976: 	if (! fail_stack.stack)
 2977: 	  fail_stack.stack
 2978: 	    = (fail_stack_elt_t *) malloc (fail_stack.size
 2979: 					   * sizeof (fail_stack_elt_t));
 2980: 	else
 2981: 	  fail_stack.stack
 2982: 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
 2983: 					    (fail_stack.size
 2984: 					     * sizeof (fail_stack_elt_t)));
 2985: # endif /* not emacs */
 2986:       }
 2987: 
 2988:     regex_grow_registers (num_regs);
 2989:   }
 2990: #endif /* not MATCH_MAY_ALLOCATE */
 2991: 
 2992:   return REG_NOERROR;
 2993: } /* regex_compile */
 2994: 
 2995: /* Subroutines for `regex_compile'.  */
 2996: 
 2997: /* Store OP at LOC followed by two-byte integer parameter ARG.  */
 2998: 
 2999: static void
 3000: store_op1 (op, loc, arg)
 3001:     re_opcode_t op;
 3002:     unsigned char *loc;
 3003:     int arg;
 3004: {
 3005:   *loc = (unsigned char) op;
 3006:   STORE_NUMBER (loc + 1, arg);
 3007: }
 3008: 
 3009: 
 3010: /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
 3011: 
 3012: static void
 3013: store_op2 (op, loc, arg1, arg2)
 3014:     re_opcode_t op;
 3015:     unsigned char *loc;
 3016:     int arg1, arg2;
 3017: {
 3018:   *loc = (unsigned char) op;
 3019:   STORE_NUMBER (loc + 1, arg1);
 3020:   STORE_NUMBER (loc + 3, arg2);
 3021: }
 3022: 
 3023: 
 3024: /* Copy the bytes from LOC to END to open up three bytes of space at LOC
 3025:    for OP followed by two-byte integer parameter ARG.  */
 3026: 
 3027: static void
 3028: insert_op1 (op, loc, arg, end)
 3029:     re_opcode_t op;
 3030:     unsigned char *loc;
 3031:     int arg;
 3032:     unsigned char *end;
 3033: {
 3034:   register unsigned char *pfrom = end;
 3035:   register unsigned char *pto = end + 3;
 3036: 
 3037:   while (pfrom != loc)
 3038:     *--pto = *--pfrom;
 3039: 
 3040:   store_op1 (op, loc, arg);
 3041: }
 3042: 
 3043: 
 3044: /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
 3045: 
 3046: static void
 3047: insert_op2 (op, loc, arg1, arg2, end)
 3048:     re_opcode_t op;
 3049:     unsigned char *loc;
 3050:     int arg1, arg2;
 3051:     unsigned char *end;
 3052: {
 3053:   register unsigned char *pfrom = end;
 3054:   register unsigned char *pto = end + 5;
 3055: 
 3056:   while (pfrom != loc)
 3057:     *--pto = *--pfrom;
 3058: 
 3059:   store_op2 (op, loc, arg1, arg2);
 3060: }
 3061: 
 3062: 
 3063: /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
 3064:    after an alternative or a begin-subexpression.  We assume there is at
 3065:    least one character before the ^.  */
 3066: 
 3067: static boolean
 3068: at_begline_loc_p (pattern, p, syntax)
 3069:     const char *pattern, *p;
 3070:     reg_syntax_t syntax;
 3071: {
 3072:   const char *prev = p - 2;
 3073:   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
 3074: 
 3075:   return
 3076:        /* After a subexpression?  */
 3077:        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
 3078:        /* After an alternative?  */
 3079:     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
 3080: }
 3081: 
 3082: 
 3083: /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
 3084:    at least one character after the $, i.e., `P < PEND'.  */
 3085: 
 3086: static boolean
 3087: at_endline_loc_p (p, pend, syntax)
 3088:     const char *p, *pend;
 3089:     reg_syntax_t syntax;
 3090: {
 3091:   const char *next = p;
 3092:   boolean next_backslash = *next == '\\';
 3093:   const char *next_next = p + 1 < pend ? p + 1 : 0;
 3094: 
 3095:   return
 3096:        /* Before a subexpression?  */
 3097:        (syntax & RE_NO_BK_PARENS ? *next == ')'
 3098:         : next_backslash && next_next && *next_next == ')')
 3099:        /* Before an alternative?  */
 3100:     || (syntax & RE_NO_BK_VBAR ? *next == '|'
 3101:         : next_backslash && next_next && *next_next == '|');
 3102: }
 3103: 
 3104: 
 3105: /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
 3106:    false if it's not.  */
 3107: 
 3108: static boolean
 3109: group_in_compile_stack (compile_stack, regnum)
 3110:     compile_stack_type compile_stack;
 3111:     regnum_t regnum;
 3112: {
 3113:   int this_element;
 3114: 
 3115:   for (this_element = compile_stack.avail - 1;
 3116:        this_element >= 0;
 3117:        this_element--)
 3118:     if (compile_stack.stack[this_element].regnum == regnum)
 3119:       return true;
 3120: 
 3121:   return false;
 3122: }
 3123: 
 3124: 
 3125: /* Read the ending character of a range (in a bracket expression) from the
 3126:    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
 3127:    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
 3128:    Then we set the translation of all bits between the starting and
 3129:    ending characters (inclusive) in the compiled pattern B.
 3130: 
 3131:    Return an error code.
 3132: 
 3133:    We use these short variable names so we can use the same macros as
 3134:    `regex_compile' itself.  */
 3135: 
 3136: static reg_errcode_t
 3137: compile_range (p_ptr, pend, translate, syntax, b)
 3138:     const char **p_ptr, *pend;
 3139:     RE_TRANSLATE_TYPE translate;
 3140:     reg_syntax_t syntax;
 3141:     unsigned char *b;
 3142: {
 3143:   unsigned this_char;
 3144: 
 3145:   const char *p = *p_ptr;
 3146:   unsigned int range_start, range_end;
 3147: 
 3148:   if (p == pend)
 3149:     return REG_ERANGE;
 3150: 
 3151:   /* Even though the pattern is a signed `char *', we need to fetch
 3152:      with unsigned char *'s; if the high bit of the pattern character
 3153:      is set, the range endpoints will be negative if we fetch using a
 3154:      signed char *.
 3155: 
 3156:      We also want to fetch the endpoints without translating them; the
 3157:      appropriate translation is done in the bit-setting loop below.  */
 3158:   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
 3159:   range_start = ((const unsigned char *) p)[-2];
 3160:   range_end   = ((const unsigned char *) p)[0];
 3161: 
 3162:   /* Have to increment the pointer into the pattern string, so the
 3163:      caller isn't still at the ending character.  */
 3164:   (*p_ptr)++;
 3165: 
 3166:   /* If the start is after the end, the range is empty.  */
 3167:   if (range_start > range_end)
 3168:     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
 3169: 
 3170:   /* Here we see why `this_char' has to be larger than an `unsigned
 3171:      char' -- the range is inclusive, so if `range_end' == 0xff
 3172:      (assuming 8-bit characters), we would otherwise go into an infinite
 3173:      loop, since all characters <= 0xff.  */
 3174:   for (this_char = range_start; this_char <= range_end; this_char++)
 3175:     {
 3176:       SET_LIST_BIT (TRANSLATE (this_char));
 3177:     }
 3178: 
 3179:   return REG_NOERROR;
 3180: }
 3181: 
 3182: /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
 3183:    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
 3184:    characters can start a string that matches the pattern.  This fastmap
 3185:    is used by re_search to skip quickly over impossible starting points.
 3186: 
 3187:    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
 3188:    area as BUFP->fastmap.
 3189: 
 3190:    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
 3191:    the pattern buffer.
 3192: 
 3193:    Returns 0 if we succeed, -2 if an internal error.   */
 3194: 
int
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
  int j, k;
#ifdef MATCH_MAY_ALLOCATE
  fail_stack_type fail_stack;
#endif
#ifndef REGEX_MALLOC
  /* NOTE(review): not referenced directly in this function; presumably
     used by the alloca-based allocation macros -- confirm before
     removing.  */
  char *destination;
#endif

  register char *fastmap = bufp->fastmap;
  unsigned char *pattern = bufp->buffer;
  unsigned char *p = pattern;
  register unsigned char *pend = pattern + bufp->used;

#ifdef REL_ALLOC
  /* This holds the pointer to the failure stack, when
     it is allocated relocatably.  */
  fail_stack_elt_t *failure_stack_ptr;
#endif

  /* Assume that each path through the pattern can be null until
     proven otherwise.  We set this false at the bottom of switch
     statement, to which we get only if a particular path doesn't
     match the empty string.  */
  boolean path_can_be_null = true;

  /* We aren't doing a `succeed_n' to begin with.  */
  boolean succeed_n_p = false;

  assert (fastmap != NULL && p != NULL);

  INIT_FAIL_STACK ();
  memset (fastmap, 0, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
  bufp->can_be_null = 0;

  /* Walk the compiled pattern.  Each iteration either handles one
     opcode or, when the current path has ended, resumes at an
     alternative start point that was saved on `fail_stack'.  */
  while (1)
    {
      if (p == pend || *p == succeed)
	{
	  /* We have reached the (effective) end of pattern.  */
	  if (!FAIL_STACK_EMPTY ())
	    {
	      bufp->can_be_null |= path_can_be_null;

	      /* Reset for next path.  */
	      path_can_be_null = true;

	      /* Pop the most recently saved alternative and continue
	         scanning from there.  */
	      p = fail_stack.stack[--fail_stack.avail].pointer;

	      continue;
	    }
	  else
	    break;
	}

      /* We should never be about to go beyond the end of the pattern.  */
      assert (p < pend);

      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
	{

        /* I guess the idea here is to simply not bother with a fastmap
           if a backreference is used, since it's too hard to figure out
           the fastmap for the corresponding group.  Setting
           `can_be_null' stops `re_search_2' from using the fastmap, so
           that is all we do.  */
	case duplicate:
	  bufp->can_be_null = 1;
          goto done;


      /* Following are the cases which match a character.  These end
         with `break'.  */

	case exactn:
	  /* `p' is just past the opcode, so p[1] is the second operand
	     byte; it is taken as the first literal character (p[0] is
	     presumably the length byte -- confirm against the compiler
	     side of this file).  */
          fastmap[p[1]] = 1;
	  break;


        case charset:
	  /* *p is the number of bitmap bytes; after the `p++' the bitmap
	     itself is p[0..].  Every bit that is set marks a possible
	     first character.  */
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
              fastmap[j] = 1;
	  break;


	case charset_not:
	  /* Chars beyond end of map must be allowed.  */
	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
            fastmap[j] = 1;

	  /* Within the bitmap, every bit that is clear marks a possible
	     first character.  */
	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
              fastmap[j] = 1;
          break;


	case wordchar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) == Sword)
	      fastmap[j] = 1;
	  break;


	case notwordchar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) != Sword)
	      fastmap[j] = 1;
	  break;


        case anychar:
	  {
	    /* Remember the current newline entry so it can be put back
	       below if `.' does not match newline.  */
	    int fastmap_newline = fastmap['\n'];

	    /* `.' matches anything ...  */
	    for (j = 0; j < (1 << BYTEWIDTH); j++)
	      fastmap[j] = 1;

	    /* ... except perhaps newline.  */
	    if (!(bufp->syntax & RE_DOT_NEWLINE))
	      fastmap['\n'] = fastmap_newline;

	    /* Return if we have already set `can_be_null'; if we have,
	       then the fastmap is irrelevant.  Something's wrong here.
	       (Note that this `else' pairs with the RE_DOT_NEWLINE test
	       above, so the early exit is only taken when `.' matches
	       newline.)  */
	    else if (bufp->can_be_null)
	      goto done;

	    /* Otherwise, have to check alternative paths.  */
	    break;
	  }

#ifdef emacs
        case syntaxspec:
	  k = *p++;
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) == (enum syntaxcode) k)
	      fastmap[j] = 1;
	  break;


	case notsyntaxspec:
	  k = *p++;
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) != (enum syntaxcode) k)
	      fastmap[j] = 1;
	  break;


      /* All cases after this match the empty string.  These end with
         `continue'.  */


	case before_dot:
	case at_dot:
	case after_dot:
          continue;
#endif /* emacs */


        /* Zero-width opcodes with no operands: nothing to record,
           just move on to the next opcode.  */
        case no_op:
        case begline:
        case endline:
	case begbuf:
	case endbuf:
	case wordbound:
	case notwordbound:
	case wordbeg:
	case wordend:
        case push_dummy_failure:
          continue;


	case jump_n:
        case pop_failure_jump:
	case maybe_pop_jump:
	case jump:
        case jump_past_alt:
	case dummy_failure_jump:
	  /* Read the jump displacement into `j' and follow it.  */
          EXTRACT_NUMBER_AND_INCR (j, p);
	  p += j;
	  if (j > 0)
	    continue;

          /* Jump backward implies we just went through the body of a
             loop and matched nothing.  Opcode jumped to should be
             `on_failure_jump' or `succeed_n'.  Just treat it like an
             ordinary jump.  For a * loop, it has pushed its failure
             point already; if so, discard that as redundant.  */
          if ((re_opcode_t) *p != on_failure_jump
	      && (re_opcode_t) *p != succeed_n)
	    continue;

	  /* Step over that opcode and follow its displacement too.  */
          p++;
          EXTRACT_NUMBER_AND_INCR (j, p);
          p += j;

          /* If what's on the stack is where we are now, pop it.  */
          if (!FAIL_STACK_EMPTY ()
	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
            fail_stack.avail--;

          continue;


        case on_failure_jump:
        case on_failure_keep_string_jump:
	handle_on_failure_jump:
          EXTRACT_NUMBER_AND_INCR (j, p);

          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
             end of the pattern.  We don't want to push such a point,
             since when we restore it above, entering the switch will
             increment `p' past the end of the pattern.  We don't need
             to push such a point since we obviously won't find any more
             fastmap entries beyond `pend'.  Such a pattern can match
             the null string, though.  */
          if (p + j < pend)
            {
              if (!PUSH_PATTERN_OP (p + j, fail_stack))
		{
		  /* Could not save the alternative: report an internal
		     error to the caller.  */
		  RESET_FAIL_STACK ();
		  return -2;
		}
            }
          else
            bufp->can_be_null = 1;

	  /* Only set when we arrived here from `succeed_n' below.  */
          if (succeed_n_p)
            {
              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
              succeed_n_p = false;
	    }

          continue;


	case succeed_n:
          /* Get to the number of times to succeed.  */
          p += 2;

          /* Increment p past the n for when k != 0.  */
          EXTRACT_NUMBER_AND_INCR (k, p);
          if (k == 0)
	    {
	      /* Back up to the displacement that follows the opcode so
	         the `on_failure_jump' code can read it (this assumes
	         EXTRACT_NUMBER_AND_INCR advanced `p' by two bytes).  */
              p -= 4;
  	      succeed_n_p = true;  /* Spaghetti code alert.  */
              goto handle_on_failure_jump;
            }
          continue;


	case set_number_at:
	  /* Skip the four operand bytes.  */
          p += 4;
          continue;


	case start_memory:
        case stop_memory:
	  /* Skip the two operand bytes.  */
	  p += 2;
	  continue;


	default:
          abort (); /* We have listed all the cases.  */
        } /* switch *p++ */

      /* Getting here means we have found the possible starting
         characters for one path of the pattern -- and that the empty
         string does not match.  We need not follow this path further.
         Instead, look at the next alternative (remembered on the
         stack), or quit if no more.  The test at the top of the loop
         does these things.  */
      path_can_be_null = false;
      p = pend;
    } /* while p */

  /* Set `can_be_null' for the last path (also the first path, if the
     pattern is empty).  */
  bufp->can_be_null |= path_can_be_null;

 done:
  /* Paired with INIT_FAIL_STACK above (presumably releases the stack
     storage -- the macro is defined elsewhere in this file).  */
  RESET_FAIL_STACK ();
  return 0;
} /* re_compile_fastmap */
#ifdef _LIBC
weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif
 3487: 
 3488: /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 3489:    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 3490:    this memory for recording register information.  STARTS and ENDS
 3491:    must be allocated using the malloc library routine, and must each
 3492:    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 3493: 
 3494:    If NUM_REGS == 0, then subsequent matches should allocate their own
 3495:    register data.
 3496: 
 3497:    Unless this function is called, the first search or match using
 3498:    PATTERN_BUFFER will allocate its own register data, without
 3499:    freeing the old data.  */
 3500: 
 3501: void
 3502: re_set_registers (bufp, regs, num_regs, starts, ends)
 3503:     struct re_pattern_buffer *bufp;
 3504:     struct re_registers *regs;
 3505:     unsigned num_regs;
 3506:     regoff_t *starts, *ends;
 3507: {
 3508:   if (num_regs)
 3509:     {
 3510:       bufp->regs_allocated = REGS_REALLOCATE;
 3511:       regs->num_regs = num_regs;
 3512:       regs->start = starts;
 3513:       regs->end = ends;
 3514:     }
 3515:   else
 3516:     {
 3517:       bufp->regs_allocated = REGS_UNALLOCATED;
 3518:       regs->num_regs = 0;
 3519:       regs->start = regs->end = (regoff_t *) 0;
 3520:     }
 3521: }
 3522: #ifdef _LIBC
 3523: weak_alias (__re_set_registers, re_set_registers)
 3524: #endif
 3525: 
 3526: /* Searching routines.  */
 3527: 
 3528: /* Like re_search_2, below, but only one string is specified, and
 3529:    doesn't let you say where to stop matching. */
 3530: 
 3531: int
 3532: re_search (bufp, string, size, startpos, range, regs)
 3533:      struct re_pattern_buffer *bufp;
 3534:      const char *string;
 3535:      int size, startpos, range;
 3536:      struct re_registers *regs;
 3537: {
 3538:   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
 3539: 		      regs, size);
 3540: }
 3541: #ifdef _LIBC
 3542: weak_alias (__re_search, re_search)
 3543: #endif
 3544: 
 3545: 
 3546: /* Using the compiled pattern in BUFP->buffer, first tries to match the
 3547:    virtual concatenation of STRING1 and STRING2, starting first at index
 3548:    STARTPOS, then at STARTPOS + 1, and so on.
 3549: 
 3550:    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
 3551: 
 3552:    RANGE is how far to scan while trying to match.  RANGE = 0 means try
 3553:    only at STARTPOS; in general, the last start tried is STARTPOS +
 3554:    RANGE.
 3555: 
 3556:    In REGS, return the indices of the virtual concatenation of STRING1
 3557:    and STRING2 that matched the entire BUFP->buffer and its contained
 3558:    subexpressions.
 3559: 
 3560:    Do not consider matching one past the index STOP in the virtual
 3561:    concatenation of STRING1 and STRING2.
 3562: 
 3563:    We return either the position in the strings at which the match was
 3564:    found, -1 if no match, or -2 if error (such as failure
 3565:    stack overflow).  */
 3566: 
 3567: int
 3568: re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
 3569:      struct re_pattern_buffer *bufp;
 3570:      const char *string1, *string2;
 3571:      int size1, size2;
 3572:      int startpos;
 3573:      int range;
 3574:      struct re_registers *regs;
 3575:      int stop;
 3576: {
 3577:   int val;
 3578:   register char *fastmap = bufp->fastmap;
 3579:   register RE_TRANSLATE_TYPE translate = bufp->translate;
 3580:   int total_size = size1 + size2;
 3581:   int endpos = startpos + range;
 3582: 
 3583:   /* Check for out-of-range STARTPOS.  */
 3584:   if (startpos < 0 || startpos > total_size)
 3585:     return -1;
 3586: 
 3587:   /* Fix up RANGE if it might eventually take us outside
 3588:      the virtual concatenation of STRING1 and STRING2.
 3589:      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
 3590:   if (endpos < 0)
 3591:     range = 0 - startpos;
 3592:   else if (endpos > total_size)
 3593:     range = total_size - startpos;
 3594: 
 3595:   /* If the search isn't to be a backwards one, don't waste time in a
 3596:      search for a pattern that must be anchored.  */
 3597:   if (bufp->used > 0 && range > 0
 3598:       && ((re_opcode_t) bufp->buffer[0] == begbuf
 3599: 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
 3600: 	  || ((re_opcode_t) bufp->buffer[0] == begline
 3601: 	      && !bufp->newline_anchor)))
 3602:     {
 3603:       if (startpos > 0)
 3604: 	return -1;
 3605:       else
 3606: 	range = 1;
 3607:     }
 3608: 
 3609: #ifdef emacs
 3610:   /* In a forward search for something that starts with \=.
 3611:      don't keep searching past point.  */
 3612:   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
 3613:     {
 3614:       range = PT - startpos;
 3615:       if (range <= 0)
 3616: 	return -1;
 3617:     }
 3618: #endif /* emacs */
 3619: 
 3620:   /* Update the fastmap now if not correct already.  */
 3621:   if (fastmap && !bufp->fastmap_accurate)
 3622:     if (re_compile_fastmap (bufp) == -2)
 3623:       return -2;
 3624: 
 3625:   /* Loop through the string, looking for a place to start matching.  */
 3626:   for (;;)
 3627:     {
 3628:       /* If a fastmap is supplied, skip quickly over characters that
 3629:          cannot be the start of a match.  If the pattern can match the
 3630:          null string, however, we don't need to skip characters; we want
 3631:          the first null string.  */
 3632:       if (fastmap && startpos < total_size && !bufp->can_be_null)
 3633: 	{
 3634: 	  if (range > 0)	/* Searching forwards.  */
 3635: 	    {
 3636: 	      register const char *d;
 3637: 	      register int lim = 0;
 3638: 	      int irange = range;
 3639: 
 3640:               if (startpos < size1 && startpos + range >= size1)
 3641:                 lim = range - (size1 - startpos);
 3642: 
 3643: 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
 3644: 
 3645:               /* Written out as an if-else to avoid testing `translate'
 3646:                  inside the loop.  */
 3647: 	      if (translate)
 3648:                 while (range > lim
 3649:                        && !fastmap[(unsigned char)
 3650: 				   translate[(unsigned char) *d++]])
 3651:                   range--;
 3652: 	      else
 3653:                 while (range > lim && !fastmap[(unsigned char) *d++])
 3654:                   range--;
 3655: 
 3656: 	      startpos += irange - range;
 3657: 	    }
 3658: 	  else				/* Searching backwards.  */
 3659: 	    {
 3660: 	      register char c = (size1 == 0 || startpos >= size1
 3661:                                  ? string2[startpos - size1]
 3662:                                  : string1[startpos]);
 3663: 
 3664: 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
 3665: 		goto advance;
 3666: 	    }
 3667: 	}
 3668: 
 3669:       /* If can't match the null string, and that's all we have left, fail.  */
 3670:       if (range >= 0 && startpos == total_size && fastmap
 3671:           && !bufp->can_be_null)
 3672: 	return -1;
 3673: 
 3674:       val = re_match_2_internal (bufp, string1, size1, string2, size2,
 3675: 				 startpos, regs, stop);
 3676: #ifndef REGEX_MALLOC
 3677: # ifdef C_ALLOCA
 3678:       alloca (0);
 3679: # endif
 3680: #endif
 3681: 
 3682:       if (val >= 0)
 3683: 	return startpos;
 3684: 
 3685:       if (val == -2)
 3686: 	return -2;
 3687: 
 3688:     advance:
 3689:       if (!range)
 3690:         break;
 3691:       else if (range > 0)
 3692:         {
 3693:           range--;
 3694:           startpos++;
 3695:         }
 3696:       else
 3697:         {
 3698:           range++;
 3699:           startpos--;
 3700:         }
 3701:     }
 3702:   return -1;
 3703: } /* re_search_2 */
 3704: #ifdef _LIBC
 3705: weak_alias (__re_search_2, re_search_2)
 3706: #endif
 3707: 
/* The macros below are not self-contained: they refer to names such as
   `string1', `string2', `size1', `size2', `end1', `end2', `d', `dend',
   `end_match_1', `end_match_2' and the label `fail', all of which must
   be in scope where the macro is expanded.  */

/* This converts PTR, a pointer into one of the search strings `string1'
   and `string2' into an offset from the beginning of that string.
   Offsets into `string2' are shifted by `size1', i.e. they are
   positions in the virtual concatenation of the two strings.  */
#define POINTER_TO_OFFSET(ptr)			\
  (FIRST_STRING_P (ptr)				\
   ? ((regoff_t) ((ptr) - string1))		\
   : ((regoff_t) ((ptr) - string2 + size1)))

/* Macros for dealing with the split strings in re_match_2.  */

/* True while the current end-of-data pointer `dend' is the match limit
   of the first string.  */
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Call before fetching a character with *d.  This switches over to
   string2 if necessary.  Jumps to the label `fail' when `d' has reached
   the match limit of string2.  */
#define PREFETCH()							\
  while (d == dend)						    	\
    {									\
      /* End of string2 => fail.  */					\
      if (dend == end_match_2) 						\
        goto fail;							\
      /* End of string1 => advance to string2.  */ 			\
      d = string2;						        \
      dend = end_match_2;						\
    }


/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.
   Note that AT_STRINGS_BEG is also true whenever `size2' is zero,
   whatever D is.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)


/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  */
#define WORDCHAR_P(d)							\
  (SYNTAX ((d) == end1 ? *string2					\
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   == Sword)

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
 3756: 
 3757: /* Free everything we malloc.  */
 3758: #ifdef MATCH_MAY_ALLOCATE
 3759: # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
 3760: # define FREE_VARIABLES()						\
 3761:   do {									\
 3762:     REGEX_FREE_STACK (fail_stack.stack);				\
 3763:     FREE_VAR (regstart);						\
 3764:     FREE_VAR (regend);							\
 3765:     FREE_VAR (old_regstart);						\
 3766:     FREE_VAR (old_regend);						\
 3767:     FREE_VAR (best_regstart);						\
 3768:     FREE_VAR (best_regend);						\
 3769:     FREE_VAR (reg_info);						\
 3770:     FREE_VAR (reg_dummy);						\
 3771:     FREE_VAR (reg_info_dummy);						\
 3772:   } while (0)
 3773: #else
 3774: # define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
 3775: #endif /* not MATCH_MAY_ALLOCATE */
 3776: 
/* Sentinel values meaning "no register is currently active".

   These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.

   NOTE(review): "larger than 255" holds provided BYTEWIDTH is 8 --
   confirm against its definition.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
 3786: 
 3787: /* Matching routines.  */
 3788: 
 3789: #ifndef emacs   /* Emacs never uses this.  */
 3790: /* re_match is like re_match_2 except it takes only a single string.  */
 3791: 
 3792: int
 3793: re_match (bufp, string, size, pos, regs)
 3794:      struct re_pattern_buffer *bufp;
 3795:      const char *string;
 3796:      int size, pos;
 3797:      struct re_registers *regs;
 3798: {
 3799:   int result = re_match_2_internal (bufp, NULL, 0, string, size,
 3800: 				    pos, regs, size);
 3801: # ifndef REGEX_MALLOC
 3802: #  ifdef C_ALLOCA
 3803:   alloca (0);
 3804: #  endif
 3805: # endif
 3806:   return result;
 3807: }
 3808: # ifdef _LIBC
 3809: weak_alias (__re_match, re_match)
 3810: # endif
 3811: #endif /* not emacs */
 3812: 
/* Forward declarations of file-local helpers; their definitions are
   elsewhere in this file.  The three *_match_null_string_p predicates
   take a position in the compiled pattern (P), its END, and the
   register information vector.  */
static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
						    unsigned char *end,
						register_info_type *reg_info));
static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
						  unsigned char *end,
						register_info_type *reg_info));
static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
							unsigned char *end,
						register_info_type *reg_info));
/* Compares LEN bytes of S1 and S2 using TRANSLATE -- presumably by
   comparing the translated characters; see the definition.  */
static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
				     int len, char *translate));
 3824: 
 3825: /* re_match_2 matches the compiled pattern in BUFP against the
 3826:    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
 3827:    and SIZE2, respectively).  We start matching at POS, and stop
 3828:    matching at STOP.
 3829: 
 3830:    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
 3831:    store offsets for the substring each group matched in REGS.  See the
 3832:    documentation for exactly how many groups we fill.
 3833: 
 3834:    We return -1 if no match, -2 if an internal error (such as the
 3835:    failure stack overflowing).  Otherwise, we return the length of the
 3836:    matched substring.  */
 3837: 
 3838: int
 3839: re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
 3840:      struct re_pattern_buffer *bufp;
 3841:      const char *string1, *string2;
 3842:      int size1, size2;
 3843:      int pos;
 3844:      struct re_registers *regs;
 3845:      int stop;
 3846: {
 3847:   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
 3848: 				    pos, regs, stop);
 3849: #ifndef REGEX_MALLOC
 3850: # ifdef C_ALLOCA
 3851:   alloca (0);
 3852: # endif
 3853: #endif
 3854:   return result;
 3855: }
 3856: #ifdef _LIBC
 3857: weak_alias (__re_match_2, re_match_2)
 3858: #endif
 3859: 
 3860: /* This is a separate function so that we can force an alloca cleanup
 3861:    afterwards.  */
 3862: static int
 3863: re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 3864:      struct re_pattern_buffer *bufp;
 3865:      const char *string1, *string2;
 3866:      int size1, size2;
 3867:      int pos;
 3868:      struct re_registers *regs;
 3869:      int stop;
 3870: {
 3871:   /* General temporaries.  */
 3872:   int mcnt;
 3873:   unsigned char *p1;
 3874: 
 3875:   /* Just past the end of the corresponding string.  */
 3876:   const char *end1, *end2;
 3877: 
 3878:   /* Pointers into string1 and string2, just past the last characters in
 3879:      each to consider matching.  */
 3880:   const char *end_match_1, *end_match_2;
 3881: 
 3882:   /* Where we are in the data, and the end of the current string.  */
 3883:   const char *d, *dend;
 3884: 
 3885:   /* Where we are in the pattern, and the end of the pattern.  */
 3886:   unsigned char *p = bufp->buffer;
 3887:   register unsigned char *pend = p + bufp->used;
 3888: 
 3889:   /* Mark the opcode just after a start_memory, so we can test for an
 3890:      empty subpattern when we get to the stop_memory.  */
 3891:   unsigned char *just_past_start_mem = 0;
 3892: 
 3893:   /* We use this to map every character in the string.  */
 3894:   RE_TRANSLATE_TYPE translate = bufp->translate;
 3895: 
 3896:   /* Failure point stack.  Each place that can handle a failure further
 3897:      down the line pushes a failure point on this stack.  It consists of
 3898:      restart, regend, and reg_info for all registers corresponding to
 3899:      the subexpressions we're currently inside, plus the number of such
 3900:      registers, and, finally, two char *'s.  The first char * is where
 3901:      to resume scanning the pattern; the second one is where to resume
 3902:      scanning the strings.  If the latter is zero, the failure point is
 3903:      a ``dummy''; if a failure happens and the failure point is a dummy,
 3904:      it gets discarded and the next next one is tried.  */
 3905: #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
 3906:   fail_stack_type fail_stack;
 3907: #endif
 3908: #ifdef DEBUG
 3909:   static unsigned failure_id;
 3910:   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
 3911: #endif
 3912: 
 3913: #ifdef REL_ALLOC
 3914:   /* This holds the pointer to the failure stack, when
 3915:      it is allocated relocatably.  */
 3916:   fail_stack_elt_t *failure_stack_ptr;
 3917: #endif
 3918: 
 3919:   /* We fill all the registers internally, independent of what we
 3920:      return, for use in backreferences.  The number here includes
 3921:      an element for register zero.  */
 3922:   size_t num_regs = bufp->re_nsub + 1;
 3923: 
 3924:   /* The currently active registers.  */
 3925:   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
 3926:   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
 3927: 
 3928:   /* Information on the contents of registers. These are pointers into
 3929:      the input strings; they record just what was matched (on this
 3930:      attempt) by a subexpression part of the pattern, that is, the
 3931:      regnum-th regstart pointer points to where in the pattern we began
 3932:      matching and the regnum-th regend points to right after where we
 3933:      stopped matching the regnum-th subexpression.  (The zeroth register
 3934:      keeps track of what the whole pattern matches.)  */
 3935: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
 3936:   const char **regstart, **regend;
 3937: #endif
 3938: 
 3939:   /* If a group that's operated upon by a repetition operator fails to
 3940:      match anything, then the register for its start will need to be
 3941:      restored because it will have been set to wherever in the string we
 3942:      are when we last see its open-group operator.  Similarly for a
 3943:      register's end.  */
 3944: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
 3945:   const char **old_regstart, **old_regend;
 3946: #endif
 3947: 
 3948:   /* The is_active field of reg_info helps us keep track of which (possibly
 3949:      nested) subexpressions we are currently in. The matched_something
 3950:      field of reg_info[reg_num] helps us tell whether or not we have
 3951:      matched any of the pattern so far this time through the reg_num-th
 3952:      subexpression.  These two fields get reset each time through any
 3953:      loop their register is in.  */
 3954: #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
 3955:   register_info_type *reg_info;
 3956: #endif
 3957: 
 3958:   /* The following record the register info as found in the above
 3959:      variables when we find a match better than any we've seen before.
 3960:      This happens as we backtrack through the failure points, which in
 3961:      turn happens only if we have not yet matched the entire string. */
 3962:   unsigned best_regs_set = false;
 3963: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
 3964:   const char **best_regstart, **best_regend;
 3965: #endif
 3966: 
 3967:   /* Logically, this is `best_regend[0]'.  But we don't want to have to
 3968:      allocate space for that if we're not allocating space for anything
 3969:      else (see below).  Also, we never need info about register 0 for
 3970:      any of the other register vectors, and it seems rather a kludge to
 3971:      treat `best_regend' differently than the rest.  So we keep track of
 3972:      the end of the best match so far in a separate variable.  We
 3973:      initialize this to NULL so that when we backtrack the first time
 3974:      and need to test it, it's not garbage.  */
 3975:   const char *match_end = NULL;
 3976: 
 3977:   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
 3978:   int set_regs_matched_done = 0;
 3979: 
 3980:   /* Used when we pop values we don't care about.  */
 3981: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
 3982:   const char **reg_dummy;
 3983:   register_info_type *reg_info_dummy;
 3984: #endif
 3985: 
 3986: #ifdef DEBUG
 3987:   /* Counts the total number of registers pushed.  */
 3988:   unsigned num_regs_pushed = 0;
 3989: #endif
 3990: 
 3991:   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
 3992: 
 3993:   INIT_FAIL_STACK ();
 3994: 
 3995: #ifdef MATCH_MAY_ALLOCATE
 3996:   /* Do not bother to initialize all the register variables if there are
 3997:      no groups in the pattern, as it takes a fair amount of time.  If
 3998:      there are groups, we include space for register 0 (the whole
 3999:      pattern), even though we never use it, since it simplifies the
 4000:      array indexing.  We should fix this.  */
 4001:   if (bufp->re_nsub)
 4002:     {
 4003:       regstart = REGEX_TALLOC (num_regs, const char *);
 4004:       regend = REGEX_TALLOC (num_regs, const char *);
 4005:       old_regstart = REGEX_TALLOC (num_regs, const char *);
 4006:       old_regend = REGEX_TALLOC (num_regs, const char *);
 4007:       best_regstart = REGEX_TALLOC (num_regs, const char *);
 4008:       best_regend = REGEX_TALLOC (num_regs, const char *);
 4009:       reg_info = REGEX_TALLOC (num_regs, register_info_type);
 4010:       reg_dummy = REGEX_TALLOC (num_regs, const char *);
 4011:       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
 4012: 
 4013:       if (!(regstart && regend && old_regstart && old_regend && reg_info
 4014:             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
 4015:         {
 4016:           FREE_VARIABLES ();
 4017:           return -2;
 4018:         }
 4019:     }
 4020:   else
 4021:     {
 4022:       /* We must initialize all our variables to NULL, so that
 4023:          `FREE_VARIABLES' doesn't try to free them.  */
 4024:       regstart = regend = old_regstart = old_regend = best_regstart
 4025:         = best_regend = reg_dummy = NULL;
 4026:       reg_info = reg_info_dummy = (register_info_type *) NULL;
 4027:     }
 4028: #endif /* MATCH_MAY_ALLOCATE */
 4029: 
 4030:   /* The starting position is bogus.  */
 4031:   if (pos < 0 || pos > size1 + size2)
 4032:     {
 4033:       FREE_VARIABLES ();
 4034:       return -1;
 4035:     }
 4036: 
 4037:   /* Initialize subexpression text positions to -1 to mark ones that no
 4038:      start_memory/stop_memory has been seen for. Also initialize the
 4039:      register information struct.  */
 4040:   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
 4041:     {
 4042:       regstart[mcnt] = regend[mcnt]
 4043:         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
 4044: 
 4045:       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
 4046:       IS_ACTIVE (reg_info[mcnt]) = 0;
 4047:       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
 4048:       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
 4049:     }
 4050: 
 4051:   /* We move `string1' into `string2' if the latter's empty -- but not if
 4052:      `string1' is null.  */
 4053:   if (size2 == 0 && string1 != NULL)
 4054:     {
 4055:       string2 = string1;
 4056:       size2 = size1;
 4057:       string1 = 0;
 4058:       size1 = 0;
 4059:     }
 4060:   end1 = string1 + size1;
 4061:   end2 = string2 + size2;
 4062: 
 4063:   /* Compute where to stop matching, within the two strings.  */
 4064:   if (stop <= size1)
 4065:     {
 4066:       end_match_1 = string1 + stop;
 4067:       end_match_2 = string2;
 4068:     }
 4069:   else
 4070:     {
 4071:       end_match_1 = end1;
 4072:       end_match_2 = string2 + stop - size1;
 4073:     }
 4074: 
 4075:   /* `p' scans through the pattern as `d' scans through the data.
 4076:      `dend' is the end of the input string that `d' points within.  `d'
 4077:      is advanced into the following input string whenever necessary, but
 4078:      this happens before fetching; therefore, at the beginning of the
 4079:      loop, `d' can be pointing at the end of a string, but it cannot
 4080:      equal `string2'.  */
 4081:   if (size1 > 0 && pos <= size1)
 4082:     {
 4083:       d = string1 + pos;
 4084:       dend = end_match_1;
 4085:     }
 4086:   else
 4087:     {
 4088:       d = string2 + pos - size1;
 4089:       dend = end_match_2;
 4090:     }
 4091: 
 4092:   DEBUG_PRINT1 ("The compiled pattern is:\n");
 4093:   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
 4094:   DEBUG_PRINT1 ("The string to match is: `");
 4095:   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
 4096:   DEBUG_PRINT1 ("'\n");
 4097: 
 4098:   /* This loops over pattern commands.  It exits by returning from the
 4099:      function if the match is complete, or it drops through if the match
 4100:      fails at this starting point in the input data.  */
 4101:   for (;;)
 4102:     {
 4103: #ifdef _LIBC
 4104:       DEBUG_PRINT2 ("\n%p: ", p);
 4105: #else
 4106:       DEBUG_PRINT2 ("\n0x%x: ", p);
 4107: #endif
 4108: 
 4109:       if (p == pend)
 4110: 	{ /* End of pattern means we might have succeeded.  */
 4111:           DEBUG_PRINT1 ("end of pattern ... ");
 4112: 
 4113: 	  /* If we haven't matched the entire string, and we want the
 4114:              longest match, try backtracking.  */
 4115:           if (d != end_match_2)
 4116: 	    {
 4117: 	      /* 1 if this match ends in the same string (string1 or string2)
 4118: 		 as the best previous match.  */
 4119: 	      boolean same_str_p = (FIRST_STRING_P (match_end)
 4120: 				    == MATCHING_IN_FIRST_STRING);
 4121: 	      /* 1 if this match is the best seen so far.  */
 4122: 	      boolean best_match_p;
 4123: 
 4124: 	      /* AIX compiler got confused when this was combined
 4125: 		 with the previous declaration.  */
 4126: 	      if (same_str_p)
 4127: 		best_match_p = d > match_end;
 4128: 	      else
 4129: 		best_match_p = !MATCHING_IN_FIRST_STRING;
 4130: 
 4131:               DEBUG_PRINT1 ("backtracking.\n");
 4132: 
 4133:               if (!FAIL_STACK_EMPTY ())
 4134:                 { /* More failure points to try.  */
 4135: 
 4136:                   /* If exceeds best match so far, save it.  */
 4137:                   if (!best_regs_set || best_match_p)
 4138:                     {
 4139:                       best_regs_set = true;
 4140:                       match_end = d;
 4141: 
 4142:                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
 4143: 
 4144:                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
 4145:                         {
 4146:                           best_regstart[mcnt] = regstart[mcnt];
 4147:                           best_regend[mcnt] = regend[mcnt];
 4148:                         }
 4149:                     }
 4150:                   goto fail;
 4151:                 }
 4152: 
 4153:               /* If no failure points, don't restore garbage.  And if
 4154:                  last match is real best match, don't restore second
 4155:                  best one. */
 4156:               else if (best_regs_set && !best_match_p)
 4157:                 {
 4158:   	        restore_best_regs:
 4159:                   /* Restore best match.  It may happen that `dend ==
 4160:                      end_match_1' while the restored d is in string2.
 4161:                      For example, the pattern `x.*y.*z' against the
 4162:                      strings `x-' and `y-z-', if the two strings are
 4163:                      not consecutive in memory.  */
 4164:                   DEBUG_PRINT1 ("Restoring best registers.\n");
 4165: 
 4166:                   d = match_end;
 4167:                   dend = ((d >= string1 && d <= end1)
 4168: 		           ? end_match_1 : end_match_2);
 4169: 
 4170: 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
 4171: 		    {
 4172: 		      regstart[mcnt] = best_regstart[mcnt];
 4173: 		      regend[mcnt] = best_regend[mcnt];
 4174: 		    }
 4175:                 }
 4176:             } /* d != end_match_2 */
 4177: 
 4178: 	succeed_label:
 4179:           DEBUG_PRINT1 ("Accepting match.\n");
 4180: 
 4181:           /* If caller wants register contents data back, do it.  */
 4182:           if (regs && !bufp->no_sub)
 4183: 	    {
 4184:               /* Have the register data arrays been allocated?  */
 4185:               if (bufp->regs_allocated == REGS_UNALLOCATED)
 4186:                 { /* No.  So allocate them with malloc.  We need one
 4187:                      extra element beyond `num_regs' for the `-1' marker
 4188:                      GNU code uses.  */
 4189:                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
 4190:                   regs->start = TALLOC (regs->num_regs, regoff_t);
 4191:                   regs->end = TALLOC (regs->num_regs, regoff_t);
 4192:                   if (regs->start == NULL || regs->end == NULL)
 4193: 		    {
 4194: 		      FREE_VARIABLES ();
 4195: 		      return -2;
 4196: 		    }
 4197:                   bufp->regs_allocated = REGS_REALLOCATE;
 4198:                 }
 4199:               else if (bufp->regs_allocated == REGS_REALLOCATE)
 4200:                 { /* Yes.  If we need more elements than were already
 4201:                      allocated, reallocate them.  If we need fewer, just
 4202:                      leave it alone.  */
 4203:                   if (regs->num_regs < num_regs + 1)
 4204:                     {
 4205:                       regs->num_regs = num_regs + 1;
 4206:                       RETALLOC (regs->start, regs->num_regs, regoff_t);
 4207:                       RETALLOC (regs->end, regs->num_regs, regoff_t);
 4208:                       if (regs->start == NULL || regs->end == NULL)
 4209: 			{
 4210: 			  FREE_VARIABLES ();
 4211: 			  return -2;
 4212: 			}
 4213:                     }
 4214:                 }
 4215:               else
 4216: 		{
 4217: 		  /* These braces fend off an "empty body in an else-statement"
 4218: 		     warning under GCC when assert expands to nothing.  */
 4219: 		  assert (bufp->regs_allocated == REGS_FIXED);
 4220: 		}
 4221: 
 4222:               /* Convert the pointer data in `regstart' and `regend' to
 4223:                  indices.  Register zero has to be set differently,
 4224:                  since we haven't kept track of any info for it.  */
 4225:               if (regs->num_regs > 0)
 4226:                 {
 4227:                   regs->start[0] = pos;
 4228:                   regs->end[0] = (MATCHING_IN_FIRST_STRING
 4229: 				  ? ((regoff_t) (d - string1))
 4230: 			          : ((regoff_t) (d - string2 + size1)));
 4231:                 }
 4232: 
 4233:               /* Go through the first `min (num_regs, regs->num_regs)'
 4234:                  registers, since that is all we initialized.  */
 4235: 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
 4236: 		   mcnt++)
 4237: 		{
 4238:                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
 4239:                     regs->start[mcnt] = regs->end[mcnt] = -1;
 4240:                   else
 4241:                     {
 4242: 		      regs->start[mcnt]
 4243: 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
 4244:                       regs->end[mcnt]
 4245: 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
 4246:                     }
 4247: 		}
 4248: 
 4249:               /* If the regs structure we return has more elements than
 4250:                  were in the pattern, set the extra elements to -1.  If
 4251:                  we (re)allocated the registers, this is the case,
 4252:                  because we always allocate enough to have at least one
 4253:                  -1 at the end.  */
 4254:               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
 4255:                 regs->start[mcnt] = regs->end[mcnt] = -1;
 4256: 	    } /* regs && !bufp->no_sub */
 4257: 
 4258:           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
 4259:                         nfailure_points_pushed, nfailure_points_popped,
 4260:                         nfailure_points_pushed - nfailure_points_popped);
 4261:           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
 4262: 
 4263:           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
 4264: 			    ? string1
 4265: 			    : string2 - size1);
 4266: 
 4267:           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
 4268: 
 4269:           FREE_VARIABLES ();
 4270:           return mcnt;
 4271:         }
 4272: 
 4273:       /* Otherwise match next pattern command.  */
 4274:       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
 4275: 	{
 4276:         /* Ignore these.  Used to ignore the n of succeed_n's which
 4277:            currently have n == 0.  */
 4278:         case no_op:
 4279:           DEBUG_PRINT1 ("EXECUTING no_op.\n");
 4280:           break;
 4281: 
 4282: 	case succeed:
 4283:           DEBUG_PRINT1 ("EXECUTING succeed.\n");
 4284: 	  goto succeed_label;
 4285: 
 4286:         /* Match the next n pattern characters exactly.  The following
 4287:            byte in the pattern defines n, and the n bytes after that
 4288:            are the characters to match.  */
 4289: 	case exactn:
 4290: 	  mcnt = *p++;
 4291:           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
 4292: 
 4293:           /* This is written out as an if-else so we don't waste time
 4294:              testing `translate' inside the loop.  */
 4295:           if (translate)
 4296: 	    {
 4297: 	      do
 4298: 		{
 4299: 		  PREFETCH ();
 4300: 		  if ((unsigned char) translate[(unsigned char) *d++]
 4301: 		      != (unsigned char) *p++)
 4302:                     goto fail;
 4303: 		}
 4304: 	      while (--mcnt);
 4305: 	    }
 4306: 	  else
 4307: 	    {
 4308: 	      do
 4309: 		{
 4310: 		  PREFETCH ();
 4311: 		  if (*d++ != (char) *p++) goto fail;
 4312: 		}
 4313: 	      while (--mcnt);
 4314: 	    }
 4315: 	  SET_REGS_MATCHED ();
 4316:           break;
 4317: 
 4318: 
 4319:         /* Match any character except possibly a newline or a null.  */
 4320: 	case anychar:
 4321:           DEBUG_PRINT1 ("EXECUTING anychar.\n");
 4322: 
 4323:           PREFETCH ();
 4324: 
 4325:           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
 4326:               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
 4327: 	    goto fail;
 4328: 
 4329:           SET_REGS_MATCHED ();
 4330:           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
 4331:           d++;
 4332: 	  break;
 4333: 
 4334: 
 4335: 	case charset:
 4336: 	case charset_not:
 4337: 	  {
 4338: 	    register unsigned char c;
 4339: 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
 4340: 
 4341:             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
 4342: 
 4343: 	    PREFETCH ();
 4344: 	    c = TRANSLATE (*d); /* The character to match.  */
 4345: 
 4346:             /* Cast to `unsigned' instead of `unsigned char' in case the
 4347:                bit list is a full 32 bytes long.  */
 4348: 	    if (c < (unsigned) (*p * BYTEWIDTH)
 4349: 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
 4350: 	      not = !not;
 4351: 
 4352: 	    p += 1 + *p;
 4353: 
 4354: 	    if (!not) goto fail;
 4355: 
 4356: 	    SET_REGS_MATCHED ();
 4357:             d++;
 4358: 	    break;
 4359: 	  }
 4360: 
 4361: 
 4362:         /* The beginning of a group is represented by start_memory.
 4363:            The arguments are the register number in the next byte, and the
 4364:            number of groups inner to this one in the next.  The text
 4365:            matched within the group is recorded (in the internal
 4366:            registers data structure) under the register number.  */
 4367:         case start_memory:
 4368: 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
 4369: 
 4370:           /* Find out if this group can match the empty string.  */
 4371: 	  p1 = p;		/* To send to group_match_null_string_p.  */
 4372: 
 4373:           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
 4374:             REG_MATCH_NULL_STRING_P (reg_info[*p])
 4375:               = group_match_null_string_p (&p1, pend, reg_info);
 4376: 
 4377:           /* Save the position in the string where we were the last time
 4378:              we were at this open-group operator in case the group is
 4379:              operated upon by a repetition operator, e.g., with `(a*)*b'
 4380:              against `ab'; then we want to ignore where we are now in
 4381:              the string in case this attempt to match fails.  */
 4382:           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
 4383:                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
 4384:                              : regstart[*p];
 4385: 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
 4386: 			 POINTER_TO_OFFSET (old_regstart[*p]));
 4387: 
 4388:           regstart[*p] = d;
 4389: 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
 4390: 
 4391:           IS_ACTIVE (reg_info[*p]) = 1;
 4392:           MATCHED_SOMETHING (reg_info[*p]) = 0;
 4393: 
 4394: 	  /* Clear this whenever we change the register activity status.  */
 4395: 	  set_regs_matched_done = 0;
 4396: 
 4397:           /* This is the new highest active register.  */
 4398:           highest_active_reg = *p;
 4399: 
 4400:           /* If nothing was active before, this is the new lowest active
 4401:              register.  */
 4402:           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
 4403:             lowest_active_reg = *p;
 4404: 
 4405:           /* Move past the register number and inner group count.  */
 4406:           p += 2;
 4407: 	  just_past_start_mem = p;
 4408: 
 4409:           break;
 4410: 
 4411: 
 4412:         /* The stop_memory opcode represents the end of a group.  Its
 4413:            arguments are the same as start_memory's: the register
 4414:            number, and the number of inner groups.  */
 4415: 	case stop_memory:
 4416: 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
 4417: 
 4418:           /* We need to save the string position the last time we were at
 4419:              this close-group operator in case the group is operated
 4420:              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
 4421:              against `aba'; then we want to ignore where we are now in
 4422:              the string in case this attempt to match fails.  */
 4423:           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
 4424:                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
 4425: 			   : regend[*p];
 4426: 	  DEBUG_PRINT2 ("      old_regend: %d\n",
 4427: 			 POINTER_TO_OFFSET (old_regend[*p]));
 4428: 
 4429:           regend[*p] = d;
 4430: 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
 4431: 
 4432:           /* This register isn't active anymore.  */
 4433:           IS_ACTIVE (reg_info[*p]) = 0;
 4434: 
 4435: 	  /* Clear this whenever we change the register activity status.  */
 4436: 	  set_regs_matched_done = 0;
 4437: 
 4438:           /* If this was the only register active, nothing is active
 4439:              anymore.  */
 4440:           if (lowest_active_reg == highest_active_reg)
 4441:             {
 4442:               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
 4443:               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
 4444:             }
 4445:           else
 4446:             { /* We must scan for the new highest active register, since
 4447:                  it isn't necessarily one less than now: consider
 4448:                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
 4449:                  new highest active register is 1.  */
 4450:               unsigned char r = *p - 1;
 4451:               while (r > 0 && !IS_ACTIVE (reg_info[r]))
 4452:                 r--;
 4453: 
 4454:               /* If we end up at register zero, that means that we saved
 4455:                  the registers as the result of an `on_failure_jump', not
 4456:                  a `start_memory', and we jumped to past the innermost
 4457:                  `stop_memory'.  For example, in ((.)*) we save
 4458:                  registers 1 and 2 as a result of the *, but when we pop
 4459:                  back to the second ), we are at the stop_memory 1.
 4460:                  Thus, nothing is active.  */
 4461: 	      if (r == 0)
 4462:                 {
 4463:                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
 4464:                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
 4465:                 }
 4466:               else
 4467:                 highest_active_reg = r;
 4468:             }
 4469: 
 4470:           /* If just failed to match something this time around with a
 4471:              group that's operated on by a repetition operator, try to
 4472:              force exit from the ``loop'', and restore the register
 4473:              information for this group that we had before trying this
 4474:              last match.  */
 4475:           if ((!MATCHED_SOMETHING (reg_info[*p])
 4476:                || just_past_start_mem == p - 1)
 4477: 	      && (p + 2) < pend)
 4478:             {
 4479:               boolean is_a_jump_n = false;
 4480: 
 4481:               p1 = p + 2;
 4482:               mcnt = 0;
 4483:               switch ((re_opcode_t) *p1++)
 4484:                 {
 4485:                   case jump_n:
 4486: 		    is_a_jump_n = true;
 4487:                   case pop_failure_jump:
 4488: 		  case maybe_pop_jump:
 4489: 		  case jump:
 4490: 		  case dummy_failure_jump:
 4491:                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 4492: 		    if (is_a_jump_n)
 4493: 		      p1 += 2;
 4494:                     break;
 4495: 
 4496:                   default:
 4497:                     /* do nothing */ ;
 4498:                 }
 4499: 	      p1 += mcnt;
 4500: 
 4501:               /* If the next operation is a jump backwards in the pattern
 4502: 	         to an on_failure_jump right before the start_memory
 4503:                  corresponding to this stop_memory, exit from the loop
 4504:                  by forcing a failure after pushing on the stack the
 4505:                  on_failure_jump's jump in the pattern, and d.  */
 4506:               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
 4507:                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
 4508: 		{
 4509:                   /* If this group ever matched anything, then restore
 4510:                      what its registers were before trying this last
 4511:                      failed match, e.g., with `(a*)*b' against `ab' for
 4512:                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
 4513:                      against `aba' for regend[3].
 4514: 
 4515:                      Also restore the registers for inner groups for,
 4516:                      e.g., `((a*)(b*))*' against `aba' (register 3 would
 4517:                      otherwise get trashed).  */
 4518: 
 4519:                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
 4520: 		    {
 4521: 		      unsigned r;
 4522: 
 4523:                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
 4524: 
 4525: 		      /* Restore this and inner groups' (if any) registers.  */
 4526:                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
 4527: 			   r++)
 4528:                         {
 4529:                           regstart[r] = old_regstart[r];
 4530: 
 4531:                           /* xx why this test?  */
 4532:                           if (old_regend[r] >= regstart[r])
 4533:                             regend[r] = old_regend[r];
 4534:                         }
 4535:                     }
 4536: 		  p1++;
 4537:                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 4538:                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
 4539: 
 4540:                   goto fail;
 4541:                 }
 4542:             }
 4543: 
 4544:           /* Move past the register number and the inner group count.  */
 4545:           p += 2;
 4546:           break;
 4547: 
 4548: 
 4549: 	/* \<digit> has been turned into a `duplicate' command which is
 4550:            followed by the numeric value of <digit> as the register number.  */
 4551:         case duplicate:
 4552: 	  {
 4553: 	    register const char *d2, *dend2;
 4554: 	    int regno = *p++;   /* Get which register to match against.  */
 4555: 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
 4556: 
 4557: 	    /* Can't back reference a group which we've never matched.  */
 4558:             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
 4559:               goto fail;
 4560: 
 4561:             /* Where in input to try to start matching.  */
 4562:             d2 = regstart[regno];
 4563: 
 4564:             /* Where to stop matching; if both the place to start and
 4565:                the place to stop matching are in the same string, then
 4566:                set to the place to stop, otherwise, for now have to use
 4567:                the end of the first string.  */
 4568: 
 4569:             dend2 = ((FIRST_STRING_P (regstart[regno])
 4570: 		      == FIRST_STRING_P (regend[regno]))
 4571: 		     ? regend[regno] : end_match_1);
 4572: 	    for (;;)
 4573: 	      {
 4574: 		/* If necessary, advance to next segment in register
 4575:                    contents.  */
 4576: 		while (d2 == dend2)
 4577: 		  {
 4578: 		    if (dend2 == end_match_2) break;
 4579: 		    if (dend2 == regend[regno]) break;
 4580: 
 4581:                     /* End of string1 => advance to string2. */
 4582:                     d2 = string2;
 4583:                     dend2 = regend[regno];
 4584: 		  }
 4585: 		/* At end of register contents => success */
 4586: 		if (d2 == dend2) break;
 4587: 
 4588: 		/* If necessary, advance to next segment in data.  */
 4589: 		PREFETCH ();
 4590: 
 4591: 		/* How many characters left in this segment to match.  */
 4592: 		mcnt = dend - d;
 4593: 
 4594: 		/* Want how many consecutive characters we can match in
 4595:                    one shot, so, if necessary, adjust the count.  */
 4596:                 if (mcnt > dend2 - d2)
 4597: 		  mcnt = dend2 - d2;
 4598: 
 4599: 		/* Compare that many; failure if mismatch, else move
 4600:                    past them.  */
 4601: 		if (translate
 4602:                     ? bcmp_translate (d, d2, mcnt, translate)
 4603:                     : memcmp (d, d2, mcnt))
 4604: 		  goto fail;
 4605: 		d += mcnt, d2 += mcnt;
 4606: 
 4607: 		/* Do this because we've matched some characters.  */
 4608: 		SET_REGS_MATCHED ();
 4609: 	      }
 4610: 	  }
 4611: 	  break;
 4612: 
 4613: 
 4614:         /* begline matches the empty string at the beginning of the string
 4615:            (unless `not_bol' is set in `bufp'), and, if
 4616:            `newline_anchor' is set, after newlines.  */
 4617: 	case begline:
 4618:           DEBUG_PRINT1 ("EXECUTING begline.\n");
 4619: 
 4620:           if (AT_STRINGS_BEG (d))
 4621:             {
 4622:               if (!bufp->not_bol) break;
 4623:             }
 4624:           else if (d[-1] == '\n' && bufp->newline_anchor)
 4625:             {
 4626:               break;
 4627:             }
 4628:           /* In all other cases, we fail.  */
 4629:           goto fail;
 4630: 
 4631: 
 4632:         /* endline is the dual of begline.  */
 4633: 	case endline:
 4634:           DEBUG_PRINT1 ("EXECUTING endline.\n");
 4635: 
 4636:           if (AT_STRINGS_END (d))
 4637:             {
 4638:               if (!bufp->not_eol) break;
 4639:             }
 4640: 
 4641:           /* We have to ``prefetch'' the next character.  */
 4642:           else if ((d == end1 ? *string2 : *d) == '\n'
 4643:                    && bufp->newline_anchor)
 4644:             {
 4645:               break;
 4646:             }
 4647:           goto fail;
 4648: 
 4649: 
 4650: 	/* Match at the very beginning of the data.  */
 4651:         case begbuf:
 4652:           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
 4653:           if (AT_STRINGS_BEG (d))
 4654:             break;
 4655:           goto fail;
 4656: 
 4657: 
 4658: 	/* Match at the very end of the data.  */
 4659:         case endbuf:
 4660:           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
 4661: 	  if (AT_STRINGS_END (d))
 4662: 	    break;
 4663:           goto fail;
 4664: 
 4665: 
 4666:         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
 4667:            pushes NULL as the value for the string on the stack.  Then
 4668:            `pop_failure_point' will keep the current value for the
 4669:            string, instead of restoring it.  To see why, consider
 4670:            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
 4671:            then the . fails against the \n.  But the next thing we want
 4672:            to do is match the \n against the \n; if we restored the
 4673:            string value, we would be back at the foo.
 4674: 
 4675:            Because this is used only in specific cases, we don't need to
 4676:            check all the things that `on_failure_jump' does, to make
 4677:            sure the right things get saved on the stack.  Hence we don't
 4678:            share its code.  The only reason to push anything on the
 4679:            stack at all is that otherwise we would have to change
 4680:            `anychar's code to do something besides goto fail in this
 4681:            case; that seems worse than this.  */
 4682:         case on_failure_keep_string_jump:
 4683:           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
 4684: 
 4685:           EXTRACT_NUMBER_AND_INCR (mcnt, p);
 4686: #ifdef _LIBC
 4687:           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
 4688: #else
 4689:           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
 4690: #endif
 4691: 
 4692:           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
 4693:           break;
 4694: 
 4695: 
 4696: 	/* Uses of on_failure_jump:
 4697: 
 4698:            Each alternative starts with an on_failure_jump that points
 4699:            to the beginning of the next alternative.  Each alternative
 4700:            except the last ends with a jump that in effect jumps past
 4701:            the rest of the alternatives.  (They really jump to the
 4702:            ending jump of the following alternative, because tensioning
 4703:            these jumps is a hassle.)
 4704: 
 4705:            Repeats start with an on_failure_jump that points past both
 4706:            the repetition text and either the following jump or
 4707:            pop_failure_jump back to this on_failure_jump.  */
 4708: 	case on_failure_jump:
 4709:         on_failure:
 4710:           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
 4711: 
 4712:           EXTRACT_NUMBER_AND_INCR (mcnt, p);
 4713: #ifdef _LIBC
 4714:           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
 4715: #else
 4716:           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
 4717: #endif
 4718: 
 4719:           /* If this on_failure_jump comes right before a group (i.e.,
 4720:              the original * applied to a group), save the information
 4721:              for that group and all inner ones, so that if we fail back
 4722:              to this point, the group's information will be correct.
 4723:              For example, in \(a*\)*\1, we need the preceding group,
 4724:              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
 4725: 
 4726:           /* We can't use `p' to check ahead because we push
 4727:              a failure point to `p + mcnt' after we do this.  */
 4728:           p1 = p;
 4729: 
 4730:           /* We need to skip no_op's before we look for the
 4731:              start_memory in case this on_failure_jump is happening as
 4732:              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
 4733:              against aba.  */
 4734:           while (p1 < pend && (re_opcode_t) *p1 == no_op)
 4735:             p1++;
 4736: 
 4737:           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
 4738:             {
 4739:               /* We have a new highest active register now.  This will
 4740:                  get reset at the start_memory we are about to get to,
 4741:                  but we will have saved all the registers relevant to
 4742:                  this repetition op, as described above.  */
 4743:               highest_active_reg = *(p1 + 1) + *(p1 + 2);
 4744:               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
 4745:                 lowest_active_reg = *(p1 + 1);
 4746:             }
 4747: 
 4748:           DEBUG_PRINT1 (":\n");
 4749:           PUSH_FAILURE_POINT (p + mcnt, d, -2);
 4750:           break;
 4751: 
 4752: 
 4753:         /* A smart repeat ends with `maybe_pop_jump'.
 4754: 	   We change it to either `pop_failure_jump' or `jump'.  */
 4755:         case maybe_pop_jump:
 4756:           EXTRACT_NUMBER_AND_INCR (mcnt, p);
 4757:           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
 4758:           {
 4759: 	    register unsigned char *p2 = p;
 4760: 
 4761:             /* Compare the beginning of the repeat with what in the
 4762:                pattern follows its end. If we can establish that there
 4763:                is nothing that they would both match, i.e., that we
 4764:                would have to backtrack because of (as in, e.g., `a*a')
 4765:                then we can change to pop_failure_jump, because we'll
 4766:                never have to backtrack.
 4767: 
 4768:                This is not true in the case of alternatives: in
 4769:                `(a|ab)*' we do need to backtrack to the `ab' alternative
 4770:                (e.g., if the string was `ab').  But instead of trying to
 4771:                detect that here, the alternative has put on a dummy
 4772:                failure point which is what we will end up popping.  */
 4773: 
 4774: 	    /* Skip over open/close-group commands.
 4775: 	       If what follows this loop is a ...+ construct,
 4776: 	       look at what begins its body, since we will have to
 4777: 	       match at least one of that.  */
 4778: 	    while (1)
 4779: 	      {
 4780: 		if (p2 + 2 < pend
 4781: 		    && ((re_opcode_t) *p2 == stop_memory
 4782: 			|| (re_opcode_t) *p2 == start_memory))
 4783: 		  p2 += 3;
 4784: 		else if (p2 + 6 < pend
 4785: 			 && (re_opcode_t) *p2 == dummy_failure_jump)
 4786: 		  p2 += 6;
 4787: 		else
 4788: 		  break;
 4789: 	      }
 4790: 
 4791: 	    p1 = p + mcnt;
 4792: 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
 4793: 	       to the `maybe_pop_jump' of this case.  Examine what
 4794: 	       follows.  */
 4795: 
 4796:             /* If we're at the end of the pattern, we can change.  */
 4797:             if (p2 == pend)
 4798: 	      {
 4799: 		/* Consider what happens when matching ":\(.*\)"
 4800: 		   against ":/".  I don't really understand this code
 4801: 		   yet.  */
 4802:   	        p[-3] = (unsigned char) pop_failure_jump;
 4803:                 DEBUG_PRINT1
 4804:                   ("  End of pattern: change to `pop_failure_jump'.\n");
 4805:               }
 4806: 
 4807:             else if ((re_opcode_t) *p2 == exactn
 4808: 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
 4809: 	      {
 4810: 		register unsigned char c
 4811:                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
 4812: 
 4813:                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
 4814:                   {
 4815:   		    p[-3] = (unsigned char) pop_failure_jump;
 4816:                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
 4817:                                   c, p1[5]);
 4818:                   }
 4819: 
 4820: 		else if ((re_opcode_t) p1[3] == charset
 4821: 			 || (re_opcode_t) p1[3] == charset_not)
 4822: 		  {
 4823: 		    int not = (re_opcode_t) p1[3] == charset_not;
 4824: 
 4825: 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
 4826: 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
 4827: 		      not = !not;
 4828: 
 4829:                     /* `not' is equal to 1 if c would match, which means
 4830:                         that we can't change to pop_failure_jump.  */
 4831: 		    if (!not)
 4832:                       {
 4833:   		        p[-3] = (unsigned char) pop_failure_jump;
 4834:                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
 4835:                       }
 4836: 		  }
 4837: 	      }
 4838:             else if ((re_opcode_t) *p2 == charset)
 4839: 	      {
 4840: #ifdef DEBUG
 4841: 		register unsigned char c
 4842:                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
 4843: #endif
 4844: 
 4845: #if 0
 4846:                 if ((re_opcode_t) p1[3] == exactn
 4847: 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
 4848: 			  && (p2[2 + p1[5] / BYTEWIDTH]
 4849: 			      & (1 << (p1[5] % BYTEWIDTH)))))
 4850: #else
 4851:                 if ((re_opcode_t) p1[3] == exactn
 4852: 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
 4853: 			  && (p2[2 + p1[4] / BYTEWIDTH]
 4854: 			      & (1 << (p1[4] % BYTEWIDTH)))))
 4855: #endif
 4856:                   {
 4857:   		    p[-3] = (unsigned char) pop_failure_jump;
 4858:                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
 4859:                                   c, p1[5]);
 4860:                   }
 4861: 
 4862: 		else if ((re_opcode_t) p1[3] == charset_not)
 4863: 		  {
 4864: 		    int idx;
 4865: 		    /* We win if the charset_not inside the loop
 4866: 		       lists every character listed in the charset after.  */
 4867: 		    for (idx = 0; idx < (int) p2[1]; idx++)
 4868: 		      if (! (p2[2 + idx] == 0
 4869: 			     || (idx < (int) p1[4]
 4870: 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
 4871: 			break;
 4872: 
 4873: 		    if (idx == p2[1])
 4874:                       {
 4875:   		        p[-3] = (unsigned char) pop_failure_jump;
 4876:                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
 4877:                       }
 4878: 		  }
 4879: 		else if ((re_opcode_t) p1[3] == charset)
 4880: 		  {
 4881: 		    int idx;
 4882: 		    /* We win if the charset inside the loop
 4883: 		       has no overlap with the one after the loop.  */
 4884: 		    for (idx = 0;
 4885: 			 idx < (int) p2[1] && idx < (int) p1[4];
 4886: 			 idx++)
 4887: 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
 4888: 			break;
 4889: 
 4890: 		    if (idx == p2[1] || idx == p1[4])
 4891:                       {
 4892:   		        p[-3] = (unsigned char) pop_failure_jump;
 4893:                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
 4894:                       }
 4895: 		  }
 4896: 	      }
 4897: 	  }
 4898: 	  p -= 2;		/* Point at relative address again.  */
 4899: 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
 4900: 	    {
 4901: 	      p[-1] = (unsigned char) jump;
 4902:               DEBUG_PRINT1 ("  Match => jump.\n");
 4903: 	      goto unconditional_jump;
 4904: 	    }
 4905:         /* Note fall through.  */
 4906: 
 4907: 
 4908: 	/* The end of a simple repeat has a pop_failure_jump back to
 4909:            its matching on_failure_jump, where the latter will push a
 4910:            failure point.  The pop_failure_jump takes off failure
 4911:            points put on by this pop_failure_jump's matching
 4912:            on_failure_jump; we got through the pattern to here from the
 4913:            matching on_failure_jump, so didn't fail.  */
 4914:         case pop_failure_jump:
 4915:           {
 4916:             /* We need to pass separate storage for the lowest and
 4917:                highest registers, even though we don't care about the
 4918:                actual values.  Otherwise, we will restore only one
 4919:                register from the stack, since lowest will == highest in
 4920:                `pop_failure_point'.  */
 4921:             active_reg_t dummy_low_reg, dummy_high_reg;
 4922:             unsigned char *pdummy;
 4923:             const char *sdummy;
 4924: 
 4925:             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
 4926:             POP_FAILURE_POINT (sdummy, pdummy,
 4927:                                dummy_low_reg, dummy_high_reg,
 4928:                                reg_dummy, reg_dummy, reg_info_dummy);
 4929:           }
 4930: 	  /* Note fall through.  */
 4931: 
 4932: 	unconditional_jump:
 4933: #ifdef _LIBC
 4934: 	  DEBUG_PRINT2 ("\n%p: ", p);
 4935: #else
 4936: 	  DEBUG_PRINT2 ("\n0x%x: ", p);
 4937: #endif
 4938:           /* Note fall through.  */
 4939: 
 4940:         /* Unconditionally jump (without popping any failure points).  */
 4941:         case jump:
 4942: 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
 4943:           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
 4944: 	  p += mcnt;				/* Do the jump.  */
 4945: #ifdef _LIBC
 4946:           DEBUG_PRINT2 ("(to %p).\n", p);
 4947: #else
 4948:           DEBUG_PRINT2 ("(to 0x%x).\n", p);
 4949: #endif
 4950: 	  break;
 4951: 
 4952: 
 4953:         /* We need this opcode so we can detect where alternatives end
 4954:            in `group_match_null_string_p' et al.  */
 4955:         case jump_past_alt:
 4956:           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
 4957:           goto unconditional_jump;
 4958: 
 4959: 
 4960:         /* Normally, the on_failure_jump pushes a failure point, which
 4961:            then gets popped at pop_failure_jump.  We will end up at
 4962:            pop_failure_jump, also, and with a pattern of, say, `a+', we
 4963:            are skipping over the on_failure_jump, so we have to push
 4964:            something meaningless for pop_failure_jump to pop.  */
 4965:         case dummy_failure_jump:
 4966:           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
 4967:           /* It doesn't matter what we push for the string here.  What
 4968:              the code at `fail' tests is the value for the pattern.  */
 4969:           PUSH_FAILURE_POINT (NULL, NULL, -2);
 4970:           goto unconditional_jump;
 4971: 
 4972: 
 4973:         /* At the end of an alternative, we need to push a dummy failure
 4974:            point in case we are followed by a `pop_failure_jump', because
 4975:            we don't want the failure point for the alternative to be
 4976:            popped.  For example, matching `(a|ab)*' against `aab'
 4977:            requires that we match the `ab' alternative.  */
 4978:         case push_dummy_failure:
 4979:           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
 4980:           /* See comments just above at `dummy_failure_jump' about the
 4981:              two zeroes.  */
 4982:           PUSH_FAILURE_POINT (NULL, NULL, -2);
 4983:           break;
 4984: 
 4985:         /* Have to succeed matching what follows at least n times.
 4986:            After that, handle like `on_failure_jump'.  */
 4987:         case succeed_n:
 4988:           EXTRACT_NUMBER (mcnt, p + 2);
 4989:           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
 4990: 
 4991:           assert (mcnt >= 0);
 4992:           /* Originally, this is how many times we HAVE to succeed.  */
 4993:           if (mcnt > 0)
 4994:             {
 4995:                mcnt--;
 4996: 	       p += 2;
 4997:                STORE_NUMBER_AND_INCR (p, mcnt);
 4998: #ifdef _LIBC
 4999:                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
 5000: #else
 5001:                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
 5002: #endif
 5003:             }
 5004: 	  else if (mcnt == 0)
 5005:             {
 5006: #ifdef _LIBC
 5007:               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
 5008: #else
 5009:               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
 5010: #endif
 5011: 	      p[2] = (unsigned char) no_op;
 5012:               p[3] = (unsigned char) no_op;
 5013:               goto on_failure;
 5014:             }
 5015:           break;
 5016: 
 5017:         case jump_n:
 5018:           EXTRACT_NUMBER (mcnt, p + 2);
 5019:           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
 5020: 
 5021:           /* Originally, this is how many times we CAN jump.  */
 5022:           if (mcnt)
 5023:             {
 5024:                mcnt--;
 5025:                STORE_NUMBER (p + 2, mcnt);
 5026: #ifdef _LIBC
 5027:                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
 5028: #else
 5029:                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
 5030: #endif
 5031: 	       goto unconditional_jump;
 5032:             }
 5033:           /* If don't have to jump any more, skip over the rest of command.  */
 5034: 	  else
 5035: 	    p += 4;
 5036:           break;
 5037: 
 5038: 	case set_number_at:
 5039: 	  {
 5040:             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
 5041: 
 5042:             EXTRACT_NUMBER_AND_INCR (mcnt, p);
 5043:             p1 = p + mcnt;
 5044:             EXTRACT_NUMBER_AND_INCR (mcnt, p);
 5045: #ifdef _LIBC
 5046:             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
 5047: #else
 5048:             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
 5049: #endif
 5050: 	    STORE_NUMBER (p1, mcnt);
 5051:             break;
 5052:           }
 5053: 
 5054: #if 0
 5055: 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
 5056: 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
 5057: 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
 5058: 	   macro and introducing temporary variables works around the bug.  */
 5059: 
 5060: 	case wordbound:
 5061: 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
 5062: 	  if (AT_WORD_BOUNDARY (d))
 5063: 	    break;
 5064: 	  goto fail;
 5065: 
 5066: 	case notwordbound:
 5067: 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
 5068: 	  if (AT_WORD_BOUNDARY (d))
 5069: 	    goto fail;
 5070: 	  break;
 5071: #else
 5072: 	case wordbound:
 5073: 	{
 5074: 	  boolean prevchar, thischar;
 5075: 
 5076: 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
 5077: 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
 5078: 	    break;
 5079: 
 5080: 	  prevchar = WORDCHAR_P (d - 1);
 5081: 	  thischar = WORDCHAR_P (d);
 5082: 	  if (prevchar != thischar)
 5083: 	    break;
 5084: 	  goto fail;
 5085: 	}
 5086: 
 5087:       case notwordbound:
 5088: 	{
 5089: 	  boolean prevchar, thischar;
 5090: 
 5091: 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
 5092: 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
 5093: 	    goto fail;
 5094: 
 5095: 	  prevchar = WORDCHAR_P (d - 1);
 5096: 	  thischar = WORDCHAR_P (d);
 5097: 	  if (prevchar != thischar)
 5098: 	    goto fail;
 5099: 	  break;
 5100: 	}
 5101: #endif
 5102: 
 5103: 	case wordbeg:
 5104:           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
 5105: 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
 5106: 	    break;
 5107:           goto fail;
 5108: 
 5109: 	case wordend:
 5110:           DEBUG_PRINT1 ("EXECUTING wordend.\n");
 5111: 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
 5112:               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
 5113: 	    break;
 5114:           goto fail;
 5115: 
 5116: #ifdef emacs
 5117:   	case before_dot:
 5118:           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
 5119:  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
 5120:   	    goto fail;
 5121:   	  break;
 5122: 
 5123:   	case at_dot:
 5124:           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
 5125:  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
 5126:   	    goto fail;
 5127:   	  break;
 5128: 
 5129:   	case after_dot:
 5130:           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
 5131:           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
 5132:   	    goto fail;
 5133:   	  break;
 5134: 
 5135: 	case syntaxspec:
 5136:           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
 5137: 	  mcnt = *p++;
 5138: 	  goto matchsyntax;
 5139: 
 5140:         case wordchar:
 5141:           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
 5142: 	  mcnt = (int) Sword;
 5143:         matchsyntax:
 5144: 	  PREFETCH ();
 5145: 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
 5146: 	  d++;
 5147: 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
 5148: 	    goto fail;
 5149:           SET_REGS_MATCHED ();
 5150: 	  break;
 5151: 
 5152: 	case notsyntaxspec:
 5153:           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
 5154: 	  mcnt = *p++;
 5155: 	  goto matchnotsyntax;
 5156: 
 5157:         case notwordchar:
 5158:           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
 5159: 	  mcnt = (int) Sword;
 5160:         matchnotsyntax:
 5161: 	  PREFETCH ();
 5162: 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
 5163: 	  d++;
 5164: 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
 5165: 	    goto fail;
 5166: 	  SET_REGS_MATCHED ();
 5167:           break;
 5168: 
 5169: #else /* not emacs */
 5170: 	case wordchar:
 5171:           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
 5172: 	  PREFETCH ();
 5173:           if (!WORDCHAR_P (d))
 5174:             goto fail;
 5175: 	  SET_REGS_MATCHED ();
 5176:           d++;
 5177: 	  break;
 5178: 
 5179: 	case notwordchar:
 5180:           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
 5181: 	  PREFETCH ();
 5182: 	  if (WORDCHAR_P (d))
 5183:             goto fail;
 5184:           SET_REGS_MATCHED ();
 5185:           d++;
 5186: 	  break;
 5187: #endif /* not emacs */
 5188: 
 5189:         default:
 5190:           abort ();
 5191: 	}
 5192:       continue;  /* Successfully executed one pattern command; keep going.  */
 5193: 
 5194: 
 5195:     /* We goto here if a matching operation fails. */
 5196:     fail:
 5197:       if (!FAIL_STACK_EMPTY ())
 5198: 	{ /* A restart point is known.  Restore to that state.  */
 5199:           DEBUG_PRINT1 ("\nFAIL:\n");
 5200:           POP_FAILURE_POINT (d, p,
 5201:                              lowest_active_reg, highest_active_reg,
 5202:                              regstart, regend, reg_info);
 5203: 
 5204:           /* If this failure point is a dummy, try the next one.  */
 5205:           if (!p)
 5206: 	    goto fail;
 5207: 
 5208:           /* If we failed to the end of the pattern, don't examine *p.  */
 5209: 	  assert (p <= pend);
 5210:           if (p < pend)
 5211:             {
 5212:               boolean is_a_jump_n = false;
 5213: 
 5214:               /* If failed to a backwards jump that's part of a repetition
 5215:                  loop, need to pop this failure point and use the next one.  */
 5216:               switch ((re_opcode_t) *p)
 5217:                 {
 5218:                 case jump_n:
 5219:                   is_a_jump_n = true;
 5220:                 case maybe_pop_jump:
 5221:                 case pop_failure_jump:
 5222:                 case jump:
 5223:                   p1 = p + 1;
 5224:                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 5225:                   p1 += mcnt;
 5226: 
 5227:                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
 5228:                       || (!is_a_jump_n
 5229:                           && (re_opcode_t) *p1 == on_failure_jump))
 5230:                     goto fail;
 5231:                   break;
 5232:                 default:
 5233:                   /* do nothing */ ;
 5234:                 }
 5235:             }
 5236: 
 5237:           if (d >= string1 && d <= end1)
 5238: 	    dend = end_match_1;
 5239:         }
 5240:       else
 5241:         break;   /* Matching at this starting point really fails.  */
 5242:     } /* for (;;) */
 5243: 
 5244:   if (best_regs_set)
 5245:     goto restore_best_regs;
 5246: 
 5247:   FREE_VARIABLES ();
 5248: 
 5249:   return -1;         			/* Failure to match.  */
 5250: } /* re_match_2 */
 5251: 
 5252: /* Subroutine definitions for re_match_2.  */
 5253: 
 5254: 
 5255: /* We are passed P pointing to a register number after a start_memory.
 5256: 
 5257:    Return true if the pattern up to the corresponding stop_memory can
 5258:    match the empty string, and false otherwise.
 5259: 
 5260:    If we find the matching stop_memory, sets P to point to one past its number.
 5261:    Otherwise, sets P to an undefined byte less than or equal to END.
 5262: 
 5263:    We don't handle duplicates properly (yet).  */
 5264: 
 5265: static boolean
 5266: group_match_null_string_p (p, end, reg_info)
 5267:     unsigned char **p, *end;
 5268:     register_info_type *reg_info;
 5269: {
 5270:   int mcnt;
 5271:   /* Point to after the args to the start_memory.  */
 5272:   unsigned char *p1 = *p + 2;
 5273: 
 5274:   while (p1 < end)
 5275:     {
 5276:       /* Skip over opcodes that can match nothing, and return true or
 5277: 	 false, as appropriate, when we get to one that can't, or to the
 5278:          matching stop_memory.  */
 5279: 
 5280:       switch ((re_opcode_t) *p1)
 5281:         {
 5282:         /* Could be either a loop or a series of alternatives.  */
 5283:         case on_failure_jump:
 5284:           p1++;
 5285:           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 5286: 
 5287:           /* If the next operation is not a jump backwards in the
 5288: 	     pattern.  */
 5289: 
 5290: 	  if (mcnt >= 0)
 5291: 	    {
 5292:               /* Go through the on_failure_jumps of the alternatives,
 5293:                  seeing if any of the alternatives cannot match nothing.
 5294:                  The last alternative starts with only a jump,
 5295:                  whereas the rest start with on_failure_jump and end
 5296:                  with a jump, e.g., here is the pattern for `a|b|c':
 5297: 
 5298:                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
 5299:                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
 5300:                  /exactn/1/c
 5301: 
 5302:                  So, we have to first go through the first (n-1)
 5303:                  alternatives and then deal with the last one separately.  */
 5304: 
 5305: 
 5306:               /* Deal with the first (n-1) alternatives, which start
 5307:                  with an on_failure_jump (see above) that jumps to right
 5308:                  past a jump_past_alt.  */
 5309: 
 5310:               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
 5311:                 {
 5312:                   /* `mcnt' holds how many bytes long the alternative
 5313:                      is, including the ending `jump_past_alt' and
 5314:                      its number.  */
 5315: 
 5316:                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
 5317: 				                      reg_info))
 5318:                     return false;
 5319: 
 5320:                   /* Move to right after this alternative, including the
 5321: 		     jump_past_alt.  */
 5322:                   p1 += mcnt;
 5323: 
 5324:                   /* Break if it's the beginning of an n-th alternative
 5325:                      that doesn't begin with an on_failure_jump.  */
 5326:                   if ((re_opcode_t) *p1 != on_failure_jump)
 5327:                     break;
 5328: 
 5329: 		  /* Still have to check that it's not an n-th
 5330: 		     alternative that starts with an on_failure_jump.  */
 5331: 		  p1++;
 5332:                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 5333:                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
 5334:                     {
 5335: 		      /* Get to the beginning of the n-th alternative.  */
 5336:                       p1 -= 3;
 5337:                       break;
 5338:                     }
 5339:                 }
 5340: 
 5341:               /* Deal with the last alternative: go back and get number
 5342:                  of the `jump_past_alt' just before it.  `mcnt' contains
 5343:                  the length of the alternative.  */
 5344:               EXTRACT_NUMBER (mcnt, p1 - 2);
 5345: 
 5346:               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
 5347:                 return false;
 5348: 
 5349:               p1 += mcnt;	/* Get past the n-th alternative.  */
 5350:             } /* if mcnt > 0 */
 5351:           break;
 5352: 
 5353: 
 5354:         case stop_memory:
 5355: 	  assert (p1[1] == **p);
 5356:           *p = p1 + 2;
 5357:           return true;
 5358: 
 5359: 
 5360:         default:
 5361:           if (!common_op_match_null_string_p (&p1, end, reg_info))
 5362:             return false;
 5363:         }
 5364:     } /* while p1 < end */
 5365: 
 5366:   return false;
 5367: } /* group_match_null_string_p */
 5368: 
 5369: 
 5370: /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
 5371:    It expects P to be the first byte of a single alternative and END one
 5372:    byte past the last. The alternative can contain groups.  */
 5373: 
 5374: static boolean
 5375: alt_match_null_string_p (p, end, reg_info)
 5376:     unsigned char *p, *end;
 5377:     register_info_type *reg_info;
 5378: {
 5379:   int mcnt;
 5380:   unsigned char *p1 = p;
 5381: 
 5382:   while (p1 < end)
 5383:     {
 5384:       /* Skip over opcodes that can match nothing, and break when we get
 5385:          to one that can't.  */
 5386: 
 5387:       switch ((re_opcode_t) *p1)
 5388:         {
 5389: 	/* It's a loop.  */
 5390:         case on_failure_jump:
 5391:           p1++;
 5392:           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 5393:           p1 += mcnt;
 5394:           break;
 5395: 
 5396: 	default:
 5397:           if (!common_op_match_null_string_p (&p1, end, reg_info))
 5398:             return false;
 5399:         }
 5400:     }  /* while p1 < end */
 5401: 
 5402:   return true;
 5403: } /* alt_match_null_string_p */
 5404: 
 5405: 
 5406: /* Deals with the ops common to group_match_null_string_p and
 5407:    alt_match_null_string_p.
 5408: 
 5409:    Sets P to one after the op and its arguments, if any.  */
 5410: 
 5411: static boolean
 5412: common_op_match_null_string_p (p, end, reg_info)
 5413:     unsigned char **p, *end;
 5414:     register_info_type *reg_info;
 5415: {
 5416:   int mcnt;
 5417:   boolean ret;
 5418:   int reg_no;
 5419:   unsigned char *p1 = *p;
 5420: 
 5421:   switch ((re_opcode_t) *p1++)
 5422:     {
 5423:     case no_op:
 5424:     case begline:
 5425:     case endline:
 5426:     case begbuf:
 5427:     case endbuf:
 5428:     case wordbeg:
 5429:     case wordend:
 5430:     case wordbound:
 5431:     case notwordbound:
 5432: #ifdef emacs
 5433:     case before_dot:
 5434:     case at_dot:
 5435:     case after_dot:
 5436: #endif
 5437:       break;
 5438: 
 5439:     case start_memory:
 5440:       reg_no = *p1;
 5441:       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
 5442:       ret = group_match_null_string_p (&p1, end, reg_info);
 5443: 
 5444:       /* Have to set this here in case we're checking a group which
 5445:          contains a group and a back reference to it.  */
 5446: 
 5447:       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
 5448:         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
 5449: 
 5450:       if (!ret)
 5451:         return false;
 5452:       break;
 5453: 
 5454:     /* If this is an optimized succeed_n for zero times, make the jump.  */
 5455:     case jump:
 5456:       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 5457:       if (mcnt >= 0)
 5458:         p1 += mcnt;
 5459:       else
 5460:         return false;
 5461:       break;
 5462: 
 5463:     case succeed_n:
 5464:       /* Get to the number of times to succeed.  */
 5465:       p1 += 2;
 5466:       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 5467: 
 5468:       if (mcnt == 0)
 5469:         {
 5470:           p1 -= 4;
 5471:           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 5472:           p1 += mcnt;
 5473:         }
 5474:       else
 5475:         return false;
 5476:       break;
 5477: 
 5478:     case duplicate:
 5479:       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
 5480:         return false;
 5481:       break;
 5482: 
 5483:     case set_number_at:
 5484:       p1 += 4;
 5485: 
 5486:     default:
 5487:       /* All other opcodes mean we cannot match the empty string.  */
 5488:       return false;
 5489:   }
 5490: 
 5491:   *p = p1;
 5492:   return true;
 5493: } /* common_op_match_null_string_p */
 5494: 
 5495: 
 5496: /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
 5497:    bytes; nonzero otherwise.  */
 5498: 
 5499: static int
 5500: bcmp_translate (s1, s2, len, translate)
 5501:      const char *s1, *s2;
 5502:      register int len;
 5503:      RE_TRANSLATE_TYPE translate;
 5504: {
 5505:   register const unsigned char *p1 = (const unsigned char *) s1;
 5506:   register const unsigned char *p2 = (const unsigned char *) s2;
 5507:   while (len)
 5508:     {
 5509:       if (translate[*p1++] != translate[*p2++]) return 1;
 5510:       len--;
 5511:     }
 5512:   return 0;
 5513: }
 5514: 
 5515: /* Entry points for GNU code.  */
 5516: 
 5517: /* re_compile_pattern is the GNU regular expression compiler: it
 5518:    compiles PATTERN (of length SIZE) and puts the result in BUFP.
 5519:    Returns 0 if the pattern was valid, otherwise an error string.
 5520: 
 5521:    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
 5522:    are set in BUFP on entry.
 5523: 
 5524:    We call regex_compile to do the actual compilation.  */
 5525: 
 5526: const char *
 5527: re_compile_pattern (pattern, length, bufp)
 5528:      const char *pattern;
 5529:      size_t length;
 5530:      struct re_pattern_buffer *bufp;
 5531: {
 5532:   reg_errcode_t ret;
 5533: 
 5534:   /* GNU code is written to assume at least RE_NREGS registers will be set
 5535:      (and at least one extra will be -1).  */
 5536:   bufp->regs_allocated = REGS_UNALLOCATED;
 5537: 
 5538:   /* And GNU code determines whether or not to get register information
 5539:      by passing null for the REGS argument to re_match, etc., not by
 5540:      setting no_sub.  */
 5541:   bufp->no_sub = 0;
 5542: 
 5543:   /* Match anchors at newline.  */
 5544:   bufp->newline_anchor = 1;
 5545: 
 5546:   ret = regex_compile (pattern, length, re_syntax_options, bufp);
 5547: 
 5548:   if (!ret)
 5549:     return NULL;
 5550:   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
 5551: }
 5552: #ifdef _LIBC
 5553: weak_alias (__re_compile_pattern, re_compile_pattern)
 5554: #endif
 5555: 
 5556: /* Entry points compatible with 4.2 BSD regex library.  We don't define
 5557:    them unless specifically requested.  */
 5558: 
 5559: #if defined _REGEX_RE_COMP || defined _LIBC
 5560: 
 5561: /* BSD has one and only one pattern buffer.  */
 5562: static struct re_pattern_buffer re_comp_buf;
 5563: 
 5564: char *
 5565: #ifdef _LIBC
 5566: /* Make these definitions weak in libc, so POSIX programs can redefine
 5567:    these names if they don't use our functions, and still use
 5568:    regcomp/regexec below without link errors.  */
 5569: weak_function
 5570: #endif
 5571: re_comp (s)
 5572:     const char *s;
 5573: {
 5574:   reg_errcode_t ret;
 5575: 
 5576:   if (!s)
 5577:     {
 5578:       if (!re_comp_buf.buffer)
 5579: 	return gettext ("No previous regular expression");
 5580:       return 0;
 5581:     }
 5582: 
 5583:   if (!re_comp_buf.buffer)
 5584:     {
 5585:       re_comp_buf.buffer = (unsigned char *) malloc (200);
 5586:       if (re_comp_buf.buffer == NULL)
 5587:         return (char *) gettext (re_error_msgid
 5588: 				 + re_error_msgid_idx[(int) REG_ESPACE]);
 5589:       re_comp_buf.allocated = 200;
 5590: 
 5591:       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
 5592:       if (re_comp_buf.fastmap == NULL)
 5593: 	return (char *) gettext (re_error_msgid
 5594: 				 + re_error_msgid_idx[(int) REG_ESPACE]);
 5595:     }
 5596: 
 5597:   /* Since `re_exec' always passes NULL for the `regs' argument, we
 5598:      don't need to initialize the pattern buffer fields which affect it.  */
 5599: 
 5600:   /* Match anchors at newlines.  */
 5601:   re_comp_buf.newline_anchor = 1;
 5602: 
 5603:   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
 5604: 
 5605:   if (!ret)
 5606:     return NULL;
 5607: 
 5608:   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
 5609:   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
 5610: }
 5611: 
 5612: 
 5613: int
 5614: #ifdef _LIBC
 5615: weak_function
 5616: #endif
 5617: re_exec (s)
 5618:     const char *s;
 5619: {
 5620:   const int len = strlen (s);
 5621:   return
 5622:     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
 5623: }
 5624: 
 5625: #endif /* _REGEX_RE_COMP */
 5626: 
 5627: /* POSIX.2 functions.  Don't define these for Emacs.  */
 5628: 
 5629: #ifndef emacs
 5630: 
 5631: /* regcomp takes a regular expression as a string and compiles it.
 5632: 
 5633:    PREG is a regex_t *.  We do not expect any fields to be initialized,
 5634:    since POSIX says we shouldn't.  Thus, we set
 5635: 
 5636:      `buffer' to the compiled pattern;
 5637:      `used' to the length of the compiled pattern;
 5638:      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
 5639:        REG_EXTENDED bit in CFLAGS is set; otherwise, to
 5640:        RE_SYNTAX_POSIX_BASIC;
 5641:      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
 5642:      `fastmap' to an allocated space for the fastmap;
 5643:      `fastmap_accurate' to zero;
 5644:      `re_nsub' to the number of subexpressions in PATTERN.
 5645: 
 5646:    PATTERN is the address of the pattern string.
 5647: 
 5648:    CFLAGS is a series of bits which affect compilation.
 5649: 
 5650:      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
 5651:      use POSIX basic syntax.
 5652: 
 5653:      If REG_NEWLINE is set, then . and [^...] don't match newline.
 5654:      Also, regexec will try a match beginning after every newline.
 5655: 
 5656:      If REG_ICASE is set, then we considers upper- and lowercase
 5657:      versions of letters to be equivalent when matching.
 5658: 
 5659:      If REG_NOSUB is set, then when PREG is passed to regexec, that
 5660:      routine will report only success or failure, and nothing about the
 5661:      registers.
 5662: 
 5663:    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
 5664:    the return codes and their meanings.)  */
 5665: 
 5666: int
 5667: regcomp (preg, pattern, cflags)
 5668:     regex_t *preg;
 5669:     const char *pattern;
 5670:     int cflags;
 5671: {
 5672:   reg_errcode_t ret;
 5673:   reg_syntax_t syntax
 5674:     = (cflags & REG_EXTENDED) ?
 5675:       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
 5676: 
 5677:   /* regex_compile will allocate the space for the compiled pattern.  */
 5678:   preg->buffer = 0;
 5679:   preg->allocated = 0;
 5680:   preg->used = 0;
 5681: 
 5682:   /* Try to allocate space for the fastmap.  */
 5683:   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
 5684: 
 5685:   if (cflags & REG_ICASE)
 5686:     {
 5687:       unsigned i;
 5688: 
 5689:       preg->translate
 5690: 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
 5691: 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
 5692:       if (preg->translate == NULL)
 5693:         return (int) REG_ESPACE;
 5694: 
 5695:       /* Map uppercase characters to corresponding lowercase ones.  */
 5696:       for (i = 0; i < CHAR_SET_SIZE; i++)
 5697:         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
 5698:     }
 5699:   else
 5700:     preg->translate = NULL;
 5701: 
 5702:   /* If REG_NEWLINE is set, newlines are treated differently.  */
 5703:   if (cflags & REG_NEWLINE)
 5704:     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
 5705:       syntax &= ~RE_DOT_NEWLINE;
 5706:       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
 5707:       /* It also changes the matching behavior.  */
 5708:       preg->newline_anchor = 1;
 5709:     }
 5710:   else
 5711:     preg->newline_anchor = 0;
 5712: 
 5713:   preg->no_sub = !!(cflags & REG_NOSUB);
 5714: 
 5715:   /* POSIX says a null character in the pattern terminates it, so we
 5716:      can use strlen here in compiling the pattern.  */
 5717:   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
 5718: 
 5719:   /* POSIX doesn't distinguish between an unmatched open-group and an
 5720:      unmatched close-group: both are REG_EPAREN.  */
 5721:   if (ret == REG_ERPAREN) ret = REG_EPAREN;
 5722: 
 5723:   if (ret == REG_NOERROR && preg->fastmap)
 5724:     {
 5725:       /* Compute the fastmap now, since regexec cannot modify the pattern
 5726: 	 buffer.  */
 5727:       if (re_compile_fastmap (preg) == -2)
 5728: 	{
 5729: 	  /* Some error occured while computing the fastmap, just forget
 5730: 	     about it.  */
 5731: 	  free (preg->fastmap);
 5732: 	  preg->fastmap = NULL;
 5733: 	}
 5734:     }
 5735: 
 5736:   return (int) ret;
 5737: }
 5738: #ifdef _LIBC
 5739: weak_alias (__regcomp, regcomp)
 5740: #endif
 5741: 
 5742: 
 5743: /* regexec searches for a given pattern, specified by PREG, in the
 5744:    string STRING.
 5745: 
 5746:    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 5747:    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 5748:    least NMATCH elements, and we set them to the offsets of the
 5749:    corresponding matched substrings.
 5750: 
 5751:    EFLAGS specifies `execution flags' which affect matching: if
 5752:    REG_NOTBOL is set, then ^ does not match at the beginning of the
 5753:    string; if REG_NOTEOL is set, then $ does not match at the end.
 5754: 
 5755:    We return 0 if we find a match and REG_NOMATCH if not.  */
 5756: 
 5757: int
 5758: regexec (preg, string, nmatch, pmatch, eflags)
 5759:     const regex_t *preg;
 5760:     const char *string;
 5761:     size_t nmatch;
 5762:     regmatch_t pmatch[];
 5763:     int eflags;
 5764: {
 5765:   int ret;
 5766:   struct re_registers regs;
 5767:   regex_t private_preg;
 5768:   int len = strlen (string);
 5769:   boolean want_reg_info = !preg->no_sub && nmatch > 0;
 5770: 
 5771:   private_preg = *preg;
 5772: 
 5773:   private_preg.not_bol = !!(eflags & REG_NOTBOL);
 5774:   private_preg.not_eol = !!(eflags & REG_NOTEOL);
 5775: 
 5776:   /* The user has told us exactly how many registers to return
 5777:      information about, via `nmatch'.  We have to pass that on to the
 5778:      matching routines.  */
 5779:   private_preg.regs_allocated = REGS_FIXED;
 5780: 
 5781:   if (want_reg_info)
 5782:     {
 5783:       regs.num_regs = nmatch;
 5784:       regs.start = TALLOC (nmatch * 2, regoff_t);
 5785:       if (regs.start == NULL)
 5786:         return (int) REG_NOMATCH;
 5787:       regs.end = regs.start + nmatch;
 5788:     }
 5789: 
 5790:   /* Perform the searching operation.  */
 5791:   ret = re_search (&private_preg, string, len,
 5792:                    /* start: */ 0, /* range: */ len,
 5793:                    want_reg_info ? &regs : (struct re_registers *) 0);
 5794: 
 5795:   /* Copy the register information to the POSIX structure.  */
 5796:   if (want_reg_info)
 5797:     {
 5798:       if (ret >= 0)
 5799:         {
 5800:           unsigned r;
 5801: 
 5802:           for (r = 0; r < nmatch; r++)
 5803:             {
 5804:               pmatch[r].rm_so = regs.start[r];
 5805:               pmatch[r].rm_eo = regs.end[r];
 5806:             }
 5807:         }
 5808: 
 5809:       /* If we needed the temporary register info, free the space now.  */
 5810:       free (regs.start);
 5811:     }
 5812: 
 5813:   /* We want zero return to mean success, unlike `re_search'.  */
 5814:   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
 5815: }
 5816: #ifdef _LIBC
 5817: weak_alias (__regexec, regexec)
 5818: #endif
 5819: 
 5820: 
 5821: /* Returns a message corresponding to an error code, ERRCODE, returned
 5822:    from either regcomp or regexec.   We don't use PREG here.  */
 5823: 
 5824: size_t
 5825: regerror (err, preg, errbuf, errbuf_size)
 5826:     int err;
 5827:     const regex_t *preg;
 5828:     char *errbuf;
 5829:     size_t errbuf_size;
 5830: {
 5831:   const char *msg;
 5832:   size_t msg_size;
 5833: 
 5834:   if (err < 0
 5835:       || err >= (int) (sizeof (re_error_msgid_idx)
 5836: 			   / sizeof (re_error_msgid_idx[0])))
 5837:     /* Only error codes returned by the rest of the code should be passed
 5838:        to this routine.  If we are given anything else, or if other regex
 5839:        code generates an invalid error code, then the program has a bug.
 5840:        Dump core so we can fix it.  */
 5841:     abort ();
 5842: 
 5843:   msg = gettext (re_error_msgid + re_error_msgid_idx[err]);
 5844: 
 5845:   msg_size = strlen (msg) + 1; /* Includes the null.  */
 5846: 
 5847:   if (errbuf_size != 0)
 5848:     {
 5849:       if (msg_size > errbuf_size)
 5850:         {
 5851: #if defined HAVE_MEMPCPY || defined _LIBC
 5852: 	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
 5853: #else
 5854:           memcpy (errbuf, msg, errbuf_size - 1);
 5855:           errbuf[errbuf_size - 1] = 0;
 5856: #endif
 5857:         }
 5858:       else
 5859:         memcpy (errbuf, msg, msg_size);
 5860:     }
 5861: 
 5862:   return msg_size;
 5863: }
 5864: #ifdef _LIBC
 5865: weak_alias (__regerror, regerror)
 5866: #endif
 5867: 
 5868: 
 5869: /* Free dynamically allocated space used by PREG.  */
 5870: 
 5871: void
 5872: regfree (preg)
 5873:     regex_t *preg;
 5874: {
 5875:   if (preg->buffer != NULL)
 5876:     free (preg->buffer);
 5877:   preg->buffer = NULL;
 5878: 
 5879:   preg->allocated = 0;
 5880:   preg->used = 0;
 5881: 
 5882:   if (preg->fastmap != NULL)
 5883:     free (preg->fastmap);
 5884:   preg->fastmap = NULL;
 5885:   preg->fastmap_accurate = 0;
 5886: 
 5887:   if (preg->translate != NULL)
 5888:     free (preg->translate);
 5889:   preg->translate = NULL;
 5890: }
 5891: #ifdef _LIBC
 5892: weak_alias (__regfree, regfree)
 5893: #endif
 5894: 
 5895: #endif /* not emacs  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>