Annotation of embedaddon/quagga/lib/regex.c, revision 1.1.1.2

1.1       misho       1: /* Extended regular expression matching and search library,
                      2:    version 0.12.
                      3:    (Implements POSIX draft P1003.2/D11.2, except for some of the
                      4:    internationalization features.)
                      5:    Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
                      6: 
                      7:    The GNU C Library is free software; you can redistribute it and/or
                      8:    modify it under the terms of the GNU Library General Public License as
                      9:    published by the Free Software Foundation; either version 2 of the
                     10:    License, or (at your option) any later version.
                     11: 
                     12:    The GNU C Library is distributed in the hope that it will be useful,
                     13:    but WITHOUT ANY WARRANTY; without even the implied warranty of
                     14:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     15:    Library General Public License for more details.
                     16: 
                     17:    You should have received a copy of the GNU Library General Public
                     18:    License along with the GNU C Library; see the file COPYING.LIB.  If not,
                     19:    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
                     20:    Boston, MA 02111-1307, USA.  */
                     21: 
                     22: /* AIX requires this to be the first thing in the file. */
                     23: #if defined _AIX && !defined REGEX_MALLOC
                     24:   #pragma alloca
                     25: #endif
                     26: 
                     27: #undef _GNU_SOURCE
                     28: #define _GNU_SOURCE
                     29: 
                     30: #ifdef HAVE_CONFIG_H
                     31: # include <config.h>
                     32: #endif
                     33: #ifdef _WIN32
                     34: /* Windows does not provide unistd.h, which is required for abort() */
                     35: #include <process.h>
                     36: #endif /* _WIN32 */
                     37: 
                     38: #ifndef PARAMS
                     39: # if defined __GNUC__ || (defined __STDC__ && __STDC__)
                     40: #  define PARAMS(args) args
                     41: # else
                     42: #  define PARAMS(args) ()
                     43: # endif  /* GCC.  */
                     44: #endif  /* Not PARAMS.  */
                     45: 
                     46: #if defined STDC_HEADERS && !defined emacs
                     47: # include <stddef.h>
                     48: #else
                     49: /* We need this for `regex.h', and perhaps for the Emacs include files.  */
                     50: # include <sys/types.h>
                     51: #endif
                     52: 
                     53: #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
                     54: 
                     55: /* For platform which support the ISO C amendement 1 functionality we
                     56:    support user defined character classes.  */
                     57: #if defined _LIBC || WIDE_CHAR_SUPPORT
                     58: /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
                     59: # include <wchar.h>
                     60: # include <wctype.h>
                     61: #endif
                     62: 
                     63: #ifdef _LIBC
                     64: /* We have to keep the namespace clean.  */
                     65: # define regfree(preg) __regfree (preg)
                     66: # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
                     67: # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
                     68: # define regerror(errcode, preg, errbuf, errbuf_size) \
                     69:        __regerror(errcode, preg, errbuf, errbuf_size)
                     70: # define re_set_registers(bu, re, nu, st, en) \
                     71:        __re_set_registers (bu, re, nu, st, en)
                     72: # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
                     73:        __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
                     74: # define re_match(bufp, string, size, pos, regs) \
                     75:        __re_match (bufp, string, size, pos, regs)
                     76: # define re_search(bufp, string, size, startpos, range, regs) \
                     77:        __re_search (bufp, string, size, startpos, range, regs)
                     78: # define re_compile_pattern(pattern, length, bufp) \
                     79:        __re_compile_pattern (pattern, length, bufp)
                     80: # define re_set_syntax(syntax) __re_set_syntax (syntax)
                     81: # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
                     82:        __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
                     83: # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
                     84: 
                     85: #define btowc __btowc
                     86: #endif
                     87: 
                     88: /* This is for other GNU distributions with internationalized messages.  */
                     89: #if HAVE_LIBINTL_H || defined _LIBC
                     90: # include <libintl.h>
                     91: #else
                     92: # define gettext(msgid) (msgid)
                     93: #endif
                     94: 
                     95: #ifndef gettext_noop
                     96: /* This define is so xgettext can find the internationalizable
                     97:    strings.  */
                     98: # define gettext_noop(String) String
                     99: #endif
                    100: 
                    101: /* The `emacs' switch turns on certain matching commands
                    102:    that make sense only in Emacs. */
                    103: #ifdef emacs
                    104: 
                    105: # include "lisp.h"
                    106: # include "buffer.h"
                    107: # include "syntax.h"
                    108: 
                    109: #else  /* not emacs */
                    110: 
                    111: /* If we are not linking with Emacs proper,
                    112:    we can't use the relocating allocator
                    113:    even if config.h says that we can.  */
                    114: # undef REL_ALLOC
                    115: 
                    116: # if defined STDC_HEADERS || defined _LIBC
                    117: #  include <stdlib.h>
                    118: # else
                    119: char *malloc ();
                    120: char *realloc ();
                    121: # endif
                    122: 
                    123: /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
                    124:    If nothing else has been done, use the method below.  */
                    125: # ifdef INHIBIT_STRING_HEADER
                    126: #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
                    127: #   if !defined bzero && !defined bcopy
                    128: #    undef INHIBIT_STRING_HEADER
                    129: #   endif
                    130: #  endif
                    131: # endif
                    132: 
                    133: /* This is the normal way of making sure we have a bcopy and a bzero.
                    134:    This is used in most programs--a few other programs avoid this
                    135:    by defining INHIBIT_STRING_HEADER.  */
                    136: # ifndef INHIBIT_STRING_HEADER
                    137: #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
                    138: #   include <string.h>
                    139: #   ifndef bzero
                    140: #    ifndef _LIBC
                    141: #     define bzero(s, n)       (memset (s, '\0', n), (s))
                    142: #    else
                    143: #     define bzero(s, n)       __bzero (s, n)
                    144: #    endif
                    145: #   endif
                    146: #  else
                    147: #   include <strings.h>
                    148: #   ifndef memcmp
                    149: #    define memcmp(s1, s2, n)  bcmp (s1, s2, n)
                    150: #   endif
                    151: #   ifndef memcpy
                    152: #    define memcpy(d, s, n)    (bcopy (s, d, n), (d))
                    153: #   endif
                    154: #  endif
                    155: # endif
                    156: 
                    157: /* Define the syntax stuff for \<, \>, etc.  */
                    158: 
                    159: /* This must be nonzero for the wordchar and notwordchar pattern
                    160:    commands in re_match_2.  */
                    161: # ifndef Sword
                    162: #  define Sword 1
                    163: # endif
                    164: 
                    165: # ifdef SWITCH_ENUM_BUG
                    166: #  define SWITCH_ENUM_CAST(x) ((int)(x))
                    167: # else
                    168: #  define SWITCH_ENUM_CAST(x) (x)
                    169: # endif
                    170: 
                    171: /* How many characters in the character set.  */
                    172: # define CHAR_SET_SIZE 256
                    173: 
                    174: # ifdef SYNTAX_TABLE
                    175: 
                    176: extern char *re_syntax_table;
                    177: 
                    178: # else /* not SYNTAX_TABLE */
                    179: 
                    180: static char re_syntax_table[CHAR_SET_SIZE];
                    181: 
                    182: static void
                    183: init_syntax_once ()
                    184: {
                    185:    register int c;
                    186:    static int done;
                    187: 
                    188:    if (done)
                    189:      return;
                    190: 
                    191:    memset (re_syntax_table, 0, sizeof re_syntax_table);
                    192: 
                    193:    for (c = 'a'; c <= 'z'; c++)
                    194:      re_syntax_table[c] = Sword;
                    195: 
                    196:    for (c = 'A'; c <= 'Z'; c++)
                    197:      re_syntax_table[c] = Sword;
                    198: 
                    199:    for (c = '0'; c <= '9'; c++)
                    200:      re_syntax_table[c] = Sword;
                    201: 
                    202:    re_syntax_table['_'] = Sword;
                    203: 
                    204:    done = 1;
                    205: }
                    206: 
                    207: # endif /* not SYNTAX_TABLE */
                    208: 
                    209: # define SYNTAX(c) re_syntax_table[c]
                    210: 
                    211: #endif /* not emacs */
1.1.1.2 ! misho     212: 
1.1       misho     213: /* Get the interface, including the syntax bits.  */
                    214: #include <regex-gnu.h>
                    215: 
                    216: /* isalpha etc. are used for the character classes.  */
                    217: #include <ctype.h>
                    218: 
                    219: /* Jim Meyering writes:
                    220: 
                    221:    "... Some ctype macros are valid only for character codes that
                    222:    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
                    223:    using /bin/cc or gcc but without giving an ansi option).  So, all
                    224:    ctype uses should be through macros like ISPRINT...  If
                    225:    STDC_HEADERS is defined, then autoconf has verified that the ctype
                    226:    macros don't need to be guarded with references to isascii. ...
                    227:    Defining isascii to 1 should let any compiler worth its salt
                    228:    eliminate the && through constant folding."
                    229:    Solaris defines some of these symbols so we must undefine them first.  */
                    230: 
                    231: #undef ISASCII
                    232: #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
                    233: # define ISASCII(c) 1
                    234: #else
                    235: # define ISASCII(c) isascii(c)
                    236: #endif
                    237: 
                    238: #ifdef isblank
                    239: # define ISBLANK(c) (ISASCII (c) && isblank (c))
                    240: #else
                    241: # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
                    242: #endif
                    243: #ifdef isgraph
                    244: # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
                    245: #else
                    246: # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
                    247: #endif
                    248: 
                    249: #undef ISPRINT
                    250: #define ISPRINT(c) (ISASCII (c) && isprint (c))
                    251: #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
                    252: #define ISALNUM(c) (ISASCII (c) && isalnum (c))
                    253: #define ISALPHA(c) (ISASCII (c) && isalpha (c))
                    254: #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
                    255: #define ISLOWER(c) (ISASCII (c) && islower (c))
                    256: #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
                    257: #define ISSPACE(c) (ISASCII (c) && isspace (c))
                    258: #define ISUPPER(c) (ISASCII (c) && isupper (c))
                    259: #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
                    260: 
                    261: #ifdef _tolower
                    262: # define TOLOWER(c) _tolower(c)
                    263: #else
                    264: # define TOLOWER(c) tolower(c)
                    265: #endif
                    266: 
                    267: #ifndef NULL
                    268: # define NULL (void *)0
                    269: #endif
                    270: 
                    271: /* We remove any previous definition of `SIGN_EXTEND_CHAR',
                    272:    since ours (we hope) works properly with all combinations of
                    273:    machines, compilers, `char' and `unsigned char' argument types.
                    274:    (Per Bothner suggested the basic approach.)  */
                    275: #undef SIGN_EXTEND_CHAR
                    276: #if __STDC__
                    277: # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
                    278: #else  /* not __STDC__ */
                    279: /* As in Harbison and Steele.  */
                    280: # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
                    281: #endif
1.1.1.2 ! misho     282: 
1.1       misho     283: /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
                    284:    use `alloca' instead of `malloc'.  This is because using malloc in
                    285:    re_search* or re_match* could cause memory leaks when C-g is used in
                    286:    Emacs; also, malloc is slower and causes storage fragmentation.  On
                    287:    the other hand, malloc is more portable, and easier to debug.
                    288: 
                    289:    Because we sometimes use alloca, some routines have to be macros,
                    290:    not functions -- `alloca'-allocated space disappears at the end of the
                    291:    function it is called in.  */
                    292: 
                    293: #ifdef REGEX_MALLOC
                    294: 
                    295: # define REGEX_ALLOCATE malloc
                    296: # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
                    297: # define REGEX_FREE free
                    298: 
                    299: #else /* not REGEX_MALLOC  */
                    300: 
                    301: /* Emacs already defines alloca, sometimes.  */
                    302: # ifndef alloca
                    303: 
                    304: /* Make alloca work the best possible way.  */
                    305: #  ifdef __GNUC__
                    306: #   define alloca __builtin_alloca
                    307: #  else /* not __GNUC__ */
                    308: #   if HAVE_ALLOCA_H
                    309: #    include <alloca.h>
                    310: #   endif /* HAVE_ALLOCA_H */
                    311: #  endif /* not __GNUC__ */
                    312: 
                    313: # endif /* not alloca */
                    314: 
                    315: # define REGEX_ALLOCATE alloca
                    316: 
                    317: /* Assumes a `char *destination' variable.  */
                    318: # define REGEX_REALLOCATE(source, osize, nsize)                                \
                    319:   (destination = (char *) alloca (nsize),                              \
                    320:    memcpy (destination, source, osize))
                    321: 
                    322: /* No need to do anything to free, after alloca.  */
                    323: # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
                    324: 
                    325: #endif /* not REGEX_MALLOC */
                    326: 
                    327: /* Define how to allocate the failure stack.  */
                    328: 
                    329: #if defined REL_ALLOC && defined REGEX_MALLOC
                    330: 
                    331: # define REGEX_ALLOCATE_STACK(size)                            \
                    332:   r_alloc (&failure_stack_ptr, (size))
                    333: # define REGEX_REALLOCATE_STACK(source, osize, nsize)          \
                    334:   r_re_alloc (&failure_stack_ptr, (nsize))
                    335: # define REGEX_FREE_STACK(ptr)                                 \
                    336:   r_alloc_free (&failure_stack_ptr)
                    337: 
                    338: #else /* not using relocating allocator */
                    339: 
                    340: # ifdef REGEX_MALLOC
                    341: 
                    342: #  define REGEX_ALLOCATE_STACK malloc
                    343: #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
                    344: #  define REGEX_FREE_STACK free
                    345: 
                    346: # else /* not REGEX_MALLOC */
                    347: 
                    348: #  define REGEX_ALLOCATE_STACK alloca
                    349: 
                    350: #  define REGEX_REALLOCATE_STACK(source, osize, nsize)                 \
                    351:    REGEX_REALLOCATE (source, osize, nsize)
                    352: /* No need to explicitly free anything.  */
                    353: #  define REGEX_FREE_STACK(arg)
                    354: 
                    355: # endif /* not REGEX_MALLOC */
                    356: #endif /* not using relocating allocator */
                    357: 
                    358: 
                    359: /* True if `size1' is non-NULL and PTR is pointing anywhere inside
                    360:    `string1' or just past its end.  This works if PTR is NULL, which is
                    361:    a good thing.  */
                    362: #define FIRST_STRING_P(ptr)                                    \
                    363:   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
                    364: 
                    365: /* (Re)Allocate N items of type T using malloc, or fail.  */
                    366: #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
                    367: #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
                    368: #define RETALLOC_IF(addr, n, t) \
                    369:   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
                    370: #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
                    371: 
                    372: #define BYTEWIDTH 8 /* In bits.  */
                    373: 
                    374: #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
                    375: 
                    376: #undef MAX
                    377: #undef MIN
                    378: #define MAX(a, b) ((a) > (b) ? (a) : (b))
                    379: #define MIN(a, b) ((a) < (b) ? (a) : (b))
                    380: 
                    381: typedef char boolean;
                    382: #define false 0
                    383: #define true 1
                    384: 
                    385: static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
                    386:                                        const char *string1, int size1,
                    387:                                        const char *string2, int size2,
                    388:                                        int pos,
                    389:                                        struct re_registers *regs,
                    390:                                        int stop));
1.1.1.2 ! misho     391: 
1.1       misho     392: /* These are the command codes that appear in compiled regular
                    393:    expressions.  Some opcodes are followed by argument bytes.  A
                    394:    command code can specify any interpretation whatsoever for its
                    395:    arguments.  Zero bytes may appear in the compiled regular expression.  */
                    396: 
                    397: typedef enum
                    398: {
                    399:   no_op = 0,
                    400: 
                    401:   /* Succeed right away--no more backtracking.  */
                    402:   succeed,
                    403: 
                    404:         /* Followed by one byte giving n, then by n literal bytes.  */
                    405:   exactn,
                    406: 
                    407:         /* Matches any (more or less) character.  */
                    408:   anychar,
                    409: 
                    410:         /* Matches any one char belonging to specified set.  First
                    411:            following byte is number of bitmap bytes.  Then come bytes
                    412:            for a bitmap saying which chars are in.  Bits in each byte
                    413:            are ordered low-bit-first.  A character is in the set if its
                    414:            bit is 1.  A character too large to have a bit in the map is
                    415:            automatically not in the set.  */
                    416:   charset,
                    417: 
                    418:         /* Same parameters as charset, but match any character that is
                    419:            not one of those specified.  */
                    420:   charset_not,
                    421: 
                    422:         /* Start remembering the text that is matched, for storing in a
                    423:            register.  Followed by one byte with the register number, in
                    424:            the range 0 to one less than the pattern buffer's re_nsub
                    425:            field.  Then followed by one byte with the number of groups
                    426:            inner to this one.  (This last has to be part of the
                    427:            start_memory only because we need it in the on_failure_jump
                    428:            of re_match_2.)  */
                    429:   start_memory,
                    430: 
                    431:         /* Stop remembering the text that is matched and store it in a
                    432:            memory register.  Followed by one byte with the register
                    433:            number, in the range 0 to one less than `re_nsub' in the
                    434:            pattern buffer, and one byte with the number of inner groups,
                    435:            just like `start_memory'.  (We need the number of inner
                    436:            groups here because we don't have any easy way of finding the
                    437:            corresponding start_memory when we're at a stop_memory.)  */
                    438:   stop_memory,
                    439: 
                    440:         /* Match a duplicate of something remembered. Followed by one
                    441:            byte containing the register number.  */
                    442:   duplicate,
                    443: 
                    444:         /* Fail unless at beginning of line.  */
                    445:   begline,
                    446: 
                    447:         /* Fail unless at end of line.  */
                    448:   endline,
                    449: 
                    450:         /* Succeeds if at beginning of buffer (if emacs) or at beginning
                    451:            of string to be matched (if not).  */
                    452:   begbuf,
                    453: 
                    454:         /* Analogously, for end of buffer/string.  */
                    455:   endbuf,
                    456: 
                    457:         /* Followed by two byte relative address to which to jump.  */
                    458:   jump,
                    459: 
                    460:        /* Same as jump, but marks the end of an alternative.  */
                    461:   jump_past_alt,
                    462: 
                    463:         /* Followed by two-byte relative address of place to resume at
                    464:            in case of failure.  */
                    465:   on_failure_jump,
                    466: 
                    467:         /* Like on_failure_jump, but pushes a placeholder instead of the
                    468:            current string position when executed.  */
                    469:   on_failure_keep_string_jump,
                    470: 
                    471:         /* Throw away latest failure point and then jump to following
                    472:            two-byte relative address.  */
                    473:   pop_failure_jump,
                    474: 
                    475:         /* Change to pop_failure_jump if know won't have to backtrack to
                    476:            match; otherwise change to jump.  This is used to jump
                    477:            back to the beginning of a repeat.  If what follows this jump
                    478:            clearly won't match what the repeat does, such that we can be
                    479:            sure that there is no use backtracking out of repetitions
                    480:            already matched, then we change it to a pop_failure_jump.
                    481:            Followed by two-byte address.  */
                    482:   maybe_pop_jump,
                    483: 
                    484:         /* Jump to following two-byte address, and push a dummy failure
                    485:            point. This failure point will be thrown away if an attempt
                    486:            is made to use it for a failure.  A `+' construct makes this
                    487:            before the first repeat.  Also used as an intermediary kind
                    488:            of jump when compiling an alternative.  */
                    489:   dummy_failure_jump,
                    490: 
                    491:        /* Push a dummy failure point and continue.  Used at the end of
                    492:           alternatives.  */
                    493:   push_dummy_failure,
                    494: 
                    495:         /* Followed by two-byte relative address and two-byte number n.
                    496:            After matching N times, jump to the address upon failure.  */
                    497:   succeed_n,
                    498: 
                    499:         /* Followed by two-byte relative address, and two-byte number n.
                    500:            Jump to the address N times, then fail.  */
                    501:   jump_n,
                    502: 
                    503:         /* Set the following two-byte relative address to the
                    504:            subsequent two-byte number.  The address *includes* the two
                    505:            bytes of number.  */
                    506:   set_number_at,
                    507: 
                    508:   wordchar,    /* Matches any word-constituent character.  */
                    509:   notwordchar, /* Matches any char that is not a word-constituent.  */
                    510: 
                    511:   wordbeg,     /* Succeeds if at word beginning.  */
                    512:   wordend,     /* Succeeds if at word end.  */
                    513: 
                    514:   wordbound,   /* Succeeds if at a word boundary.  */
                    515:   notwordbound /* Succeeds if not at a word boundary.  */
                    516: 
                    517: #ifdef emacs
                    518:   ,before_dot, /* Succeeds if before point.  */
                    519:   at_dot,      /* Succeeds if at point.  */
                    520:   after_dot,   /* Succeeds if after point.  */
                    521: 
                    522:        /* Matches any character whose syntax is specified.  Followed by
                    523:            a byte which contains a syntax code, e.g., Sword.  */
                    524:   syntaxspec,
                    525: 
                    526:        /* Matches any character whose syntax is not that specified.  */
                    527:   notsyntaxspec
                    528: #endif /* emacs */
                    529: } re_opcode_t;
1.1.1.2 ! misho     530: 
1.1       misho     531: /* Common operations on the compiled pattern.  */
                    532: 
                    533: /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
                    534: 
                    535: #define STORE_NUMBER(destination, number)                              \
                    536:   do {                                                                 \
                    537:     (destination)[0] = (number) & 0377;                                        \
                    538:     (destination)[1] = (number) >> 8;                                  \
                    539:   } while (0)
                    540: 
                    541: /* Same as STORE_NUMBER, except increment DESTINATION to
                    542:    the byte after where the number is stored.  Therefore, DESTINATION
                    543:    must be an lvalue.  */
                    544: 
                    545: #define STORE_NUMBER_AND_INCR(destination, number)                     \
                    546:   do {                                                                 \
                    547:     STORE_NUMBER (destination, number);                                        \
                    548:     (destination) += 2;                                                        \
                    549:   } while (0)
                    550: 
                    551: /* Put into DESTINATION a number stored in two contiguous bytes starting
                    552:    at SOURCE.  */
                    553: 
                    554: #define EXTRACT_NUMBER(destination, source)                            \
                    555:   do {                                                                 \
                    556:     (destination) = *(source) & 0377;                                  \
                    557:     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;          \
                    558:   } while (0)
                    559: 
                    560: #ifdef DEBUG
                    561: static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
                    562: static void
                    563: extract_number (dest, source)
                    564:     int *dest;
                    565:     unsigned char *source;
                    566: {
                    567:   int temp = SIGN_EXTEND_CHAR (*(source + 1));
                    568:   *dest = *source & 0377;
                    569:   *dest += temp << 8;
                    570: }
                    571: 
                    572: # ifndef EXTRACT_MACROS /* To debug the macros.  */
                    573: #  undef EXTRACT_NUMBER
                    574: #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
                    575: # endif /* not EXTRACT_MACROS */
                    576: 
                    577: #endif /* DEBUG */
                    578: 
                    579: /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
                    580:    SOURCE must be an lvalue.  */
                    581: 
                    582: #define EXTRACT_NUMBER_AND_INCR(destination, source)                   \
                    583:   do {                                                                 \
                    584:     EXTRACT_NUMBER (destination, source);                              \
                    585:     (source) += 2;                                                     \
                    586:   } while (0)
                    587: 
                    588: #ifdef DEBUG
                    589: static void extract_number_and_incr _RE_ARGS ((int *destination,
                    590:                                               unsigned char **source));
                    591: static void
                    592: extract_number_and_incr (destination, source)
                    593:     int *destination;
                    594:     unsigned char **source;
                    595: {
                    596:   extract_number (destination, *source);
                    597:   *source += 2;
                    598: }
                    599: 
                    600: # ifndef EXTRACT_MACROS
                    601: #  undef EXTRACT_NUMBER_AND_INCR
                    602: #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
                    603:   extract_number_and_incr (&dest, &src)
                    604: # endif /* not EXTRACT_MACROS */
                    605: 
                    606: #endif /* DEBUG */
1.1.1.2 ! misho     607: 
1.1       misho     608: /* If DEBUG is defined, Regex prints many voluminous messages about what
                    609:    it is doing (if the variable `debug' is nonzero).  If linked with the
                    610:    main program in `iregex.c', you can enter patterns and strings
                    611:    interactively.  And if linked with the main program in `main.c' and
                    612:    the other test files, you can run the already-written tests.  */
                    613: 
                    614: #ifdef DEBUG
                    615: 
                    616: /* We use standard I/O for debugging.  */
                    617: # include <stdio.h>
                    618: 
                    619: /* It is useful to test things that ``must'' be true when debugging.  */
                    620: # include "zassert.h"
                    621: 
                    622: static int debug;
                    623: 
                    624: # define DEBUG_STATEMENT(e) e
                    625: # define DEBUG_PRINT1(x) if (debug) printf (x)
                    626: # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
                    627: # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
                    628: # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
                    629: # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                                 \
                    630:   if (debug) print_partial_compiled_pattern (s, e)
                    631: # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                        \
                    632:   if (debug) print_double_string (w, s1, sz1, s2, sz2)
                    633: 
                    634: 
                    635: /* Print the fastmap in human-readable form.  */
                    636: 
                    637: void
                    638: print_fastmap (fastmap)
                    639:     char *fastmap;
                    640: {
                    641:   unsigned was_a_range = 0;
                    642:   unsigned i = 0;
                    643: 
                    644:   while (i < (1 << BYTEWIDTH))
                    645:     {
                    646:       if (fastmap[i++])
                    647:        {
                    648:          was_a_range = 0;
                    649:           putchar (i - 1);
                    650:           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
                    651:             {
                    652:               was_a_range = 1;
                    653:               i++;
                    654:             }
                    655:          if (was_a_range)
                    656:             {
                    657:               printf ("-");
                    658:               putchar (i - 1);
                    659:             }
                    660:         }
                    661:     }
                    662:   putchar ('\n');
                    663: }
                    664: 
                    665: 
                    666: /* Print a compiled pattern string in human-readable form, starting at
                    667:    the START pointer into it and ending just before the pointer END.  */
                    668: 
                    669: void
                    670: print_partial_compiled_pattern (start, end)
                    671:     unsigned char *start;
                    672:     unsigned char *end;
                    673: {
                    674:   int mcnt, mcnt2;
                    675:   unsigned char *p1;
                    676:   unsigned char *p = start;
                    677:   unsigned char *pend = end;
                    678: 
                    679:   if (start == NULL)
                    680:     {
                    681:       printf ("(null)\n");
                    682:       return;
                    683:     }
                    684: 
                    685:   /* Loop over pattern commands.  */
                    686:   while (p < pend)
                    687:     {
                    688:       printf ("%d:\t", p - start);
                    689: 
                    690:       switch ((re_opcode_t) *p++)
                    691:        {
                    692:         case no_op:
                    693:           printf ("/no_op");
                    694:           break;
                    695: 
                    696:        case exactn:
                    697:          mcnt = *p++;
                    698:           printf ("/exactn/%d", mcnt);
                    699:           do
                    700:            {
                    701:               putchar ('/');
                    702:              putchar (*p++);
                    703:             }
                    704:           while (--mcnt);
                    705:           break;
                    706: 
                    707:        case start_memory:
                    708:           mcnt = *p++;
                    709:           printf ("/start_memory/%d/%d", mcnt, *p++);
                    710:           break;
                    711: 
                    712:        case stop_memory:
                    713:           mcnt = *p++;
                    714:          printf ("/stop_memory/%d/%d", mcnt, *p++);
                    715:           break;
                    716: 
                    717:        case duplicate:
                    718:          printf ("/duplicate/%d", *p++);
                    719:          break;
                    720: 
                    721:        case anychar:
                    722:          printf ("/anychar");
                    723:          break;
                    724: 
                    725:        case charset:
                    726:         case charset_not:
                    727:           {
                    728:             register int c, last = -100;
                    729:            register int in_range = 0;
                    730: 
                    731:            printf ("/charset [%s",
                    732:                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
                    733: 
                    734:             assert (p + *p < pend);
                    735: 
                    736:             for (c = 0; c < 256; c++)
                    737:              if (c / 8 < *p
                    738:                  && (p[1 + (c/8)] & (1 << (c % 8))))
                    739:                {
                    740:                  /* Are we starting a range?  */
                    741:                  if (last + 1 == c && ! in_range)
                    742:                    {
                    743:                      putchar ('-');
                    744:                      in_range = 1;
                    745:                    }
                    746:                  /* Have we broken a range?  */
                    747:                  else if (last + 1 != c && in_range)
                    748:               {
                    749:                      putchar (last);
                    750:                      in_range = 0;
                    751:                    }
                    752: 
                    753:                  if (! in_range)
                    754:                    putchar (c);
                    755: 
                    756:                  last = c;
                    757:               }
                    758: 
                    759:            if (in_range)
                    760:              putchar (last);
                    761: 
                    762:            putchar (']');
                    763: 
                    764:            p += 1 + *p;
                    765:          }
                    766:          break;
                    767: 
                    768:        case begline:
                    769:          printf ("/begline");
                    770:           break;
                    771: 
                    772:        case endline:
                    773:           printf ("/endline");
                    774:           break;
                    775: 
                    776:        case on_failure_jump:
                    777:           extract_number_and_incr (&mcnt, &p);
                    778:          printf ("/on_failure_jump to %d", p + mcnt - start);
                    779:           break;
                    780: 
                    781:        case on_failure_keep_string_jump:
                    782:           extract_number_and_incr (&mcnt, &p);
                    783:          printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
                    784:           break;
                    785: 
                    786:        case dummy_failure_jump:
                    787:           extract_number_and_incr (&mcnt, &p);
                    788:          printf ("/dummy_failure_jump to %d", p + mcnt - start);
                    789:           break;
                    790: 
                    791:        case push_dummy_failure:
                    792:           printf ("/push_dummy_failure");
                    793:           break;
                    794: 
                    795:         case maybe_pop_jump:
                    796:           extract_number_and_incr (&mcnt, &p);
                    797:          printf ("/maybe_pop_jump to %d", p + mcnt - start);
                    798:          break;
                    799: 
                    800:         case pop_failure_jump:
                    801:          extract_number_and_incr (&mcnt, &p);
                    802:          printf ("/pop_failure_jump to %d", p + mcnt - start);
                    803:          break;
                    804: 
                    805:         case jump_past_alt:
                    806:          extract_number_and_incr (&mcnt, &p);
                    807:          printf ("/jump_past_alt to %d", p + mcnt - start);
                    808:          break;
                    809: 
                    810:         case jump:
                    811:          extract_number_and_incr (&mcnt, &p);
                    812:          printf ("/jump to %d", p + mcnt - start);
                    813:          break;
                    814: 
                    815:         case succeed_n:
                    816:           extract_number_and_incr (&mcnt, &p);
                    817:          p1 = p + mcnt;
                    818:           extract_number_and_incr (&mcnt2, &p);
                    819:          printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
                    820:           break;
                    821: 
                    822:         case jump_n:
                    823:           extract_number_and_incr (&mcnt, &p);
                    824:          p1 = p + mcnt;
                    825:           extract_number_and_incr (&mcnt2, &p);
                    826:          printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
                    827:           break;
                    828: 
                    829:         case set_number_at:
                    830:           extract_number_and_incr (&mcnt, &p);
                    831:          p1 = p + mcnt;
                    832:           extract_number_and_incr (&mcnt2, &p);
                    833:          printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
                    834:           break;
                    835: 
                    836:         case wordbound:
                    837:          printf ("/wordbound");
                    838:          break;
                    839: 
                    840:        case notwordbound:
                    841:          printf ("/notwordbound");
                    842:           break;
                    843: 
                    844:        case wordbeg:
                    845:          printf ("/wordbeg");
                    846:          break;
                    847: 
                    848:        case wordend:
                    849:          printf ("/wordend");
                    850: 
                    851: # ifdef emacs
                    852:        case before_dot:
                    853:          printf ("/before_dot");
                    854:           break;
                    855: 
                    856:        case at_dot:
                    857:          printf ("/at_dot");
                    858:           break;
                    859: 
                    860:        case after_dot:
                    861:          printf ("/after_dot");
                    862:           break;
                    863: 
                    864:        case syntaxspec:
                    865:           printf ("/syntaxspec");
                    866:          mcnt = *p++;
                    867:          printf ("/%d", mcnt);
                    868:           break;
                    869: 
                    870:        case notsyntaxspec:
                    871:           printf ("/notsyntaxspec");
                    872:          mcnt = *p++;
                    873:          printf ("/%d", mcnt);
                    874:          break;
                    875: # endif /* emacs */
                    876: 
                    877:        case wordchar:
                    878:          printf ("/wordchar");
                    879:           break;
                    880: 
                    881:        case notwordchar:
                    882:          printf ("/notwordchar");
                    883:           break;
                    884: 
                    885:        case begbuf:
                    886:          printf ("/begbuf");
                    887:           break;
                    888: 
                    889:        case endbuf:
                    890:          printf ("/endbuf");
                    891:           break;
                    892: 
                    893:         default:
                    894:           printf ("?%d", *(p-1));
                    895:        }
                    896: 
                    897:       putchar ('\n');
                    898:     }
                    899: 
                    900:   printf ("%d:\tend of pattern.\n", p - start);
                    901: }
                    902: 
                    903: 
                    904: void
                    905: print_compiled_pattern (bufp)
                    906:     struct re_pattern_buffer *bufp;
                    907: {
                    908:   unsigned char *buffer = bufp->buffer;
                    909: 
                    910:   print_partial_compiled_pattern (buffer, buffer + bufp->used);
                    911:   printf ("%ld bytes used/%ld bytes allocated.\n",
                    912:          bufp->used, bufp->allocated);
                    913: 
                    914:   if (bufp->fastmap_accurate && bufp->fastmap)
                    915:     {
                    916:       printf ("fastmap: ");
                    917:       print_fastmap (bufp->fastmap);
                    918:     }
                    919: 
                    920:   printf ("re_nsub: %d\t", bufp->re_nsub);
                    921:   printf ("regs_alloc: %d\t", bufp->regs_allocated);
                    922:   printf ("can_be_null: %d\t", bufp->can_be_null);
                    923:   printf ("newline_anchor: %d\n", bufp->newline_anchor);
                    924:   printf ("no_sub: %d\t", bufp->no_sub);
                    925:   printf ("not_bol: %d\t", bufp->not_bol);
                    926:   printf ("not_eol: %d\t", bufp->not_eol);
                    927:   printf ("syntax: %lx\n", bufp->syntax);
                    928:   /* Perhaps we should print the translate table?  */
                    929: }
                    930: 
                    931: 
                    932: void
                    933: print_double_string (where, string1, size1, string2, size2)
                    934:     const char *where;
                    935:     const char *string1;
                    936:     const char *string2;
                    937:     int size1;
                    938:     int size2;
                    939: {
                    940:   int this_char;
                    941: 
                    942:   if (where == NULL)
                    943:     printf ("(null)");
                    944:   else
                    945:     {
                    946:       if (FIRST_STRING_P (where))
                    947:         {
                    948:           for (this_char = where - string1; this_char < size1; this_char++)
                    949:             putchar (string1[this_char]);
                    950: 
                    951:           where = string2;
                    952:         }
                    953: 
                    954:       for (this_char = where - string2; this_char < size2; this_char++)
                    955:         putchar (string2[this_char]);
                    956:     }
                    957: }
                    958: 
                    959: void
                    960: printchar (c)
                    961:      int c;
                    962: {
                    963:   putc (c, stderr);
                    964: }
                    965: 
                    966: #else /* not DEBUG */
                    967: 
                    968: # undef assert
                    969: # define assert(e)
                    970: 
                    971: # define DEBUG_STATEMENT(e)
                    972: # define DEBUG_PRINT1(x)
                    973: # define DEBUG_PRINT2(x1, x2)
                    974: # define DEBUG_PRINT3(x1, x2, x3)
                    975: # define DEBUG_PRINT4(x1, x2, x3, x4)
                    976: # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
                    977: # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
                    978: 
                    979: #endif /* not DEBUG */
1.1.1.2 ! misho     980: 
1.1       misho     981: /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
                    982:    also be assigned to arbitrarily: each pattern buffer stores its own
                    983:    syntax, so it can be changed between regex compilations.  */
                    984: /* This has no initializer because initialized variables in Emacs
                    985:    become read-only after dumping.  */
                    986: reg_syntax_t re_syntax_options;
                    987: 
                    988: 
                    989: /* Specify the precise syntax of regexps for compilation.  This provides
                    990:    for compatibility for various utilities which historically have
                    991:    different, incompatible syntaxes.
                    992: 
                    993:    The argument SYNTAX is a bit mask comprised of the various bits
                    994:    defined in regex.h.  We return the old syntax.  */
                    995: 
                    996: reg_syntax_t
                    997: re_set_syntax (syntax)
                    998:     reg_syntax_t syntax;
                    999: {
                   1000:   reg_syntax_t ret = re_syntax_options;
                   1001: 
                   1002:   re_syntax_options = syntax;
                   1003: #ifdef DEBUG
                   1004:   if (syntax & RE_DEBUG)
                   1005:     debug = 1;
                   1006:   else if (debug) /* was on but now is not */
                   1007:     debug = 0;
                   1008: #endif /* DEBUG */
                   1009:   return ret;
                   1010: }
                   1011: #ifdef _LIBC
                   1012: weak_alias (__re_set_syntax, re_set_syntax)
                   1013: #endif
1.1.1.2 ! misho    1014: 
1.1       misho    1015: /* This table gives an error message for each of the error codes listed
                   1016:    in regex.h.  Obviously the order here has to be same as there.
                   1017:    POSIX doesn't require that we do anything for REG_NOERROR,
                   1018:    but why not be nice?  */
                   1019: 
                   1020: static const char re_error_msgid[] =
                   1021:   {
                   1022: #define REG_NOERROR_IDX        0
                   1023:     gettext_noop ("Success")   /* REG_NOERROR */
                   1024:     "\0"
                   1025: #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
                   1026:     gettext_noop ("No match")  /* REG_NOMATCH */
                   1027:     "\0"
                   1028: #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
                   1029:     gettext_noop ("Invalid regular expression") /* REG_BADPAT */
                   1030:     "\0"
                   1031: #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
                   1032:     gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
                   1033:     "\0"
                   1034: #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
                   1035:     gettext_noop ("Invalid character class name") /* REG_ECTYPE */
                   1036:     "\0"
                   1037: #define REG_EESCAPE_IDX        (REG_ECTYPE_IDX + sizeof "Invalid character class name")
                   1038:     gettext_noop ("Trailing backslash") /* REG_EESCAPE */
                   1039:     "\0"
                   1040: #define REG_ESUBREG_IDX        (REG_EESCAPE_IDX + sizeof "Trailing backslash")
                   1041:     gettext_noop ("Invalid back reference") /* REG_ESUBREG */
                   1042:     "\0"
                   1043: #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
                   1044:     gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
                   1045:     "\0"
                   1046: #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
                   1047:     gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
                   1048:     "\0"
                   1049: #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
                   1050:     gettext_noop ("Unmatched \\{") /* REG_EBRACE */
                   1051:     "\0"
                   1052: #define REG_BADBR_IDX  (REG_EBRACE_IDX + sizeof "Unmatched \\{")
                   1053:     gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
                   1054:     "\0"
                   1055: #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
                   1056:     gettext_noop ("Invalid range end") /* REG_ERANGE */
                   1057:     "\0"
                   1058: #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
                   1059:     gettext_noop ("Memory exhausted") /* REG_ESPACE */
                   1060:     "\0"
                   1061: #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
                   1062:     gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
                   1063:     "\0"
                   1064: #define REG_EEND_IDX   (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
                   1065:     gettext_noop ("Premature end of regular expression") /* REG_EEND */
                   1066:     "\0"
                   1067: #define REG_ESIZE_IDX  (REG_EEND_IDX + sizeof "Premature end of regular expression")
                   1068:     gettext_noop ("Regular expression too big") /* REG_ESIZE */
                   1069:     "\0"
                   1070: #define REG_ERPAREN_IDX        (REG_ESIZE_IDX + sizeof "Regular expression too big")
                   1071:     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
                   1072:   };
                   1073: 
                   1074: static const size_t re_error_msgid_idx[] =
                   1075:   {
                   1076:     REG_NOERROR_IDX,
                   1077:     REG_NOMATCH_IDX,
                   1078:     REG_BADPAT_IDX,
                   1079:     REG_ECOLLATE_IDX,
                   1080:     REG_ECTYPE_IDX,
                   1081:     REG_EESCAPE_IDX,
                   1082:     REG_ESUBREG_IDX,
                   1083:     REG_EBRACK_IDX,
                   1084:     REG_EPAREN_IDX,
                   1085:     REG_EBRACE_IDX,
                   1086:     REG_BADBR_IDX,
                   1087:     REG_ERANGE_IDX,
                   1088:     REG_ESPACE_IDX,
                   1089:     REG_BADRPT_IDX,
                   1090:     REG_EEND_IDX,
                   1091:     REG_ESIZE_IDX,
                   1092:     REG_ERPAREN_IDX
                   1093:   };
1.1.1.2 ! misho    1094: 
1.1       misho    1095: /* Avoiding alloca during matching, to placate r_alloc.  */
                   1096: 
                   1097: /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
                   1098:    searching and matching functions should not call alloca.  On some
                   1099:    systems, alloca is implemented in terms of malloc, and if we're
                   1100:    using the relocating allocator routines, then malloc could cause a
                   1101:    relocation, which might (if the strings being searched are in the
                   1102:    ralloc heap) shift the data out from underneath the regexp
                   1103:    routines.
                   1104: 
                   1105:    Here's another reason to avoid allocation: Emacs
                   1106:    processes input from X in a signal handler; processing X input may
                   1107:    call malloc; if input arrives while a matching routine is calling
                   1108:    malloc, then we're scrod.  But Emacs can't just block input while
                   1109:    calling matching routines; then we don't notice interrupts when
                   1110:    they come in.  So, Emacs blocks input around all regexp calls
                   1111:    except the matching calls, which it leaves unprotected, in the
                   1112:    faith that they will not malloc.  */
                   1113: 
                   1114: /* Normally, this is fine.  */
                   1115: #define MATCH_MAY_ALLOCATE
                   1116: 
                   1117: /* When using GNU C, we are not REALLY using the C alloca, no matter
                   1118:    what config.h may say.  So don't take precautions for it.  */
                   1119: #ifdef __GNUC__
                   1120: # undef C_ALLOCA
                   1121: #endif
                   1122: 
                   1123: /* The match routines may not allocate if (1) they would do it with malloc
                   1124:    and (2) it's not safe for them to use malloc.
                   1125:    Note that if REL_ALLOC is defined, matching would not use malloc for the
                   1126:    failure stack, but we would still use it for the register vectors;
                   1127:    so REL_ALLOC should not affect this.  */
                   1128: #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
                   1129: # undef MATCH_MAY_ALLOCATE
                   1130: #endif
                   1131: 
1.1.1.2 ! misho    1132: 
1.1       misho    1133: /* Failure stack declarations and macros; both re_compile_fastmap and
                   1134:    re_match_2 use a failure stack.  These have to be macros because of
                   1135:    REGEX_ALLOCATE_STACK.  */
                   1136: 
                   1137: 
                   1138: /* Number of failure points for which to initially allocate space
                   1139:    when matching.  If this number is exceeded, we allocate more
                   1140:    space, so it is not a hard limit.  */
                   1141: #ifndef INIT_FAILURE_ALLOC
                   1142: # define INIT_FAILURE_ALLOC 5
                   1143: #endif
                   1144: 
                   1145: /* Roughly the maximum number of failure points on the stack.  Would be
                   1146:    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
                   1147:    This is a variable only so users of regex can assign to it; we never
                   1148:    change it ourselves.  */
                   1149: 
                   1150: #ifdef INT_IS_16BIT
                   1151: 
                   1152: # if defined MATCH_MAY_ALLOCATE
                   1153: /* 4400 was enough to cause a crash on Alpha OSF/1,
                   1154:    whose default stack limit is 2mb.  */
                   1155: long int re_max_failures = 4000;
                   1156: # else
                   1157: long int re_max_failures = 2000;
                   1158: # endif
                   1159: 
                   1160: union fail_stack_elt
                   1161: {
                   1162:   unsigned char *pointer;
                   1163:   long int integer;
                   1164: };
                   1165: 
                   1166: typedef union fail_stack_elt fail_stack_elt_t;
                   1167: 
                   1168: typedef struct
                   1169: {
                   1170:   fail_stack_elt_t *stack;
                   1171:   unsigned long int size;
                   1172:   unsigned long int avail;             /* Offset of next open position.  */
                   1173: } fail_stack_type;
                   1174: 
                   1175: #else /* not INT_IS_16BIT */
                   1176: 
                   1177: # if defined MATCH_MAY_ALLOCATE
                   1178: /* 4400 was enough to cause a crash on Alpha OSF/1,
                   1179:    whose default stack limit is 2mb.  */
                   1180: int re_max_failures = 20000;
                   1181: # else
                   1182: int re_max_failures = 2000;
                   1183: # endif
                   1184: 
                   1185: union fail_stack_elt
                   1186: {
                   1187:   unsigned char *pointer;
                   1188:   int integer;
                   1189: };
                   1190: 
                   1191: typedef union fail_stack_elt fail_stack_elt_t;
                   1192: 
                   1193: typedef struct
                   1194: {
                   1195:   fail_stack_elt_t *stack;
                   1196:   unsigned size;
                   1197:   unsigned avail;                      /* Offset of next open position.  */
                   1198: } fail_stack_type;
                   1199: 
                   1200: #endif /* INT_IS_16BIT */
                   1201: 
                   1202: #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
                   1203: #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
                   1204: #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
                   1205: 
                   1206: 
                   1207: /* Define macros to initialize and free the failure stack.
                   1208:    Do `return -2' if the alloc fails.  */
                   1209: 
                   1210: #ifdef MATCH_MAY_ALLOCATE
                   1211: # define INIT_FAIL_STACK()                                             \
                   1212:   do {                                                                 \
                   1213:     fail_stack.stack = (fail_stack_elt_t *)                            \
                   1214:       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
                   1215:                                                                        \
                   1216:     if (fail_stack.stack == NULL)                                      \
                   1217:       return -2;                                                       \
                   1218:                                                                        \
                   1219:     fail_stack.size = INIT_FAILURE_ALLOC;                              \
                   1220:     fail_stack.avail = 0;                                              \
                   1221:   } while (0)
                   1222: 
                   1223: # define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
                   1224: #else
                   1225: # define INIT_FAIL_STACK()                                             \
                   1226:   do {                                                                 \
                   1227:     fail_stack.avail = 0;                                              \
                   1228:   } while (0)
                   1229: 
                   1230: # define RESET_FAIL_STACK()
                   1231: #endif
                   1232: 
                   1233: 
                   1234: /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
                   1235: 
                   1236:    Return 1 if succeeds, and 0 if either ran out of memory
                   1237:    allocating space for it or it was already too large.
                   1238: 
                   1239:    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
                   1240: 
                   1241: #define DOUBLE_FAIL_STACK(fail_stack)                                  \
                   1242:   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)        \
                   1243:    ? 0                                                                 \
                   1244:    : ((fail_stack).stack = (fail_stack_elt_t *)                                \
                   1245:         REGEX_REALLOCATE_STACK ((fail_stack).stack,                    \
                   1246:           (fail_stack).size * sizeof (fail_stack_elt_t),               \
                   1247:           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),       \
                   1248:                                                                        \
                   1249:       (fail_stack).stack == NULL                                       \
                   1250:       ? 0                                                              \
                   1251:       : ((fail_stack).size <<= 1,                                      \
                   1252:          1)))
                   1253: 
                   1254: 
                   1255: /* Push pointer POINTER on FAIL_STACK.
                   1256:    Return 1 if was able to do so and 0 if ran out of memory allocating
                   1257:    space to do so.  */
                   1258: #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                           \
                   1259:   ((FAIL_STACK_FULL ()                                                 \
                   1260:     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                        \
                   1261:    ? 0                                                                 \
                   1262:    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,      \
                   1263:       1))
                   1264: 
                   1265: /* Push a pointer value onto the failure stack.
                   1266:    Assumes the variable `fail_stack'.  Probably should only
                   1267:    be called from within `PUSH_FAILURE_POINT'.  */
                   1268: #define PUSH_FAILURE_POINTER(item)                                     \
                   1269:   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
                   1270: 
                   1271: /* This pushes an integer-valued item onto the failure stack.
                   1272:    Assumes the variable `fail_stack'.  Probably should only
                   1273:    be called from within `PUSH_FAILURE_POINT'.  */
                   1274: #define PUSH_FAILURE_INT(item)                                 \
                   1275:   fail_stack.stack[fail_stack.avail++].integer = (item)
                   1276: 
                   1277: /* Push a fail_stack_elt_t value onto the failure stack.
                   1278:    Assumes the variable `fail_stack'.  Probably should only
                   1279:    be called from within `PUSH_FAILURE_POINT'.  */
                   1280: #define PUSH_FAILURE_ELT(item)                                 \
                   1281:   fail_stack.stack[fail_stack.avail++] =  (item)
                   1282: 
                   1283: /* These three POP... operations complement the three PUSH... operations.
                   1284:    All assume that `fail_stack' is nonempty.  */
                   1285: #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
                   1286: #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
                   1287: #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
                   1288: 
                   1289: /* Used to omit pushing failure point id's when we're not debugging.  */
                   1290: #ifdef DEBUG
                   1291: # define DEBUG_PUSH PUSH_FAILURE_INT
                   1292: # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
                   1293: #else
                   1294: # define DEBUG_PUSH(item)
                   1295: # define DEBUG_POP(item_addr)
                   1296: #endif
                   1297: 
                   1298: 
                   1299: /* Push the information about the state we will need
                   1300:    if we ever fail back to it.
                   1301: 
                   1302:    Requires variables fail_stack, regstart, regend, reg_info, and
                   1303:    num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
                   1304:    be declared.
                   1305: 
                   1306:    Does `return FAILURE_CODE' if runs out of memory.  */
                   1307: 
                   1308: #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)  \
                   1309:   do {                                                                 \
                   1310:     char *destination;                                                 \
                   1311:     /* Must be int, so when we don't save any registers, the arithmetic        \
                   1312:        of 0 + -1 isn't done as unsigned.  */                           \
                   1313:     /* Can't be int, since there is not a shred of a guarantee that int        \
                   1314:        is wide enough to hold a value of something to which pointer can        \
                   1315:        be assigned */                                                  \
                   1316:     active_reg_t this_reg;                                             \
                   1317:                                                                        \
                   1318:     DEBUG_STATEMENT (failure_id++);                                    \
                   1319:     DEBUG_STATEMENT (nfailure_points_pushed++);                                \
                   1320:     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);          \
                   1321:     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
                   1322:     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
                   1323:                                                                        \
                   1324:     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);         \
                   1325:     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);      \
                   1326:                                                                        \
                   1327:     /* Ensure we have enough space allocated for what we will push.  */        \
                   1328:     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                  \
                   1329:       {                                                                        \
                   1330:         if (!DOUBLE_FAIL_STACK (fail_stack))                           \
                   1331:           return failure_code;                                         \
                   1332:                                                                        \
                   1333:         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",             \
                   1334:                       (fail_stack).size);                              \
                   1335:         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
                   1336:       }                                                                        \
                   1337:                                                                        \
                   1338:     /* Push the info, starting with the registers.  */                 \
                   1339:     DEBUG_PRINT1 ("\n");                                               \
                   1340:                                                                        \
                   1341:     if (1)                                                             \
                   1342:       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
                   1343:           this_reg++)                                                  \
                   1344:        {                                                               \
                   1345:          DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);              \
                   1346:          DEBUG_STATEMENT (num_regs_pushed++);                          \
                   1347:                                                                        \
                   1348:          DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);         \
                   1349:          PUSH_FAILURE_POINTER (regstart[this_reg]);                    \
                   1350:                                                                        \
                   1351:          DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);             \
                   1352:          PUSH_FAILURE_POINTER (regend[this_reg]);                      \
                   1353:                                                                        \
                   1354:          DEBUG_PRINT2 ("    info: %p\n      ",                         \
                   1355:                        reg_info[this_reg].word.pointer);               \
                   1356:          DEBUG_PRINT2 (" match_null=%d",                               \
                   1357:                        REG_MATCH_NULL_STRING_P (reg_info[this_reg]));  \
                   1358:          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));  \
                   1359:          DEBUG_PRINT2 (" matched_something=%d",                        \
                   1360:                        MATCHED_SOMETHING (reg_info[this_reg]));        \
                   1361:          DEBUG_PRINT2 (" ever_matched=%d",                             \
                   1362:                        EVER_MATCHED_SOMETHING (reg_info[this_reg]));   \
                   1363:          DEBUG_PRINT1 ("\n");                                          \
                   1364:          PUSH_FAILURE_ELT (reg_info[this_reg].word);                   \
                   1365:        }                                                               \
                   1366:                                                                        \
                   1367:     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
                   1368:     PUSH_FAILURE_INT (lowest_active_reg);                              \
                   1369:                                                                        \
                   1370:     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
                   1371:     PUSH_FAILURE_INT (highest_active_reg);                             \
                   1372:                                                                        \
                   1373:     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);           \
                   1374:     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);          \
                   1375:     PUSH_FAILURE_POINTER (pattern_place);                              \
                   1376:                                                                        \
                   1377:     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);             \
                   1378:     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
                   1379:                                 size2);                                \
                   1380:     DEBUG_PRINT1 ("'\n");                                              \
                   1381:     PUSH_FAILURE_POINTER (string_place);                               \
                   1382:                                                                        \
                   1383:     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);           \
                   1384:     DEBUG_PUSH (failure_id);                                           \
                   1385:   } while (0)
                   1386: 
                   1387: /* This is the number of items that are pushed and popped on the stack
                   1388:    for each register.  */
                   1389: #define NUM_REG_ITEMS  3
                   1390: 
                   1391: /* Individual items aside from the registers.  */
                   1392: #ifdef DEBUG
                   1393: # define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
                   1394: #else
                   1395: # define NUM_NONREG_ITEMS 4
                   1396: #endif
                   1397: 
                   1398: /* We push at most this many items on the stack.  */
                   1399: /* We used to use (num_regs - 1), which is the number of registers
                   1400:    this regexp will save; but that was changed to 5
                   1401:    to avoid stack overflow for a regexp with lots of parens.  */
                   1402: #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
                   1403: 
                   1404: /* We actually push this many items.  */
                   1405: #define NUM_FAILURE_ITEMS                              \
                   1406:   (((0                                                 \
                   1407:      ? 0 : highest_active_reg - lowest_active_reg + 1) \
                   1408:     * NUM_REG_ITEMS)                                   \
                   1409:    + NUM_NONREG_ITEMS)
                   1410: 
                   1411: /* How many items can still be added to the stack without overflowing it.  */
                   1412: #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
                   1413: 
                   1414: 
                   1415: /* Pops what PUSH_FAIL_STACK pushes.
                   1416: 
                   1417:    We restore into the parameters, all of which should be lvalues:
                   1418:      STR -- the saved data position.
                   1419:      PAT -- the saved pattern position.
                   1420:      LOW_REG, HIGH_REG -- the highest and lowest active registers.
                   1421:      REGSTART, REGEND -- arrays of string positions.
                   1422:      REG_INFO -- array of information about each subexpression.
                   1423: 
                   1424:    Also assumes the variables `fail_stack' and (if debugging), `bufp',
                   1425:    `pend', `string1', `size1', `string2', and `size2'.  */
                   1426: 
                   1427: #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
                   1428: {                                                                      \
                   1429:   DEBUG_STATEMENT (unsigned failure_id;)                               \
                   1430:   active_reg_t this_reg;                                               \
                   1431:   const unsigned char *string_temp;                                    \
                   1432:                                                                        \
                   1433:   assert (!FAIL_STACK_EMPTY ());                                       \
                   1434:                                                                        \
                   1435:   /* Remove failure points and point to how many regs pushed.  */      \
                   1436:   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                               \
                   1437:   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);   \
                   1438:   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);    \
                   1439:                                                                        \
                   1440:   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                       \
                   1441:                                                                        \
                   1442:   DEBUG_POP (&failure_id);                                             \
                   1443:   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);             \
                   1444:                                                                        \
                   1445:   /* If the saved string location is NULL, it came from an             \
                   1446:      on_failure_keep_string_jump opcode, and we want to throw away the \
                   1447:      saved NULL, thus retaining our current position in the string.  */        \
                   1448:   string_temp = POP_FAILURE_POINTER ();                                        \
                   1449:   if (string_temp != NULL)                                             \
                   1450:     str = (const char *) string_temp;                                  \
                   1451:                                                                        \
                   1452:   DEBUG_PRINT2 ("  Popping string %p: `", str);                                \
                   1453:   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);     \
                   1454:   DEBUG_PRINT1 ("'\n");                                                        \
                   1455:                                                                        \
                   1456:   pat = (unsigned char *) POP_FAILURE_POINTER ();                      \
                   1457:   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);                       \
                   1458:   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                      \
                   1459:                                                                        \
                   1460:   /* Restore register info.  */                                                \
                   1461:   high_reg = (active_reg_t) POP_FAILURE_INT ();                                \
                   1462:   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);         \
                   1463:                                                                        \
                   1464:   low_reg = (active_reg_t) POP_FAILURE_INT ();                         \
                   1465:   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);          \
                   1466:                                                                        \
                   1467:   if (1)                                                               \
                   1468:     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)         \
                   1469:       {                                                                        \
                   1470:        DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);              \
                   1471:                                                                        \
                   1472:        reg_info[this_reg].word = POP_FAILURE_ELT ();                   \
                   1473:        DEBUG_PRINT2 ("      info: %p\n",                               \
                   1474:                      reg_info[this_reg].word.pointer);                 \
                   1475:                                                                        \
                   1476:        regend[this_reg] = (const char *) POP_FAILURE_POINTER ();       \
                   1477:        DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);             \
                   1478:                                                                        \
                   1479:        regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();     \
                   1480:        DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);         \
                   1481:       }                                                                        \
                   1482:   else                                                                 \
                   1483:     {                                                                  \
                   1484:       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
                   1485:        {                                                               \
                   1486:          reg_info[this_reg].word.integer = 0;                          \
                   1487:          regend[this_reg] = 0;                                         \
                   1488:          regstart[this_reg] = 0;                                       \
                   1489:        }                                                               \
                   1490:       highest_active_reg = high_reg;                                   \
                   1491:     }                                                                  \
                   1492:                                                                        \
                   1493:   set_regs_matched_done = 0;                                           \
                   1494:   DEBUG_STATEMENT (nfailure_points_popped++);                          \
                   1495: } /* POP_FAILURE_POINT */
                   1496: 
                   1497: 
1.1.1.2 ! misho    1498: 
1.1       misho    1499: /* Structure for per-register (a.k.a. per-group) information.
                   1500:    Other register information, such as the
                   1501:    starting and ending positions (which are addresses), and the list of
                   1502:    inner groups (which is a bits list) are maintained in separate
                   1503:    variables.
                   1504: 
                   1505:    We are making a (strictly speaking) nonportable assumption here: that
                   1506:    the compiler will pack our bit fields into something that fits into
                   1507:    the type of `word', i.e., is something that fits into one item on the
                   1508:    failure stack.  */
                   1509: 
                   1510: 
                   1511: /* Declarations and macros for re_match_2.  */
                   1512: 
                   1513: typedef union
                   1514: {
                   1515:   fail_stack_elt_t word;
                   1516:   struct
                   1517:   {
                   1518:       /* This field is one if this group can match the empty string,
                   1519:          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
                   1520: #define MATCH_NULL_UNSET_VALUE 3
                   1521:     unsigned match_null_string_p : 2;
                   1522:     unsigned is_active : 1;
                   1523:     unsigned matched_something : 1;
                   1524:     unsigned ever_matched_something : 1;
                   1525:   } bits;
                   1526: } register_info_type;
                   1527: 
                   1528: #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
                   1529: #define IS_ACTIVE(R)  ((R).bits.is_active)
                   1530: #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
                   1531: #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
                   1532: 
                   1533: 
                   1534: /* Call this when have matched a real character; it sets `matched' flags
                   1535:    for the subexpressions which we are currently inside.  Also records
                   1536:    that those subexprs have matched.  */
                   1537: #define SET_REGS_MATCHED()                                             \
                   1538:   do                                                                   \
                   1539:     {                                                                  \
                   1540:       if (!set_regs_matched_done)                                      \
                   1541:        {                                                               \
                   1542:          active_reg_t r;                                               \
                   1543:          set_regs_matched_done = 1;                                    \
                   1544:          for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
                   1545:            {                                                           \
                   1546:              MATCHED_SOMETHING (reg_info[r])                           \
                   1547:                = EVER_MATCHED_SOMETHING (reg_info[r])                  \
                   1548:                = 1;                                                    \
                   1549:            }                                                           \
                   1550:        }                                                               \
                   1551:     }                                                                  \
                   1552:   while (0)
                   1553: 
                   1554: /* Registers are set to a sentinel when they haven't yet matched.  */
                   1555: static char reg_unset_dummy;
                   1556: #define REG_UNSET_VALUE (&reg_unset_dummy)
                   1557: #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1.1.1.2 ! misho    1558: 
1.1       misho    1559: /* Subroutine declarations and macros for regex_compile.  */
                   1560: 
                   1561: static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
                   1562:                                              reg_syntax_t syntax,
                   1563:                                              struct re_pattern_buffer *bufp));
                   1564: static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
                   1565: static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
                   1566:                                 int arg1, int arg2));
                   1567: static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
                   1568:                                  int arg, unsigned char *end));
                   1569: static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
                   1570:                                  int arg1, int arg2, unsigned char *end));
                   1571: static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
                   1572:                                           reg_syntax_t syntax));
                   1573: static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
                   1574:                                           reg_syntax_t syntax));
                   1575: static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
                   1576:                                              const char *pend,
                   1577:                                              char *translate,
                   1578:                                              reg_syntax_t syntax,
                   1579:                                              unsigned char *b));
                   1580: 
                   1581: /* Fetch the next character in the uncompiled pattern---translating it
                   1582:    if necessary.  Also cast from a signed character in the constant
                   1583:    string passed to us by the user to an unsigned char that we can use
                   1584:    as an array index (in, e.g., `translate').  */
                   1585: #ifndef PATFETCH
                   1586: # define PATFETCH(c)                                                   \
                   1587:   do {if (p == pend) return REG_EEND;                                  \
                   1588:     c = (unsigned char) *p++;                                          \
                   1589:     if (translate) c = (unsigned char) translate[c];                   \
                   1590:   } while (0)
                   1591: #endif
                   1592: 
                   1593: /* Fetch the next character in the uncompiled pattern, with no
                   1594:    translation.  */
                   1595: #define PATFETCH_RAW(c)                                                        \
                   1596:   do {if (p == pend) return REG_EEND;                                  \
                   1597:     c = (unsigned char) *p++;                                          \
                   1598:   } while (0)
                   1599: 
                   1600: /* Go backwards one character in the pattern.  */
                   1601: #define PATUNFETCH p--
                   1602: 
                   1603: 
                   1604: /* If `translate' is non-null, return translate[D], else just D.  We
                   1605:    cast the subscript to translate because some data is declared as
                   1606:    `char *', to avoid warnings when a string constant is passed.  But
                   1607:    when we use a character as a subscript we must make it unsigned.  */
                   1608: #ifndef TRANSLATE
                   1609: # define TRANSLATE(d) \
                   1610:   (translate ? (char) translate[(unsigned char) (d)] : (d))
                   1611: #endif
                   1612: 
                   1613: 
                   1614: /* Macros for outputting the compiled pattern into `buffer'.  */
                   1615: 
                   1616: /* If the buffer isn't allocated when it comes in, use this.  */
                   1617: #define INIT_BUF_SIZE  32
                   1618: 
                   1619: /* Make sure we have at least N more bytes of space in buffer.  */
                   1620: #define GET_BUFFER_SPACE(n)                                            \
                   1621:     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
                   1622:       EXTEND_BUFFER ()
                   1623: 
                   1624: /* Make sure we have one more byte of buffer space and then add C to it.  */
                   1625: #define BUF_PUSH(c)                                                    \
                   1626:   do {                                                                 \
                   1627:     GET_BUFFER_SPACE (1);                                              \
                   1628:     *b++ = (unsigned char) (c);                                                \
                   1629:   } while (0)
                   1630: 
                   1631: 
                   1632: /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
                   1633: #define BUF_PUSH_2(c1, c2)                                             \
                   1634:   do {                                                                 \
                   1635:     GET_BUFFER_SPACE (2);                                              \
                   1636:     *b++ = (unsigned char) (c1);                                       \
                   1637:     *b++ = (unsigned char) (c2);                                       \
                   1638:   } while (0)
                   1639: 
                   1640: 
                   1641: /* As with BUF_PUSH_2, except for three bytes.  */
                   1642: #define BUF_PUSH_3(c1, c2, c3)                                         \
                   1643:   do {                                                                 \
                   1644:     GET_BUFFER_SPACE (3);                                              \
                   1645:     *b++ = (unsigned char) (c1);                                       \
                   1646:     *b++ = (unsigned char) (c2);                                       \
                   1647:     *b++ = (unsigned char) (c3);                                       \
                   1648:   } while (0)
                   1649: 
                   1650: 
                   1651: /* Store a jump with opcode OP at LOC to location TO.  We store a
                   1652:    relative address offset by the three bytes the jump itself occupies.  */
                   1653: #define STORE_JUMP(op, loc, to) \
                   1654:   store_op1 (op, loc, (int) ((to) - (loc) - 3))
                   1655: 
                   1656: /* Likewise, for a two-argument jump.  */
                   1657: #define STORE_JUMP2(op, loc, to, arg) \
                   1658:   store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
                   1659: 
                   1660: /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
                   1661: #define INSERT_JUMP(op, loc, to) \
                   1662:   insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
                   1663: 
                   1664: /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
                   1665: #define INSERT_JUMP2(op, loc, to, arg) \
                   1666:   insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
                   1667: 
                   1668: 
                   1669: /* This is not an arbitrary limit: the arguments which represent offsets
                   1670:    into the pattern are two bytes long.  So if 2^16 bytes turns out to
                   1671:    be too small, many things would have to change.  */
                   1672: /* Any other compiler which, like MSC, has allocation limit below 2^16
                   1673:    bytes will have to use approach similar to what was done below for
                   1674:    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
                   1675:    reallocating to 0 bytes.  Such thing is not going to work too well.
                   1676:    You have been warned!!  */
                   1677: #if defined _MSC_VER  && !defined _WIN32
                   1678: /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
                   1679:    The REALLOC define eliminates a flurry of conversion warnings,
                   1680:    but is not required. */
                   1681: # define MAX_BUF_SIZE  65500L
                   1682: # define REALLOC(p,s) realloc ((p), (size_t) (s))
                   1683: #else
                   1684: # define MAX_BUF_SIZE (1L << 16)
                   1685: # define REALLOC(p,s) realloc ((p), (s))
                   1686: #endif
                   1687: 
                   1688: /* Extend the buffer by twice its current size via realloc and
                   1689:    reset the pointers that pointed into the old block to point to the
                   1690:    correct places in the new one.  If extending the buffer results in it
                   1691:    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
                   1692: #define EXTEND_BUFFER()                                                        \
                   1693:   do {                                                                         \
                   1694:     unsigned char *old_buffer = bufp->buffer;                          \
                   1695:     if (bufp->allocated == MAX_BUF_SIZE)                               \
                   1696:       return REG_ESIZE;                                                        \
                   1697:     bufp->allocated <<= 1;                                             \
                   1698:     if (bufp->allocated > MAX_BUF_SIZE)                                        \
                   1699:       bufp->allocated = MAX_BUF_SIZE;                                  \
                   1700:     bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
                   1701:     if (bufp->buffer == NULL)                                          \
                   1702:       return REG_ESPACE;                                               \
                   1703:     /* If the buffer moved, move all the pointers into it.  */         \
                   1704:     if (old_buffer != bufp->buffer)                                    \
                   1705:       {                                                                        \
                   1706:         b = (b - old_buffer) + bufp->buffer;                           \
                   1707:         begalt = (begalt - old_buffer) + bufp->buffer;                 \
                   1708:         if (fixup_alt_jump)                                            \
                   1709:           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
                   1710:         if (laststart)                                                 \
                   1711:           laststart = (laststart - old_buffer) + bufp->buffer;         \
                   1712:         if (pending_exact)                                             \
                   1713:           pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
                   1714:       }                                                                        \
                   1715:   } while (0)
                   1716: 
                   1717: 
                   1718: /* Since we have one byte reserved for the register number argument to
                   1719:    {start,stop}_memory, the maximum number of groups we can report
                   1720:    things about is what fits in that byte.  */
                   1721: #define MAX_REGNUM 255
                   1722: 
                   1723: /* But patterns can have more than `MAX_REGNUM' registers.  We just
                   1724:    ignore the excess.  */
                   1725: typedef unsigned regnum_t;
                   1726: 
                   1727: 
                   1728: /* Macros for the compile stack.  */
                   1729: 
                   1730: /* Since offsets can go either forwards or backwards, this type needs to
                   1731:    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
                   1732: /* int may be not enough when sizeof(int) == 2.  */
                   1733: typedef long pattern_offset_t;
                   1734: 
                   1735: typedef struct
                   1736: {
                   1737:   pattern_offset_t begalt_offset;
                   1738:   pattern_offset_t fixup_alt_jump;
                   1739:   pattern_offset_t inner_group_offset;
                   1740:   pattern_offset_t laststart_offset;
                   1741:   regnum_t regnum;
                   1742: } compile_stack_elt_t;
                   1743: 
                   1744: 
                   1745: typedef struct
                   1746: {
                   1747:   compile_stack_elt_t *stack;
                   1748:   unsigned size;
                   1749:   unsigned avail;                      /* Offset of next open position.  */
                   1750: } compile_stack_type;
                   1751: 
                   1752: 
                   1753: #define INIT_COMPILE_STACK_SIZE 32
                   1754: 
                   1755: #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
                   1756: #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
                   1757: 
                   1758: /* The next available element.  */
                   1759: #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
                   1760: 
                   1761: 
                   1762: /* Set the bit for character C in a list.  */
                   1763: #define SET_LIST_BIT(c)                               \
                   1764:   (b[((unsigned char) (c)) / BYTEWIDTH]               \
                   1765:    |= 1 << (((unsigned char) c) % BYTEWIDTH))
                   1766: 
                   1767: 
                   1768: /* Get the next unsigned number in the uncompiled pattern.  */
                   1769: #define GET_UNSIGNED_NUMBER(num)                                       \
                   1770:   { if (p != pend)                                                     \
                   1771:      {                                                                 \
                   1772:        PATFETCH (c);                                                   \
                   1773:        while (ISDIGIT (c))                                             \
                   1774:          {                                                             \
                   1775:            if (num < 0)                                                        \
                   1776:               num = 0;                                                 \
                   1777:            num = num * 10 + c - '0';                                   \
                   1778:            if (p == pend)                                              \
                   1779:               break;                                                   \
                   1780:            PATFETCH (c);                                               \
                   1781:          }                                                             \
                   1782:        }                                                               \
                   1783:     }
                   1784: 
                   1785: #if defined _LIBC || WIDE_CHAR_SUPPORT
                   1786: /* The GNU C library provides support for user-defined character classes
                   1787:    and the functions from ISO C amendement 1.  */
                   1788: # ifdef CHARCLASS_NAME_MAX
                   1789: #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
                   1790: # else
                   1791: /* This shouldn't happen but some implementation might still have this
                   1792:    problem.  Use a reasonable default value.  */
                   1793: #  define CHAR_CLASS_MAX_LENGTH 256
                   1794: # endif
                   1795: 
                   1796: # ifdef _LIBC
                   1797: #  define IS_CHAR_CLASS(string) __wctype (string)
                   1798: # else
                   1799: #  define IS_CHAR_CLASS(string) wctype (string)
                   1800: # endif
                   1801: #else
                   1802: # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
                   1803: 
                   1804: # define IS_CHAR_CLASS(string)                                         \
                   1805:    (STREQ (string, "alpha") || STREQ (string, "upper")                 \
                   1806:     || STREQ (string, "lower") || STREQ (string, "digit")              \
                   1807:     || STREQ (string, "alnum") || STREQ (string, "xdigit")             \
                   1808:     || STREQ (string, "space") || STREQ (string, "print")              \
                   1809:     || STREQ (string, "punct") || STREQ (string, "graph")              \
                   1810:     || STREQ (string, "cntrl") || STREQ (string, "blank"))
                   1811: #endif
1.1.1.2 ! misho    1812: 
1.1       misho    1813: #ifndef MATCH_MAY_ALLOCATE
                   1814: 
                   1815: /* If we cannot allocate large objects within re_match_2_internal,
                   1816:    we make the fail stack and register vectors global.
                   1817:    The fail stack, we grow to the maximum size when a regexp
                   1818:    is compiled.
                   1819:    The register vectors, we adjust in size each time we
                   1820:    compile a regexp, according to the number of registers it needs.  */
                   1821: 
                   1822: static fail_stack_type fail_stack;
                   1823: 
                   1824: /* Size with which the following vectors are currently allocated.
                   1825:    That is so we can make them bigger as needed,
                   1826:    but never make them smaller.  */
                   1827: static int regs_allocated_size;
                   1828: 
                   1829: static const char **     regstart, **     regend;
                   1830: static const char ** old_regstart, ** old_regend;
                   1831: static const char **best_regstart, **best_regend;
                   1832: static register_info_type *reg_info;
                   1833: static const char **reg_dummy;
                   1834: static register_info_type *reg_info_dummy;
                   1835: 
                   1836: /* Make the register vectors big enough for NUM_REGS registers,
                   1837:    but don't make them smaller.  */
                   1838: 
                   1839: static
                   1840: regex_grow_registers (num_regs)
                   1841:      int num_regs;
                   1842: {
                   1843:   if (num_regs > regs_allocated_size)
                   1844:     {
                   1845:       RETALLOC_IF (regstart,    num_regs, const char *);
                   1846:       RETALLOC_IF (regend,      num_regs, const char *);
                   1847:       RETALLOC_IF (old_regstart, num_regs, const char *);
                   1848:       RETALLOC_IF (old_regend,  num_regs, const char *);
                   1849:       RETALLOC_IF (best_regstart, num_regs, const char *);
                   1850:       RETALLOC_IF (best_regend,         num_regs, const char *);
                   1851:       RETALLOC_IF (reg_info,    num_regs, register_info_type);
                   1852:       RETALLOC_IF (reg_dummy,   num_regs, const char *);
                   1853:       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
                   1854: 
                   1855:       regs_allocated_size = num_regs;
                   1856:     }
                   1857: }
                   1858: 
                   1859: #endif /* not MATCH_MAY_ALLOCATE */
1.1.1.2 ! misho    1860: 
1.1       misho    1861: static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
                   1862:                                                 compile_stack,
                   1863:                                                 regnum_t regnum));
                   1864: 
                   1865: /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
                   1866:    Returns one of error codes defined in `regex.h', or zero for success.
                   1867: 
                   1868:    Assumes the `allocated' (and perhaps `buffer') and `translate'
                   1869:    fields are set in BUFP on entry.
                   1870: 
                   1871:    If it succeeds, results are put in BUFP (if it returns an error, the
                   1872:    contents of BUFP are undefined):
                   1873:      `buffer' is the compiled pattern;
                   1874:      `syntax' is set to SYNTAX;
                   1875:      `used' is set to the length of the compiled pattern;
                   1876:      `fastmap_accurate' is zero;
                   1877:      `re_nsub' is the number of subexpressions in PATTERN;
                   1878:      `not_bol' and `not_eol' are zero;
                   1879: 
                   1880:    The `fastmap' and `newline_anchor' fields are neither
                   1881:    examined nor set.  */
                   1882: 
                   1883: /* Return, freeing storage we allocated.  */
                   1884: #define FREE_STACK_RETURN(value)               \
                   1885:   return (free (compile_stack.stack), value)
                   1886: 
                   1887: static reg_errcode_t
                   1888: regex_compile (pattern, size, syntax, bufp)
                   1889:      const char *pattern;
                   1890:      size_t size;
                   1891:      reg_syntax_t syntax;
                   1892:      struct re_pattern_buffer *bufp;
                   1893: {
                   1894:   /* We fetch characters from PATTERN here.  Even though PATTERN is
                   1895:      `char *' (i.e., signed), we declare these variables as unsigned, so
                   1896:      they can be reliably used as array indices.  */
                   1897:   register unsigned char c, c1;
                   1898: 
                   1899:   /* A random temporary spot in PATTERN.  */
                   1900:   const char *p1;
                   1901: 
                   1902:   /* Points to the end of the buffer, where we should append.  */
                   1903:   register unsigned char *b;
                   1904: 
                   1905:   /* Keeps track of unclosed groups.  */
                   1906:   compile_stack_type compile_stack;
                   1907: 
                   1908:   /* Points to the current (ending) position in the pattern.  */
                   1909:   const char *p = pattern;
                   1910:   const char *pend = pattern + size;
                   1911: 
                   1912:   /* How to translate the characters in the pattern.  */
                   1913:   RE_TRANSLATE_TYPE translate = bufp->translate;
                   1914: 
                   1915:   /* Address of the count-byte of the most recently inserted `exactn'
                   1916:      command.  This makes it possible to tell if a new exact-match
                   1917:      character can be added to that command or if the character requires
                   1918:      a new `exactn' command.  */
                   1919:   unsigned char *pending_exact = 0;
                   1920: 
                   1921:   /* Address of start of the most recently finished expression.
                   1922:      This tells, e.g., postfix * where to find the start of its
                   1923:      operand.  Reset at the beginning of groups and alternatives.  */
                   1924:   unsigned char *laststart = 0;
                   1925: 
                   1926:   /* Address of beginning of regexp, or inside of last group.  */
                   1927:   unsigned char *begalt;
                   1928: 
                   1929:   /* Place in the uncompiled pattern (i.e., the {) to
                   1930:      which to go back if the interval is invalid.  */
                   1931:   const char *beg_interval;
                   1932: 
                   1933:   /* Address of the place where a forward jump should go to the end of
                   1934:      the containing expression.  Each alternative of an `or' -- except the
                   1935:      last -- ends with a forward jump of this sort.  */
                   1936:   unsigned char *fixup_alt_jump = 0;
                   1937: 
                   1938:   /* Counts open-groups as they are encountered.  Remembered for the
                   1939:      matching close-group on the compile stack, so the same register
                   1940:      number is put in the stop_memory as the start_memory.  */
                   1941:   regnum_t regnum = 0;
                   1942: 
                   1943: #ifdef DEBUG
                   1944:   DEBUG_PRINT1 ("\nCompiling pattern: ");
                   1945:   if (debug)
                   1946:     {
                   1947:       unsigned debug_count;
                   1948: 
                   1949:       for (debug_count = 0; debug_count < size; debug_count++)
                   1950:         putchar (pattern[debug_count]);
                   1951:       putchar ('\n');
                   1952:     }
                   1953: #endif /* DEBUG */
                   1954: 
                   1955:   /* Initialize the compile stack.  */
                   1956:   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
                   1957:   if (compile_stack.stack == NULL)
                   1958:     return REG_ESPACE;
                   1959: 
                   1960:   compile_stack.size = INIT_COMPILE_STACK_SIZE;
                   1961:   compile_stack.avail = 0;
                   1962: 
                   1963:   /* Initialize the pattern buffer.  */
                   1964:   bufp->syntax = syntax;
                   1965:   bufp->fastmap_accurate = 0;
                   1966:   bufp->not_bol = bufp->not_eol = 0;
                   1967: 
                   1968:   /* Set `used' to zero, so that if we return an error, the pattern
                   1969:      printer (for debugging) will think there's no pattern.  We reset it
                   1970:      at the end.  */
                   1971:   bufp->used = 0;
                   1972: 
                   1973:   /* Always count groups, whether or not bufp->no_sub is set.  */
                   1974:   bufp->re_nsub = 0;
                   1975: 
                   1976: #if !defined emacs && !defined SYNTAX_TABLE
                   1977:   /* Initialize the syntax table.  */
                   1978:    init_syntax_once ();
                   1979: #endif
                   1980: 
                   1981:   if (bufp->allocated == 0)
                   1982:     {
                   1983:       if (bufp->buffer)
                   1984:        { /* If zero allocated, but buffer is non-null, try to realloc
                   1985:              enough space.  This loses if buffer's address is bogus, but
                   1986:              that is the user's responsibility.  */
                   1987:           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
                   1988:         }
                   1989:       else
                   1990:         { /* Caller did not allocate a buffer.  Do it for them.  */
                   1991:           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
                   1992:         }
                   1993:       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
                   1994: 
                   1995:       bufp->allocated = INIT_BUF_SIZE;
                   1996:     }
                   1997: 
                   1998:   begalt = b = bufp->buffer;
                   1999: 
                   2000:   /* Loop through the uncompiled pattern until we're at the end.  */
                   2001:   while (p != pend)
                   2002:     {
                   2003:       PATFETCH (c);
                   2004: 
                   2005:       switch (c)
                   2006:         {
                   2007:         case '^':
                   2008:           {
                   2009:             if (   /* If at start of pattern, it's an operator.  */
                   2010:                    p == pattern + 1
                   2011:                    /* If context independent, it's an operator.  */
                   2012:                 || syntax & RE_CONTEXT_INDEP_ANCHORS
                   2013:                    /* Otherwise, depends on what's come before.  */
                   2014:                 || at_begline_loc_p (pattern, p, syntax))
                   2015:               BUF_PUSH (begline);
                   2016:             else
                   2017:               goto normal_char;
                   2018:           }
                   2019:           break;
                   2020: 
                   2021: 
                   2022:         case '$':
                   2023:           {
                   2024:             if (   /* If at end of pattern, it's an operator.  */
                   2025:                    p == pend
                   2026:                    /* If context independent, it's an operator.  */
                   2027:                 || syntax & RE_CONTEXT_INDEP_ANCHORS
                   2028:                    /* Otherwise, depends on what's next.  */
                   2029:                 || at_endline_loc_p (p, pend, syntax))
                   2030:                BUF_PUSH (endline);
                   2031:              else
                   2032:                goto normal_char;
                   2033:            }
                   2034:            break;
                   2035: 
                   2036: 
                   2037:        case '+':
                   2038:         case '?':
                   2039:           if ((syntax & RE_BK_PLUS_QM)
                   2040:               || (syntax & RE_LIMITED_OPS))
                   2041:             goto normal_char;
                   2042:         handle_plus:
                   2043:         case '*':
                   2044:           /* If there is no previous pattern... */
                   2045:           if (!laststart)
                   2046:             {
                   2047:               if (syntax & RE_CONTEXT_INVALID_OPS)
                   2048:                 FREE_STACK_RETURN (REG_BADRPT);
                   2049:               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
                   2050:                 goto normal_char;
                   2051:             }
                   2052: 
                   2053:           {
                   2054:             /* Are we optimizing this jump?  */
                   2055:             boolean keep_string_p = false;
                   2056: 
                   2057:             /* 1 means zero (many) matches is allowed.  */
                   2058:             char zero_times_ok = 0, many_times_ok = 0;
                   2059: 
                   2060:             /* If there is a sequence of repetition chars, collapse it
                   2061:                down to just one (the right one).  We can't combine
                   2062:                interval operators with these because of, e.g., `a{2}*',
                   2063:                which should only match an even number of `a's.  */
                   2064: 
                   2065:             for (;;)
                   2066:               {
                   2067:                 zero_times_ok |= c != '+';
                   2068:                 many_times_ok |= c != '?';
                   2069: 
                   2070:                 if (p == pend)
                   2071:                   break;
                   2072: 
                   2073:                 PATFETCH (c);
                   2074: 
                   2075:                 if (c == '*'
                   2076:                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
                   2077:                   ;
                   2078: 
                   2079:                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
                   2080:                   {
                   2081:                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
                   2082: 
                   2083:                     PATFETCH (c1);
                   2084:                     if (!(c1 == '+' || c1 == '?'))
                   2085:                       {
                   2086:                         PATUNFETCH;
                   2087:                         PATUNFETCH;
                   2088:                         break;
                   2089:                       }
                   2090: 
                   2091:                     c = c1;
                   2092:                   }
                   2093:                 else
                   2094:                   {
                   2095:                     PATUNFETCH;
                   2096:                     break;
                   2097:                   }
                   2098: 
                   2099:                 /* If we get here, we found another repeat character.  */
                   2100:                }
                   2101: 
                   2102:             /* Star, etc. applied to an empty pattern is equivalent
                   2103:                to an empty pattern.  */
                   2104:             if (!laststart)
                   2105:               break;
                   2106: 
                   2107:             /* Now we know whether or not zero matches is allowed
                   2108:                and also whether or not two or more matches is allowed.  */
                   2109:             if (many_times_ok)
                   2110:               { /* More than one repetition is allowed, so put in at the
                   2111:                    end a backward relative jump from `b' to before the next
                   2112:                    jump we're going to put in below (which jumps from
                   2113:                    laststart to after this jump).
                   2114: 
                   2115:                    But if we are at the `*' in the exact sequence `.*\n',
                   2116:                    insert an unconditional jump backwards to the .,
                   2117:                    instead of the beginning of the loop.  This way we only
                   2118:                    push a failure point once, instead of every time
                   2119:                    through the loop.  */
                   2120:                 assert (p - 1 > pattern);
                   2121: 
                   2122:                 /* Allocate the space for the jump.  */
                   2123:                 GET_BUFFER_SPACE (3);
                   2124: 
                   2125:                 /* We know we are not at the first character of the pattern,
                   2126:                    because laststart was nonzero.  And we've already
                   2127:                    incremented `p', by the way, to be the character after
                   2128:                    the `*'.  Do we have to do something analogous here
                   2129:                    for null bytes, because of RE_DOT_NOT_NULL?  */
                   2130:                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
                   2131:                    && zero_times_ok
                   2132:                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
                   2133:                     && !(syntax & RE_DOT_NEWLINE))
                   2134:                   { /* We have .*\n.  */
                   2135:                     STORE_JUMP (jump, b, laststart);
                   2136:                     keep_string_p = true;
                   2137:                   }
                   2138:                 else
                   2139:                   /* Anything else.  */
                   2140:                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
                   2141: 
                   2142:                 /* We've added more stuff to the buffer.  */
                   2143:                 b += 3;
                   2144:               }
                   2145: 
                   2146:             /* On failure, jump from laststart to b + 3, which will be the
                   2147:                end of the buffer after this jump is inserted.  */
                   2148:             GET_BUFFER_SPACE (3);
                   2149:             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
                   2150:                                        : on_failure_jump,
                   2151:                          laststart, b + 3);
                   2152:             pending_exact = 0;
                   2153:             b += 3;
                   2154: 
                   2155:             if (!zero_times_ok)
                   2156:               {
                   2157:                 /* At least one repetition is required, so insert a
                   2158:                    `dummy_failure_jump' before the initial
                   2159:                    `on_failure_jump' instruction of the loop. This
                   2160:                    effects a skip over that instruction the first time
                   2161:                    we hit that loop.  */
                   2162:                 GET_BUFFER_SPACE (3);
                   2163:                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
                   2164:                 b += 3;
                   2165:               }
                   2166:             }
                   2167:          break;
                   2168: 
                   2169: 
                   2170:        case '.':
                   2171:           laststart = b;
                   2172:           BUF_PUSH (anychar);
                   2173:           break;
                   2174: 
                   2175: 
                   2176:         case '[':
                   2177:           {
                   2178:             boolean had_char_class = false;
                   2179: 
                   2180:             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
                   2181: 
                   2182:             /* Ensure that we have enough space to push a charset: the
                   2183:                opcode, the length count, and the bitset; 34 bytes in all.  */
                   2184:            GET_BUFFER_SPACE (34);
                   2185: 
                   2186:             laststart = b;
                   2187: 
                   2188:             /* We test `*p == '^' twice, instead of using an if
                   2189:                statement, so we only need one BUF_PUSH.  */
                   2190:             BUF_PUSH (*p == '^' ? charset_not : charset);
                   2191:             if (*p == '^')
                   2192:               p++;
                   2193: 
                   2194:             /* Remember the first position in the bracket expression.  */
                   2195:             p1 = p;
                   2196: 
                   2197:             /* Push the number of bytes in the bitmap.  */
                   2198:             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
                   2199: 
                   2200:             /* Clear the whole map.  */
                   2201:             memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
                   2202: 
                   2203:             /* charset_not matches newline according to a syntax bit.  */
                   2204:             if ((re_opcode_t) b[-2] == charset_not
                   2205:                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
                   2206:               SET_LIST_BIT ('\n');
                   2207: 
                   2208:             /* Read in characters and ranges, setting map bits.  */
                   2209:             for (;;)
                   2210:               {
                   2211:                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
                   2212: 
                   2213:                 PATFETCH (c);
                   2214: 
                   2215:                 /* \ might escape characters inside [...] and [^...].  */
                   2216:                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
                   2217:                   {
                   2218:                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
                   2219: 
                   2220:                     PATFETCH (c1);
                   2221:                     SET_LIST_BIT (c1);
                   2222:                     continue;
                   2223:                   }
                   2224: 
                   2225:                 /* Could be the end of the bracket expression.  If it's
                   2226:                    not (i.e., when the bracket expression is `[]' so
                   2227:                    far), the ']' character bit gets set way below.  */
                   2228:                 if (c == ']' && p != p1 + 1)
                   2229:                   break;
                   2230: 
                   2231:                 /* Look ahead to see if it's a range when the last thing
                   2232:                    was a character class.  */
                   2233:                 if (had_char_class && c == '-' && *p != ']')
                   2234:                   FREE_STACK_RETURN (REG_ERANGE);
                   2235: 
                   2236:                 /* Look ahead to see if it's a range when the last thing
                   2237:                    was a character: if this is a hyphen not at the
                   2238:                    beginning or the end of a list, then it's the range
                   2239:                    operator.  */
                   2240:                 if (c == '-'
                   2241:                     && !(p - 2 >= pattern && p[-2] == '[')
                   2242:                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
                   2243:                     && *p != ']')
                   2244:                   {
                   2245:                     reg_errcode_t ret
                   2246:                       = compile_range (&p, pend, translate, syntax, b);
                   2247:                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
                   2248:                   }
                   2249: 
                   2250:                 else if (p[0] == '-' && p[1] != ']')
                   2251:                   { /* This handles ranges made up of characters only.  */
                   2252:                     reg_errcode_t ret;
                   2253: 
                   2254:                    /* Move past the `-'.  */
                   2255:                     PATFETCH (c1);
                   2256: 
                   2257:                     ret = compile_range (&p, pend, translate, syntax, b);
                   2258:                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
                   2259:                   }
                   2260: 
                   2261:                 /* See if we're at the beginning of a possible character
                   2262:                    class.  */
                   2263: 
                   2264:                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
                   2265:                   { /* Leave room for the null.  */
                   2266:                     char str[CHAR_CLASS_MAX_LENGTH + 1];
                   2267: 
                   2268:                     PATFETCH (c);
                   2269:                     c1 = 0;
                   2270: 
                   2271:                     /* If pattern is `[[:'.  */
                   2272:                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
                   2273: 
                   2274:                     for (;;)
                   2275:                       {
                   2276:                         PATFETCH (c);
                   2277:                         if ((c == ':' && *p == ']') || p == pend)
                   2278:                           break;
                   2279:                        if (c1 < CHAR_CLASS_MAX_LENGTH)
                   2280:                          str[c1++] = c;
                   2281:                        else
                   2282:                          /* This is in any case an invalid class name.  */
                   2283:                          str[0] = '\0';
                   2284:                       }
                   2285:                     str[c1] = '\0';
                   2286: 
                   2287:                     /* If isn't a word bracketed by `[:' and `:]':
                   2288:                        undo the ending character, the letters, and leave
                   2289:                        the leading `:' and `[' (but set bits for them).  */
                   2290:                     if (c == ':' && *p == ']')
                   2291:                       {
                   2292: #if defined _LIBC || WIDE_CHAR_SUPPORT
                   2293:                         boolean is_lower = STREQ (str, "lower");
                   2294:                         boolean is_upper = STREQ (str, "upper");
                   2295:                        wctype_t wt;
                   2296:                         int ch;
                   2297: 
                   2298:                        wt = IS_CHAR_CLASS (str);
                   2299:                        if (wt == 0)
                   2300:                          FREE_STACK_RETURN (REG_ECTYPE);
                   2301: 
                   2302:                         /* Throw away the ] at the end of the character
                   2303:                            class.  */
                   2304:                         PATFETCH (c);
                   2305: 
                   2306:                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
                   2307: 
                   2308:                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
                   2309:                          {
                   2310: # ifdef _LIBC
                   2311:                            if (__iswctype (__btowc (ch), wt))
                   2312:                              SET_LIST_BIT (ch);
                   2313: # else
                   2314:                            if (iswctype (btowc (ch), wt))
                   2315:                              SET_LIST_BIT (ch);
                   2316: # endif
                   2317: 
                   2318:                            if (translate && (is_upper || is_lower)
                   2319:                                && (ISUPPER (ch) || ISLOWER (ch)))
                   2320:                              SET_LIST_BIT (ch);
                   2321:                          }
                   2322: 
                   2323:                         had_char_class = true;
                   2324: #else
                   2325:                         int ch;
                   2326:                         boolean is_alnum = STREQ (str, "alnum");
                   2327:                         boolean is_alpha = STREQ (str, "alpha");
                   2328:                         boolean is_blank = STREQ (str, "blank");
                   2329:                         boolean is_cntrl = STREQ (str, "cntrl");
                   2330:                         boolean is_digit = STREQ (str, "digit");
                   2331:                         boolean is_graph = STREQ (str, "graph");
                   2332:                         boolean is_lower = STREQ (str, "lower");
                   2333:                         boolean is_print = STREQ (str, "print");
                   2334:                         boolean is_punct = STREQ (str, "punct");
                   2335:                         boolean is_space = STREQ (str, "space");
                   2336:                         boolean is_upper = STREQ (str, "upper");
                   2337:                         boolean is_xdigit = STREQ (str, "xdigit");
                   2338: 
                   2339:                         if (!IS_CHAR_CLASS (str))
                   2340:                          FREE_STACK_RETURN (REG_ECTYPE);
                   2341: 
                   2342:                         /* Throw away the ] at the end of the character
                   2343:                            class.  */
                   2344:                         PATFETCH (c);
                   2345: 
                   2346:                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
                   2347: 
                   2348:                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
                   2349:                           {
                   2350:                            /* This was split into 3 if's to
                   2351:                               avoid an arbitrary limit in some compiler.  */
                   2352:                             if (   (is_alnum  && ISALNUM (ch))
                   2353:                                 || (is_alpha  && ISALPHA (ch))
                   2354:                                 || (is_blank  && ISBLANK (ch))
                   2355:                                 || (is_cntrl  && ISCNTRL (ch)))
                   2356:                              SET_LIST_BIT (ch);
                   2357:                            if (   (is_digit  && ISDIGIT (ch))
                   2358:                                 || (is_graph  && ISGRAPH (ch))
                   2359:                                 || (is_lower  && ISLOWER (ch))
                   2360:                                 || (is_print  && ISPRINT (ch)))
                   2361:                              SET_LIST_BIT (ch);
                   2362:                            if (   (is_punct  && ISPUNCT (ch))
                   2363:                                 || (is_space  && ISSPACE (ch))
                   2364:                                 || (is_upper  && ISUPPER (ch))
                   2365:                                 || (is_xdigit && ISXDIGIT (ch)))
                   2366:                              SET_LIST_BIT (ch);
                   2367:                            if (   translate && (is_upper || is_lower)
                   2368:                                && (ISUPPER (ch) || ISLOWER (ch)))
                   2369:                              SET_LIST_BIT (ch);
                   2370:                           }
                   2371:                         had_char_class = true;
                   2372: #endif /* libc || wctype.h */
                   2373:                       }
                   2374:                     else
                   2375:                       {
                   2376:                         c1++;
                   2377:                         while (c1--)
                   2378:                           PATUNFETCH;
                   2379:                         SET_LIST_BIT ('[');
                   2380:                         SET_LIST_BIT (':');
                   2381:                         had_char_class = false;
                   2382:                       }
                   2383:                   }
                   2384:                 else
                   2385:                   {
                   2386:                     had_char_class = false;
                   2387:                     SET_LIST_BIT (c);
                   2388:                   }
                   2389:               }
                   2390: 
                   2391:             /* Discard any (non)matching list bytes that are all 0 at the
                   2392:                end of the map.  Decrease the map-length byte too.  */
                   2393:             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
                   2394:               b[-1]--;
                   2395:             b += b[-1];
                   2396:           }
                   2397:           break;
                   2398: 
                   2399: 
                   2400:        case '(':
                   2401:           if (syntax & RE_NO_BK_PARENS)
                   2402:             goto handle_open;
                   2403:           else
                   2404:             goto normal_char;
                   2405: 
                   2406: 
                   2407:         case ')':
                   2408:           if (syntax & RE_NO_BK_PARENS)
                   2409:             goto handle_close;
                   2410:           else
                   2411:             goto normal_char;
                   2412: 
                   2413: 
                   2414:         case '\n':
                   2415:           if (syntax & RE_NEWLINE_ALT)
                   2416:             goto handle_alt;
                   2417:           else
                   2418:             goto normal_char;
                   2419: 
                   2420: 
                   2421:        case '|':
                   2422:           if (syntax & RE_NO_BK_VBAR)
                   2423:             goto handle_alt;
                   2424:           else
                   2425:             goto normal_char;
                   2426: 
                   2427: 
                   2428:         case '{':
                   2429:            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
                   2430:              goto handle_interval;
                   2431:            else
                   2432:              goto normal_char;
                   2433: 
                   2434: 
                   2435:         case '\\':
                   2436:           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
                   2437: 
                   2438:           /* Do not translate the character after the \, so that we can
                   2439:              distinguish, e.g., \B from \b, even if we normally would
                   2440:              translate, e.g., B to b.  */
                   2441:           PATFETCH_RAW (c);
                   2442: 
                   2443:           switch (c)
                   2444:             {
                   2445:             case '(':
                   2446:               if (syntax & RE_NO_BK_PARENS)
                   2447:                 goto normal_backslash;
                   2448: 
                   2449:             handle_open:
                   2450:               bufp->re_nsub++;
                   2451:               regnum++;
                   2452: 
                   2453:               if (COMPILE_STACK_FULL)
                   2454:                 {
                   2455:                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
                   2456:                             compile_stack_elt_t);
                   2457:                   if (compile_stack.stack == NULL) return REG_ESPACE;
                   2458: 
                   2459:                   compile_stack.size <<= 1;
                   2460:                 }
                   2461: 
                   2462:               /* These are the values to restore when we hit end of this
                   2463:                  group.  They are all relative offsets, so that if the
                   2464:                  whole pattern moves because of realloc, they will still
                   2465:                  be valid.  */
                   2466:               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
                   2467:               COMPILE_STACK_TOP.fixup_alt_jump
                   2468:                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
                   2469:               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
                   2470:               COMPILE_STACK_TOP.regnum = regnum;
                   2471: 
                   2472:               /* We will eventually replace the 0 with the number of
                   2473:                  groups inner to this one.  But do not push a
                   2474:                  start_memory for groups beyond the last one we can
                   2475:                  represent in the compiled pattern.  */
                   2476:               if (regnum <= MAX_REGNUM)
                   2477:                 {
                   2478:                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
                   2479:                   BUF_PUSH_3 (start_memory, regnum, 0);
                   2480:                 }
                   2481: 
                   2482:               compile_stack.avail++;
                   2483: 
                   2484:               fixup_alt_jump = 0;
                   2485:               laststart = 0;
                   2486:               begalt = b;
                   2487:              /* If we've reached MAX_REGNUM groups, then this open
                   2488:                 won't actually generate any code, so we'll have to
                   2489:                 clear pending_exact explicitly.  */
                   2490:              pending_exact = 0;
                   2491:               break;
                   2492: 
                   2493: 
                   2494:             case ')':
                   2495:               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
                   2496: 
                   2497:               if (COMPILE_STACK_EMPTY)
                   2498:                {
                   2499:                  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
                   2500:                    goto normal_backslash;
                   2501:                  else
                   2502:                    FREE_STACK_RETURN (REG_ERPAREN);
                   2503:                }
                   2504: 
                   2505:             handle_close:
                   2506:               if (fixup_alt_jump)
                   2507:                 { /* Push a dummy failure point at the end of the
                   2508:                      alternative for a possible future
                   2509:                      `pop_failure_jump' to pop.  See comments at
                   2510:                      `push_dummy_failure' in `re_match_2'.  */
                   2511:                   BUF_PUSH (push_dummy_failure);
                   2512: 
                   2513:                   /* We allocated space for this jump when we assigned
                   2514:                      to `fixup_alt_jump', in the `handle_alt' case below.  */
                   2515:                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
                   2516:                 }
                   2517: 
                   2518:               /* See similar code for backslashed left paren above.  */
                   2519:               if (COMPILE_STACK_EMPTY)
                   2520:                {
                   2521:                  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
                   2522:                    goto normal_char;
                   2523:                  else
                   2524:                    FREE_STACK_RETURN (REG_ERPAREN);
                   2525:                }
                   2526: 
                   2527:               /* Since we just checked for an empty stack above, this
                   2528:                  ``can't happen''.  */
                   2529:               assert (compile_stack.avail != 0);
                   2530:               {
                   2531:                 /* We don't just want to restore into `regnum', because
                   2532:                    later groups should continue to be numbered higher,
                   2533:                    as in `(ab)c(de)' -- the second group is #2.  */
                   2534:                 regnum_t this_group_regnum;
                   2535: 
                   2536:                 compile_stack.avail--;
                   2537:                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
                   2538:                 fixup_alt_jump
                   2539:                   = COMPILE_STACK_TOP.fixup_alt_jump
                   2540:                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
                   2541:                     : 0;
                   2542:                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
                   2543:                 this_group_regnum = COMPILE_STACK_TOP.regnum;
                   2544:                /* If we've reached MAX_REGNUM groups, then this open
                   2545:                   won't actually generate any code, so we'll have to
                   2546:                   clear pending_exact explicitly.  */
                   2547:                pending_exact = 0;
                   2548: 
                   2549:                 /* We're at the end of the group, so now we know how many
                   2550:                    groups were inside this one.  */
                   2551:                 if (this_group_regnum <= MAX_REGNUM)
                   2552:                   {
                   2553:                     unsigned char *inner_group_loc
                   2554:                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
                   2555: 
                   2556:                     *inner_group_loc = regnum - this_group_regnum;
                   2557:                     BUF_PUSH_3 (stop_memory, this_group_regnum,
                   2558:                                 regnum - this_group_regnum);
                   2559:                   }
                   2560:               }
                   2561:               break;
                   2562: 
                   2563: 
                   2564:             case '|':                                  /* `\|'.  */
                   2565:               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
                   2566:                 goto normal_backslash;
                   2567:             handle_alt:
                   2568:               if (syntax & RE_LIMITED_OPS)
                   2569:                 goto normal_char;
                   2570: 
                   2571:               /* Insert before the previous alternative a jump which
                   2572:                  jumps to this alternative if the former fails.  */
                   2573:               GET_BUFFER_SPACE (3);
                   2574:               INSERT_JUMP (on_failure_jump, begalt, b + 6);
                   2575:               pending_exact = 0;
                   2576:               b += 3;
                   2577: 
                   2578:               /* The alternative before this one has a jump after it
                   2579:                  which gets executed if it gets matched.  Adjust that
                   2580:                  jump so it will jump to this alternative's analogous
                   2581:                  jump (put in below, which in turn will jump to the next
                   2582:                  (if any) alternative's such jump, etc.).  The last such
                   2583:                  jump jumps to the correct final destination.  A picture:
                   2584:                           _____ _____
                   2585:                           |   | |   |
                   2586:                           |   v |   v
                   2587:                          a | b   | c
                   2588: 
                   2589:                  If we are at `b', then fixup_alt_jump right now points to a
                   2590:                  three-byte space after `a'.  We'll put in the jump, set
                   2591:                  fixup_alt_jump to right after `b', and leave behind three
                   2592:                  bytes which we'll fill in when we get to after `c'.  */
                   2593: 
                   2594:               if (fixup_alt_jump)
                   2595:                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
                   2596: 
                   2597:               /* Mark and leave space for a jump after this alternative,
                   2598:                  to be filled in later either by next alternative or
                   2599:                  when know we're at the end of a series of alternatives.  */
                   2600:               fixup_alt_jump = b;
                   2601:               GET_BUFFER_SPACE (3);
                   2602:               b += 3;
                   2603: 
                   2604:               laststart = 0;
                   2605:               begalt = b;
                   2606:               break;
                   2607: 
                   2608: 
                   2609:             case '{':
                   2610:               /* If \{ is a literal.  */
                   2611:               if (!(syntax & RE_INTERVALS)
                   2612:                      /* If we're at `\{' and it's not the open-interval
                   2613:                         operator.  */
                   2614:                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
                   2615:                   || (p - 2 == pattern  &&  p == pend))
                   2616:                 goto normal_backslash;
                   2617: 
                   2618:             handle_interval:
                   2619:               {
                   2620:                 /* If got here, then the syntax allows intervals.  */
                   2621: 
                   2622:                 /* At least (most) this many matches must be made.  */
                   2623:                 int lower_bound = -1, upper_bound = -1;
                   2624: 
                   2625:                 beg_interval = p - 1;
                   2626: 
                   2627:                 if (p == pend)
                   2628:                   {
                   2629:                     if (syntax & RE_NO_BK_BRACES)
                   2630:                       goto unfetch_interval;
                   2631:                     else
                   2632:                       FREE_STACK_RETURN (REG_EBRACE);
                   2633:                   }
                   2634: 
                   2635:                 GET_UNSIGNED_NUMBER (lower_bound);
                   2636: 
                   2637:                 if (c == ',')
                   2638:                   {
                   2639:                     GET_UNSIGNED_NUMBER (upper_bound);
                   2640:                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
                   2641:                   }
                   2642:                 else
                   2643:                   /* Interval such as `{1}' => match exactly once. */
                   2644:                   upper_bound = lower_bound;
                   2645: 
                   2646:                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
                   2647:                     || lower_bound > upper_bound)
                   2648:                   {
                   2649:                     if (syntax & RE_NO_BK_BRACES)
                   2650:                       goto unfetch_interval;
                   2651:                     else
                   2652:                       FREE_STACK_RETURN (REG_BADBR);
                   2653:                   }
                   2654: 
                   2655:                 if (!(syntax & RE_NO_BK_BRACES))
                   2656:                   {
                   2657:                     if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
                   2658: 
                   2659:                     PATFETCH (c);
                   2660:                   }
                   2661: 
                   2662:                 if (c != '}')
                   2663:                   {
                   2664:                     if (syntax & RE_NO_BK_BRACES)
                   2665:                       goto unfetch_interval;
                   2666:                     else
                   2667:                       FREE_STACK_RETURN (REG_BADBR);
                   2668:                   }
                   2669: 
                   2670:                 /* We just parsed a valid interval.  */
                   2671: 
                   2672:                 /* If it's invalid to have no preceding re.  */
                   2673:                 if (!laststart)
                   2674:                   {
                   2675:                     if (syntax & RE_CONTEXT_INVALID_OPS)
                   2676:                       FREE_STACK_RETURN (REG_BADRPT);
                   2677:                     else if (syntax & RE_CONTEXT_INDEP_OPS)
                   2678:                       laststart = b;
                   2679:                     else
                   2680:                       goto unfetch_interval;
                   2681:                   }
                   2682: 
                   2683:                 /* If the upper bound is zero, don't want to succeed at
                   2684:                    all; jump from `laststart' to `b + 3', which will be
                   2685:                    the end of the buffer after we insert the jump.  */
                   2686:                  if (upper_bound == 0)
                   2687:                    {
                   2688:                      GET_BUFFER_SPACE (3);
                   2689:                      INSERT_JUMP (jump, laststart, b + 3);
                   2690:                      b += 3;
                   2691:                    }
                   2692: 
                   2693:                  /* Otherwise, we have a nontrivial interval.  When
                   2694:                     we're all done, the pattern will look like:
                   2695:                       set_number_at <jump count> <upper bound>
                   2696:                       set_number_at <succeed_n count> <lower bound>
                   2697:                       succeed_n <after jump addr> <succeed_n count>
                   2698:                       <body of loop>
                   2699:                       jump_n <succeed_n addr> <jump count>
                   2700:                     (The upper bound and `jump_n' are omitted if
                   2701:                     `upper_bound' is 1, though.)  */
                   2702:                  else
                   2703:                    { /* If the upper bound is > 1, we need to insert
                   2704:                         more at the end of the loop.  */
                   2705:                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
                   2706: 
                   2707:                      GET_BUFFER_SPACE (nbytes);
                   2708: 
                   2709:                      /* Initialize lower bound of the `succeed_n', even
                   2710:                         though it will be set during matching by its
                   2711:                         attendant `set_number_at' (inserted next),
                   2712:                         because `re_compile_fastmap' needs to know.
                   2713:                         Jump to the `jump_n' we might insert below.  */
                   2714:                      INSERT_JUMP2 (succeed_n, laststart,
                   2715:                                    b + 5 + (upper_bound > 1) * 5,
                   2716:                                    lower_bound);
                   2717:                      b += 5;
                   2718: 
                   2719:                      /* Code to initialize the lower bound.  Insert
                   2720:                         before the `succeed_n'.  The `5' is the last two
                   2721:                         bytes of this `set_number_at', plus 3 bytes of
                   2722:                         the following `succeed_n'.  */
                   2723:                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
                   2724:                      b += 5;
                   2725: 
                   2726:                      if (upper_bound > 1)
                   2727:                        { /* More than one repetition is allowed, so
                   2728:                             append a backward jump to the `succeed_n'
                   2729:                             that starts this interval.
                   2730: 
                   2731:                             When we've reached this during matching,
                   2732:                             we'll have matched the interval once, so
                   2733:                             jump back only `upper_bound - 1' times.  */
                   2734:                          STORE_JUMP2 (jump_n, b, laststart + 5,
                   2735:                                       upper_bound - 1);
                   2736:                          b += 5;
                   2737: 
                   2738:                          /* The location we want to set is the second
                   2739:                             parameter of the `jump_n'; that is `b-2' as
                   2740:                             an absolute address.  `laststart' will be
                   2741:                             the `set_number_at' we're about to insert;
                   2742:                             `laststart+3' the number to set, the source
                   2743:                             for the relative address.  But we are
                   2744:                             inserting into the middle of the pattern --
                   2745:                             so everything is getting moved up by 5.
                   2746:                             Conclusion: (b - 2) - (laststart + 3) + 5,
                   2747:                             i.e., b - laststart.
                   2748: 
                   2749:                             We insert this at the beginning of the loop
                   2750:                             so that if we fail during matching, we'll
                   2751:                             reinitialize the bounds.  */
                   2752:                          insert_op2 (set_number_at, laststart, b - laststart,
                   2753:                                      upper_bound - 1, b);
                   2754:                          b += 5;
                   2755:                        }
                   2756:                    }
                   2757:                 pending_exact = 0;
                   2758:                 beg_interval = NULL;
                   2759:               }
                   2760:               break;
                   2761: 
                   2762:             unfetch_interval:
                   2763:               /* If an invalid interval, match the characters as literals.  */
                   2764:                assert (beg_interval);
                   2765:                p = beg_interval;
                   2766:                beg_interval = NULL;
                   2767: 
                   2768:                /* normal_char and normal_backslash need `c'.  */
                   2769:                PATFETCH (c);
                   2770: 
                   2771:                if (!(syntax & RE_NO_BK_BRACES))
                   2772:                  {
                   2773:                    if (p > pattern  &&  p[-1] == '\\')
                   2774:                      goto normal_backslash;
                   2775:                  }
                   2776:                goto normal_char;
                   2777: 
                   2778: #ifdef emacs
                   2779:             /* There is no way to specify the before_dot and after_dot
                   2780:                operators.  rms says this is ok.  --karl  */
                   2781:             case '=':
                   2782:               BUF_PUSH (at_dot);
                   2783:               break;
                   2784: 
                   2785:             case 's':
                   2786:               laststart = b;
                   2787:               PATFETCH (c);
                   2788:               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
                   2789:               break;
                   2790: 
                   2791:             case 'S':
                   2792:               laststart = b;
                   2793:               PATFETCH (c);
                   2794:               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
                   2795:               break;
                   2796: #endif /* emacs */
                   2797: 
                   2798: 
                   2799:             case 'w':
                   2800:              if (syntax & RE_NO_GNU_OPS)
                   2801:                goto normal_char;
                   2802:               laststart = b;
                   2803:               BUF_PUSH (wordchar);
                   2804:               break;
                   2805: 
                   2806: 
                   2807:             case 'W':
                   2808:              if (syntax & RE_NO_GNU_OPS)
                   2809:                goto normal_char;
                   2810:               laststart = b;
                   2811:               BUF_PUSH (notwordchar);
                   2812:               break;
                   2813: 
                   2814: 
                   2815:             case '<':
                   2816:              if (syntax & RE_NO_GNU_OPS)
                   2817:                goto normal_char;
                   2818:               BUF_PUSH (wordbeg);
                   2819:               break;
                   2820: 
                   2821:             case '>':
                   2822:              if (syntax & RE_NO_GNU_OPS)
                   2823:                goto normal_char;
                   2824:               BUF_PUSH (wordend);
                   2825:               break;
                   2826: 
                   2827:             case 'b':
                   2828:              if (syntax & RE_NO_GNU_OPS)
                   2829:                goto normal_char;
                   2830:               BUF_PUSH (wordbound);
                   2831:               break;
                   2832: 
                   2833:             case 'B':
                   2834:              if (syntax & RE_NO_GNU_OPS)
                   2835:                goto normal_char;
                   2836:               BUF_PUSH (notwordbound);
                   2837:               break;
                   2838: 
                   2839:             case '`':
                   2840:              if (syntax & RE_NO_GNU_OPS)
                   2841:                goto normal_char;
                   2842:               BUF_PUSH (begbuf);
                   2843:               break;
                   2844: 
                   2845:             case '\'':
                   2846:              if (syntax & RE_NO_GNU_OPS)
                   2847:                goto normal_char;
                   2848:               BUF_PUSH (endbuf);
                   2849:               break;
                   2850: 
                   2851:             case '1': case '2': case '3': case '4': case '5':
                   2852:             case '6': case '7': case '8': case '9':
                   2853:               if (syntax & RE_NO_BK_REFS)
                   2854:                 goto normal_char;
                   2855: 
                   2856:               c1 = c - '0';
                   2857: 
                   2858:               if (c1 > regnum)
                   2859:                 FREE_STACK_RETURN (REG_ESUBREG);
                   2860: 
                   2861:               /* Can't back reference to a subexpression if inside of it.  */
                   2862:               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
                   2863:                 goto normal_char;
                   2864: 
                   2865:               laststart = b;
                   2866:               BUF_PUSH_2 (duplicate, c1);
                   2867:               break;
                   2868: 
                   2869: 
                   2870:             case '+':
                   2871:             case '?':
                   2872:               if (syntax & RE_BK_PLUS_QM)
                   2873:                 goto handle_plus;
                   2874:               else
                   2875:                 goto normal_backslash;
                   2876: 
                   2877:             default:
                   2878:             normal_backslash:
                   2879:               /* You might think it would be useful for \ to mean
                   2880:                  not to translate; but if we don't translate it
                   2881:                  it will never match anything.  */
                   2882:               c = TRANSLATE (c);
                   2883:               goto normal_char;
                   2884:             }
                   2885:           break;
                   2886: 
                   2887: 
                   2888:        default:
                   2889:         /* Expects the character in `c'.  */
                   2890:        normal_char:
                   2891:              /* If no exactn currently being built.  */
                   2892:           if (!pending_exact
                   2893: 
                   2894:               /* If last exactn not at current position.  */
                   2895:               || pending_exact + *pending_exact + 1 != b
                   2896: 
                   2897:               /* We have only one byte following the exactn for the count.  */
                   2898:              || *pending_exact == (1 << BYTEWIDTH) - 1
                   2899: 
                   2900:               /* If followed by a repetition operator.  */
                   2901:               || *p == '*' || *p == '^'
                   2902:              || ((syntax & RE_BK_PLUS_QM)
                   2903:                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
                   2904:                  : (*p == '+' || *p == '?'))
                   2905:              || ((syntax & RE_INTERVALS)
                   2906:                   && ((syntax & RE_NO_BK_BRACES)
                   2907:                      ? *p == '{'
                   2908:                       : (p[0] == '\\' && p[1] == '{'))))
                   2909:            {
                   2910:              /* Start building a new exactn.  */
                   2911: 
                   2912:               laststart = b;
                   2913: 
                   2914:              BUF_PUSH_2 (exactn, 0);
                   2915:              pending_exact = b - 1;
                   2916:             }
                   2917: 
                   2918:          BUF_PUSH (c);
                   2919:           (*pending_exact)++;
                   2920:          break;
                   2921:         } /* switch (c) */
                   2922:     } /* while p != pend */
                   2923: 
                   2924: 
                   2925:   /* Through the pattern now.  */
                   2926: 
                   2927:   if (fixup_alt_jump)
                   2928:     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
                   2929: 
                   2930:   if (!COMPILE_STACK_EMPTY)
                   2931:     FREE_STACK_RETURN (REG_EPAREN);
                   2932: 
                   2933:   /* If we don't want backtracking, force success
                   2934:      the first time we reach the end of the compiled pattern.  */
                   2935:   if (syntax & RE_NO_POSIX_BACKTRACKING)
                   2936:     BUF_PUSH (succeed);
                   2937: 
                   2938:   free (compile_stack.stack);
                   2939: 
                   2940:   /* We have succeeded; set the length of the buffer.  */
                   2941:   bufp->used = b - bufp->buffer;
                   2942: 
                   2943: #ifdef DEBUG
                   2944:   if (debug)
                   2945:     {
                   2946:       DEBUG_PRINT1 ("\nCompiled pattern: \n");
                   2947:       print_compiled_pattern (bufp);
                   2948:     }
                   2949: #endif /* DEBUG */
                   2950: 
                   2951: #ifndef MATCH_MAY_ALLOCATE
                   2952:   /* Initialize the failure stack to the largest possible stack.  This
                   2953:      isn't necessary unless we're trying to avoid calling alloca in
                   2954:      the search and match routines.  */
                   2955:   {
                   2956:     int num_regs = bufp->re_nsub + 1;
                   2957: 
                   2958:     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
                   2959:        is strictly greater than re_max_failures, the largest possible stack
                   2960:        is 2 * re_max_failures failure points.  */
                   2961:     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
                   2962:       {
                   2963:        fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
                   2964: 
                   2965: # ifdef emacs
                   2966:        if (! fail_stack.stack)
                   2967:          fail_stack.stack
                   2968:            = (fail_stack_elt_t *) xmalloc (fail_stack.size
                   2969:                                            * sizeof (fail_stack_elt_t));
                   2970:        else
                   2971:          fail_stack.stack
                   2972:            = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
                   2973:                                             (fail_stack.size
                   2974:                                              * sizeof (fail_stack_elt_t)));
                   2975: # else /* not emacs */
                   2976:        if (! fail_stack.stack)
                   2977:          fail_stack.stack
                   2978:            = (fail_stack_elt_t *) malloc (fail_stack.size
                   2979:                                           * sizeof (fail_stack_elt_t));
                   2980:        else
                   2981:          fail_stack.stack
                   2982:            = (fail_stack_elt_t *) realloc (fail_stack.stack,
                   2983:                                            (fail_stack.size
                   2984:                                             * sizeof (fail_stack_elt_t)));
                   2985: # endif /* not emacs */
                   2986:       }
                   2987: 
                   2988:     regex_grow_registers (num_regs);
                   2989:   }
                   2990: #endif /* not MATCH_MAY_ALLOCATE */
                   2991: 
                   2992:   return REG_NOERROR;
                   2993: } /* regex_compile */
1.1.1.2 ! misho    2994: 
1.1       misho    2995: /* Subroutines for `regex_compile'.  */
                   2996: 
                   2997: /* Store OP at LOC followed by two-byte integer parameter ARG.  */
                   2998: 
                   2999: static void
                   3000: store_op1 (op, loc, arg)
                   3001:     re_opcode_t op;
                   3002:     unsigned char *loc;
                   3003:     int arg;
                   3004: {
                   3005:   *loc = (unsigned char) op;
                   3006:   STORE_NUMBER (loc + 1, arg);
                   3007: }
                   3008: 
                   3009: 
                   3010: /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
                   3011: 
                   3012: static void
                   3013: store_op2 (op, loc, arg1, arg2)
                   3014:     re_opcode_t op;
                   3015:     unsigned char *loc;
                   3016:     int arg1, arg2;
                   3017: {
                   3018:   *loc = (unsigned char) op;
                   3019:   STORE_NUMBER (loc + 1, arg1);
                   3020:   STORE_NUMBER (loc + 3, arg2);
                   3021: }
                   3022: 
                   3023: 
                   3024: /* Copy the bytes from LOC to END to open up three bytes of space at LOC
                   3025:    for OP followed by two-byte integer parameter ARG.  */
                   3026: 
                   3027: static void
                   3028: insert_op1 (op, loc, arg, end)
                   3029:     re_opcode_t op;
                   3030:     unsigned char *loc;
                   3031:     int arg;
                   3032:     unsigned char *end;
                   3033: {
                   3034:   register unsigned char *pfrom = end;
                   3035:   register unsigned char *pto = end + 3;
                   3036: 
                   3037:   while (pfrom != loc)
                   3038:     *--pto = *--pfrom;
                   3039: 
                   3040:   store_op1 (op, loc, arg);
                   3041: }
                   3042: 
                   3043: 
                   3044: /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
                   3045: 
                   3046: static void
                   3047: insert_op2 (op, loc, arg1, arg2, end)
                   3048:     re_opcode_t op;
                   3049:     unsigned char *loc;
                   3050:     int arg1, arg2;
                   3051:     unsigned char *end;
                   3052: {
                   3053:   register unsigned char *pfrom = end;
                   3054:   register unsigned char *pto = end + 5;
                   3055: 
                   3056:   while (pfrom != loc)
                   3057:     *--pto = *--pfrom;
                   3058: 
                   3059:   store_op2 (op, loc, arg1, arg2);
                   3060: }
                   3061: 
                   3062: 
                   3063: /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
                   3064:    after an alternative or a begin-subexpression.  We assume there is at
                   3065:    least one character before the ^.  */
                   3066: 
                   3067: static boolean
                   3068: at_begline_loc_p (pattern, p, syntax)
                   3069:     const char *pattern, *p;
                   3070:     reg_syntax_t syntax;
                   3071: {
                   3072:   const char *prev = p - 2;
                   3073:   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
                   3074: 
                   3075:   return
                   3076:        /* After a subexpression?  */
                   3077:        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
                   3078:        /* After an alternative?  */
                   3079:     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
                   3080: }
                   3081: 
                   3082: 
                   3083: /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
                   3084:    at least one character after the $, i.e., `P < PEND'.  */
                   3085: 
                   3086: static boolean
                   3087: at_endline_loc_p (p, pend, syntax)
                   3088:     const char *p, *pend;
                   3089:     reg_syntax_t syntax;
                   3090: {
                   3091:   const char *next = p;
                   3092:   boolean next_backslash = *next == '\\';
                   3093:   const char *next_next = p + 1 < pend ? p + 1 : 0;
                   3094: 
                   3095:   return
                   3096:        /* Before a subexpression?  */
                   3097:        (syntax & RE_NO_BK_PARENS ? *next == ')'
                   3098:         : next_backslash && next_next && *next_next == ')')
                   3099:        /* Before an alternative?  */
                   3100:     || (syntax & RE_NO_BK_VBAR ? *next == '|'
                   3101:         : next_backslash && next_next && *next_next == '|');
                   3102: }
                   3103: 
                   3104: 
                   3105: /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
                   3106:    false if it's not.  */
                   3107: 
                   3108: static boolean
                   3109: group_in_compile_stack (compile_stack, regnum)
                   3110:     compile_stack_type compile_stack;
                   3111:     regnum_t regnum;
                   3112: {
                   3113:   int this_element;
                   3114: 
                   3115:   for (this_element = compile_stack.avail - 1;
                   3116:        this_element >= 0;
                   3117:        this_element--)
                   3118:     if (compile_stack.stack[this_element].regnum == regnum)
                   3119:       return true;
                   3120: 
                   3121:   return false;
                   3122: }
                   3123: 
                   3124: 
                   3125: /* Read the ending character of a range (in a bracket expression) from the
                   3126:    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
                   3127:    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
                   3128:    Then we set the translation of all bits between the starting and
                   3129:    ending characters (inclusive) in the compiled pattern B.
                   3130: 
                   3131:    Return an error code.
                   3132: 
                   3133:    We use these short variable names so we can use the same macros as
                   3134:    `regex_compile' itself.  */
                   3135: 
                   3136: static reg_errcode_t
                   3137: compile_range (p_ptr, pend, translate, syntax, b)
                   3138:     const char **p_ptr, *pend;
                   3139:     RE_TRANSLATE_TYPE translate;
                   3140:     reg_syntax_t syntax;
                   3141:     unsigned char *b;
                   3142: {
                   3143:   unsigned this_char;
                   3144: 
                   3145:   const char *p = *p_ptr;
                   3146:   unsigned int range_start, range_end;
                   3147: 
                   3148:   if (p == pend)
                   3149:     return REG_ERANGE;
                   3150: 
                   3151:   /* Even though the pattern is a signed `char *', we need to fetch
                   3152:      with unsigned char *'s; if the high bit of the pattern character
                   3153:      is set, the range endpoints will be negative if we fetch using a
                   3154:      signed char *.
                   3155: 
                   3156:      We also want to fetch the endpoints without translating them; the
                   3157:      appropriate translation is done in the bit-setting loop below.  */
                   3158:   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
                   3159:   range_start = ((const unsigned char *) p)[-2];
                   3160:   range_end   = ((const unsigned char *) p)[0];
                   3161: 
                   3162:   /* Have to increment the pointer into the pattern string, so the
                   3163:      caller isn't still at the ending character.  */
                   3164:   (*p_ptr)++;
                   3165: 
                   3166:   /* If the start is after the end, the range is empty.  */
                   3167:   if (range_start > range_end)
                   3168:     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
                   3169: 
                   3170:   /* Here we see why `this_char' has to be larger than an `unsigned
                   3171:      char' -- the range is inclusive, so if `range_end' == 0xff
                   3172:      (assuming 8-bit characters), we would otherwise go into an infinite
                   3173:      loop, since all characters <= 0xff.  */
                   3174:   for (this_char = range_start; this_char <= range_end; this_char++)
                   3175:     {
                   3176:       SET_LIST_BIT (TRANSLATE (this_char));
                   3177:     }
                   3178: 
                   3179:   return REG_NOERROR;
                   3180: }
1.1.1.2 ! misho    3181: 
1.1       misho    3182: /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
                   3183:    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
                   3184:    characters can start a string that matches the pattern.  This fastmap
                   3185:    is used by re_search to skip quickly over impossible starting points.
                   3186: 
                   3187:    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
                   3188:    area as BUFP->fastmap.
                   3189: 
                   3190:    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
                   3191:    the pattern buffer.
                   3192: 
                   3193:    Returns 0 if we succeed, -2 if an internal error.   */
                   3194: 
                   3195: int
                   3196: re_compile_fastmap (bufp)
                   3197:      struct re_pattern_buffer *bufp;
                   3198: {
                   3199:   int j, k;
                   3200: #ifdef MATCH_MAY_ALLOCATE
                   3201:   fail_stack_type fail_stack;
                   3202: #endif
                   3203: #ifndef REGEX_MALLOC
                   3204:   char *destination;
                   3205: #endif
                   3206: 
                   3207:   register char *fastmap = bufp->fastmap;
                   3208:   unsigned char *pattern = bufp->buffer;
                   3209:   unsigned char *p = pattern;
                   3210:   register unsigned char *pend = pattern + bufp->used;
                   3211: 
                   3212: #ifdef REL_ALLOC
                   3213:   /* This holds the pointer to the failure stack, when
                   3214:      it is allocated relocatably.  */
                   3215:   fail_stack_elt_t *failure_stack_ptr;
                   3216: #endif
                   3217: 
                   3218:   /* Assume that each path through the pattern can be null until
                   3219:      proven otherwise.  We set this false at the bottom of switch
                   3220:      statement, to which we get only if a particular path doesn't
                   3221:      match the empty string.  */
                   3222:   boolean path_can_be_null = true;
                   3223: 
                   3224:   /* We aren't doing a `succeed_n' to begin with.  */
                   3225:   boolean succeed_n_p = false;
                   3226: 
                   3227:   assert (fastmap != NULL && p != NULL);
                   3228: 
                   3229:   INIT_FAIL_STACK ();
                   3230:   memset (fastmap, 0, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
                   3231:   bufp->fastmap_accurate = 1;      /* It will be when we're done.  */
                   3232:   bufp->can_be_null = 0;
                   3233: 
                   3234:   while (1)
                   3235:     {
                   3236:       if (p == pend || *p == succeed)
                   3237:        {
                   3238:          /* We have reached the (effective) end of pattern.  */
                   3239:          if (!FAIL_STACK_EMPTY ())
                   3240:            {
                   3241:              bufp->can_be_null |= path_can_be_null;
                   3242: 
                   3243:              /* Reset for next path.  */
                   3244:              path_can_be_null = true;
                   3245: 
                   3246:              p = fail_stack.stack[--fail_stack.avail].pointer;
                   3247: 
                   3248:              continue;
                   3249:            }
                   3250:          else
                   3251:            break;
                   3252:        }
                   3253: 
                   3254:       /* We should never be about to go beyond the end of the pattern.  */
                   3255:       assert (p < pend);
                   3256: 
                   3257:       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
                   3258:        {
                   3259: 
                   3260:         /* I guess the idea here is to simply not bother with a fastmap
                   3261:            if a backreference is used, since it's too hard to figure out
                   3262:            the fastmap for the corresponding group.  Setting
                   3263:            `can_be_null' stops `re_search_2' from using the fastmap, so
                   3264:            that is all we do.  */
                   3265:        case duplicate:
                   3266:          bufp->can_be_null = 1;
                   3267:           goto done;
                   3268: 
                   3269: 
                   3270:       /* Following are the cases which match a character.  These end
                   3271:          with `break'.  */
                   3272: 
                   3273:        case exactn:
                   3274:           fastmap[p[1]] = 1;
                   3275:          break;
                   3276: 
                   3277: 
                   3278:         case charset:
                   3279:           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                   3280:            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
                   3281:               fastmap[j] = 1;
                   3282:          break;
                   3283: 
                   3284: 
                   3285:        case charset_not:
                   3286:          /* Chars beyond end of map must be allowed.  */
                   3287:          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
                   3288:             fastmap[j] = 1;
                   3289: 
                   3290:          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                   3291:            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
                   3292:               fastmap[j] = 1;
                   3293:           break;
                   3294: 
                   3295: 
                   3296:        case wordchar:
                   3297:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                   3298:            if (SYNTAX (j) == Sword)
                   3299:              fastmap[j] = 1;
                   3300:          break;
                   3301: 
                   3302: 
                   3303:        case notwordchar:
                   3304:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                   3305:            if (SYNTAX (j) != Sword)
                   3306:              fastmap[j] = 1;
                   3307:          break;
                   3308: 
                   3309: 
                   3310:         case anychar:
                   3311:          {
                   3312:            int fastmap_newline = fastmap['\n'];
                   3313: 
                   3314:            /* `.' matches anything ...  */
                   3315:            for (j = 0; j < (1 << BYTEWIDTH); j++)
                   3316:              fastmap[j] = 1;
                   3317: 
                   3318:            /* ... except perhaps newline.  */
                   3319:            if (!(bufp->syntax & RE_DOT_NEWLINE))
                   3320:              fastmap['\n'] = fastmap_newline;
                   3321: 
                   3322:            /* Return if we have already set `can_be_null'; if we have,
                   3323:               then the fastmap is irrelevant.  Something's wrong here.  */
                   3324:            else if (bufp->can_be_null)
                   3325:              goto done;
                   3326: 
                   3327:            /* Otherwise, have to check alternative paths.  */
                   3328:            break;
                   3329:          }
                   3330: 
                   3331: #ifdef emacs
                   3332:         case syntaxspec:
                   3333:          k = *p++;
                   3334:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                   3335:            if (SYNTAX (j) == (enum syntaxcode) k)
                   3336:              fastmap[j] = 1;
                   3337:          break;
                   3338: 
                   3339: 
                   3340:        case notsyntaxspec:
                   3341:          k = *p++;
                   3342:          for (j = 0; j < (1 << BYTEWIDTH); j++)
                   3343:            if (SYNTAX (j) != (enum syntaxcode) k)
                   3344:              fastmap[j] = 1;
                   3345:          break;
                   3346: 
                   3347: 
                   3348:       /* All cases after this match the empty string.  These end with
                   3349:          `continue'.  */
                   3350: 
                   3351: 
                   3352:        case before_dot:
                   3353:        case at_dot:
                   3354:        case after_dot:
                   3355:           continue;
                   3356: #endif /* emacs */
                   3357: 
                   3358: 
                   3359:         case no_op:
                   3360:         case begline:
                   3361:         case endline:
                   3362:        case begbuf:
                   3363:        case endbuf:
                   3364:        case wordbound:
                   3365:        case notwordbound:
                   3366:        case wordbeg:
                   3367:        case wordend:
                   3368:         case push_dummy_failure:
                   3369:           continue;
                   3370: 
                   3371: 
                   3372:        case jump_n:
                   3373:         case pop_failure_jump:
                   3374:        case maybe_pop_jump:
                   3375:        case jump:
                   3376:         case jump_past_alt:
                   3377:        case dummy_failure_jump:
                   3378:           EXTRACT_NUMBER_AND_INCR (j, p);
                   3379:          p += j;
                   3380:          if (j > 0)
                   3381:            continue;
                   3382: 
                   3383:           /* Jump backward implies we just went through the body of a
                   3384:              loop and matched nothing.  Opcode jumped to should be
                   3385:              `on_failure_jump' or `succeed_n'.  Just treat it like an
                   3386:              ordinary jump.  For a * loop, it has pushed its failure
                   3387:              point already; if so, discard that as redundant.  */
                   3388:           if ((re_opcode_t) *p != on_failure_jump
                   3389:              && (re_opcode_t) *p != succeed_n)
                   3390:            continue;
                   3391: 
                   3392:           p++;
                   3393:           EXTRACT_NUMBER_AND_INCR (j, p);
                   3394:           p += j;
                   3395: 
                   3396:           /* If what's on the stack is where we are now, pop it.  */
                   3397:           if (!FAIL_STACK_EMPTY ()
                   3398:              && fail_stack.stack[fail_stack.avail - 1].pointer == p)
                   3399:             fail_stack.avail--;
                   3400: 
                   3401:           continue;
                   3402: 
                   3403: 
                   3404:         case on_failure_jump:
                   3405:         case on_failure_keep_string_jump:
                   3406:        handle_on_failure_jump:
                   3407:           EXTRACT_NUMBER_AND_INCR (j, p);
                   3408: 
                   3409:           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
                   3410:              end of the pattern.  We don't want to push such a point,
                   3411:              since when we restore it above, entering the switch will
                   3412:              increment `p' past the end of the pattern.  We don't need
                   3413:              to push such a point since we obviously won't find any more
                   3414:              fastmap entries beyond `pend'.  Such a pattern can match
                   3415:              the null string, though.  */
                   3416:           if (p + j < pend)
                   3417:             {
                   3418:               if (!PUSH_PATTERN_OP (p + j, fail_stack))
                   3419:                {
                   3420:                  RESET_FAIL_STACK ();
                   3421:                  return -2;
                   3422:                }
                   3423:             }
                   3424:           else
                   3425:             bufp->can_be_null = 1;
                   3426: 
                   3427:           if (succeed_n_p)
                   3428:             {
                   3429:               EXTRACT_NUMBER_AND_INCR (k, p);  /* Skip the n.  */
                   3430:               succeed_n_p = false;
                   3431:            }
                   3432: 
                   3433:           continue;
                   3434: 
                   3435: 
                   3436:        case succeed_n:
                   3437:           /* Get to the number of times to succeed.  */
                   3438:           p += 2;
                   3439: 
                   3440:           /* Increment p past the n for when k != 0.  */
                   3441:           EXTRACT_NUMBER_AND_INCR (k, p);
                   3442:           if (k == 0)
                   3443:            {
                   3444:               p -= 4;
                   3445:              succeed_n_p = true;  /* Spaghetti code alert.  */
                   3446:               goto handle_on_failure_jump;
                   3447:             }
                   3448:           continue;
                   3449: 
                   3450: 
                   3451:        case set_number_at:
                   3452:           p += 4;
                   3453:           continue;
                   3454: 
                   3455: 
                   3456:        case start_memory:
                   3457:         case stop_memory:
                   3458:          p += 2;
                   3459:          continue;
                   3460: 
                   3461: 
                   3462:        default:
                   3463:           abort (); /* We have listed all the cases.  */
                   3464:         } /* switch *p++ */
                   3465: 
                   3466:       /* Getting here means we have found the possible starting
                   3467:          characters for one path of the pattern -- and that the empty
                   3468:          string does not match.  We need not follow this path further.
                   3469:          Instead, look at the next alternative (remembered on the
                   3470:          stack), or quit if no more.  The test at the top of the loop
                   3471:          does these things.  */
                   3472:       path_can_be_null = false;
                   3473:       p = pend;
                   3474:     } /* while p */
                   3475: 
                   3476:   /* Set `can_be_null' for the last path (also the first path, if the
                   3477:      pattern is empty).  */
                   3478:   bufp->can_be_null |= path_can_be_null;
                   3479: 
                   3480:  done:
                   3481:   RESET_FAIL_STACK ();
                   3482:   return 0;
                   3483: } /* re_compile_fastmap */
                   3484: #ifdef _LIBC
                   3485: weak_alias (__re_compile_fastmap, re_compile_fastmap)
                   3486: #endif
1.1.1.2 ! misho    3487: 
1.1       misho    3488: /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
                   3489:    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
                   3490:    this memory for recording register information.  STARTS and ENDS
                   3491:    must be allocated using the malloc library routine, and must each
                   3492:    be at least NUM_REGS * sizeof (regoff_t) bytes long.
                   3493: 
                   3494:    If NUM_REGS == 0, then subsequent matches should allocate their own
                   3495:    register data.
                   3496: 
                   3497:    Unless this function is called, the first search or match using
                   3498:    PATTERN_BUFFER will allocate its own register data, without
                   3499:    freeing the old data.  */
                   3500: 
                   3501: void
                   3502: re_set_registers (bufp, regs, num_regs, starts, ends)
                   3503:     struct re_pattern_buffer *bufp;
                   3504:     struct re_registers *regs;
                   3505:     unsigned num_regs;
                   3506:     regoff_t *starts, *ends;
                   3507: {
                   3508:   if (num_regs)
                   3509:     {
                   3510:       bufp->regs_allocated = REGS_REALLOCATE;
                   3511:       regs->num_regs = num_regs;
                   3512:       regs->start = starts;
                   3513:       regs->end = ends;
                   3514:     }
                   3515:   else
                   3516:     {
                   3517:       bufp->regs_allocated = REGS_UNALLOCATED;
                   3518:       regs->num_regs = 0;
                   3519:       regs->start = regs->end = (regoff_t *) 0;
                   3520:     }
                   3521: }
                   3522: #ifdef _LIBC
                   3523: weak_alias (__re_set_registers, re_set_registers)
                   3524: #endif
1.1.1.2 ! misho    3525: 
1.1       misho    3526: /* Searching routines.  */
                   3527: 
                   3528: /* Like re_search_2, below, but only one string is specified, and
                   3529:    doesn't let you say where to stop matching. */
                   3530: 
                   3531: int
                   3532: re_search (bufp, string, size, startpos, range, regs)
                   3533:      struct re_pattern_buffer *bufp;
                   3534:      const char *string;
                   3535:      int size, startpos, range;
                   3536:      struct re_registers *regs;
                   3537: {
                   3538:   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
                   3539:                      regs, size);
                   3540: }
                   3541: #ifdef _LIBC
                   3542: weak_alias (__re_search, re_search)
                   3543: #endif
                   3544: 
                   3545: 
                   3546: /* Using the compiled pattern in BUFP->buffer, first tries to match the
                   3547:    virtual concatenation of STRING1 and STRING2, starting first at index
                   3548:    STARTPOS, then at STARTPOS + 1, and so on.
                   3549: 
                   3550:    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
                   3551: 
                   3552:    RANGE is how far to scan while trying to match.  RANGE = 0 means try
                   3553:    only at STARTPOS; in general, the last start tried is STARTPOS +
                   3554:    RANGE.
                   3555: 
                   3556:    In REGS, return the indices of the virtual concatenation of STRING1
                   3557:    and STRING2 that matched the entire BUFP->buffer and its contained
                   3558:    subexpressions.
                   3559: 
                   3560:    Do not consider matching one past the index STOP in the virtual
                   3561:    concatenation of STRING1 and STRING2.
                   3562: 
                   3563:    We return either the position in the strings at which the match was
                   3564:    found, -1 if no match, or -2 if error (such as failure
                   3565:    stack overflow).  */
                   3566: 
                   3567: int
                   3568: re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
                   3569:      struct re_pattern_buffer *bufp;
                   3570:      const char *string1, *string2;
                   3571:      int size1, size2;
                   3572:      int startpos;
                   3573:      int range;
                   3574:      struct re_registers *regs;
                   3575:      int stop;
                   3576: {
                   3577:   int val;
                   3578:   register char *fastmap = bufp->fastmap;
                   3579:   register RE_TRANSLATE_TYPE translate = bufp->translate;
                   3580:   int total_size = size1 + size2;
                   3581:   int endpos = startpos + range;
                   3582: 
                   3583:   /* Check for out-of-range STARTPOS.  */
                   3584:   if (startpos < 0 || startpos > total_size)
                   3585:     return -1;
                   3586: 
                   3587:   /* Fix up RANGE if it might eventually take us outside
                   3588:      the virtual concatenation of STRING1 and STRING2.
                   3589:      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
                   3590:   if (endpos < 0)
                   3591:     range = 0 - startpos;
                   3592:   else if (endpos > total_size)
                   3593:     range = total_size - startpos;
                   3594: 
                   3595:   /* If the search isn't to be a backwards one, don't waste time in a
                   3596:      search for a pattern that must be anchored.  */
                   3597:   if (bufp->used > 0 && range > 0
                   3598:       && ((re_opcode_t) bufp->buffer[0] == begbuf
                   3599:          /* `begline' is like `begbuf' if it cannot match at newlines.  */
                   3600:          || ((re_opcode_t) bufp->buffer[0] == begline
                   3601:              && !bufp->newline_anchor)))
                   3602:     {
                   3603:       if (startpos > 0)
                   3604:        return -1;
                   3605:       else
                   3606:        range = 1;
                   3607:     }
                   3608: 
                   3609: #ifdef emacs
                   3610:   /* In a forward search for something that starts with \=.
                   3611:      don't keep searching past point.  */
                   3612:   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
                   3613:     {
                   3614:       range = PT - startpos;
                   3615:       if (range <= 0)
                   3616:        return -1;
                   3617:     }
                   3618: #endif /* emacs */
                   3619: 
                   3620:   /* Update the fastmap now if not correct already.  */
                   3621:   if (fastmap && !bufp->fastmap_accurate)
                   3622:     if (re_compile_fastmap (bufp) == -2)
                   3623:       return -2;
                   3624: 
                   3625:   /* Loop through the string, looking for a place to start matching.  */
                   3626:   for (;;)
                   3627:     {
                   3628:       /* If a fastmap is supplied, skip quickly over characters that
                   3629:          cannot be the start of a match.  If the pattern can match the
                   3630:          null string, however, we don't need to skip characters; we want
                   3631:          the first null string.  */
                   3632:       if (fastmap && startpos < total_size && !bufp->can_be_null)
                   3633:        {
                   3634:          if (range > 0)        /* Searching forwards.  */
                   3635:            {
                   3636:              register const char *d;
                   3637:              register int lim = 0;
                   3638:              int irange = range;
                   3639: 
                   3640:               if (startpos < size1 && startpos + range >= size1)
                   3641:                 lim = range - (size1 - startpos);
                   3642: 
                   3643:              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
                   3644: 
                   3645:               /* Written out as an if-else to avoid testing `translate'
                   3646:                  inside the loop.  */
                   3647:              if (translate)
                   3648:                 while (range > lim
                   3649:                        && !fastmap[(unsigned char)
                   3650:                                   translate[(unsigned char) *d++]])
                   3651:                   range--;
                   3652:              else
                   3653:                 while (range > lim && !fastmap[(unsigned char) *d++])
                   3654:                   range--;
                   3655: 
                   3656:              startpos += irange - range;
                   3657:            }
                   3658:          else                          /* Searching backwards.  */
                   3659:            {
                   3660:              register char c = (size1 == 0 || startpos >= size1
                   3661:                                  ? string2[startpos - size1]
                   3662:                                  : string1[startpos]);
                   3663: 
                   3664:              if (!fastmap[(unsigned char) TRANSLATE (c)])
                   3665:                goto advance;
                   3666:            }
                   3667:        }
                   3668: 
                   3669:       /* If can't match the null string, and that's all we have left, fail.  */
                   3670:       if (range >= 0 && startpos == total_size && fastmap
                   3671:           && !bufp->can_be_null)
                   3672:        return -1;
                   3673: 
                   3674:       val = re_match_2_internal (bufp, string1, size1, string2, size2,
                   3675:                                 startpos, regs, stop);
                   3676: #ifndef REGEX_MALLOC
                   3677: # ifdef C_ALLOCA
                   3678:       alloca (0);
                   3679: # endif
                   3680: #endif
                   3681: 
                   3682:       if (val >= 0)
                   3683:        return startpos;
                   3684: 
                   3685:       if (val == -2)
                   3686:        return -2;
                   3687: 
                   3688:     advance:
                   3689:       if (!range)
                   3690:         break;
                   3691:       else if (range > 0)
                   3692:         {
                   3693:           range--;
                   3694:           startpos++;
                   3695:         }
                   3696:       else
                   3697:         {
                   3698:           range++;
                   3699:           startpos--;
                   3700:         }
                   3701:     }
                   3702:   return -1;
                   3703: } /* re_search_2 */
                   3704: #ifdef _LIBC
                   3705: weak_alias (__re_search_2, re_search_2)
                   3706: #endif
1.1.1.2 ! misho    3707: 
1.1       misho    3708: /* This converts PTR, a pointer into one of the search strings `string1'
                   3709:    and `string2' into an offset from the beginning of that string.  */
                   3710: #define POINTER_TO_OFFSET(ptr)                 \
                   3711:   (FIRST_STRING_P (ptr)                                \
                   3712:    ? ((regoff_t) ((ptr) - string1))            \
                   3713:    : ((regoff_t) ((ptr) - string2 + size1)))
                   3714: 
                   3715: /* Macros for dealing with the split strings in re_match_2.  */
                   3716: 
                   3717: #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
                   3718: 
                   3719: /* Call before fetching a character with *d.  This switches over to
                   3720:    string2 if necessary.  */
                   3721: #define PREFETCH()                                                     \
                   3722:   while (d == dend)                                                    \
                   3723:     {                                                                  \
                   3724:       /* End of string2 => fail.  */                                   \
                   3725:       if (dend == end_match_2)                                                 \
                   3726:         goto fail;                                                     \
                   3727:       /* End of string1 => advance to string2.  */                     \
                   3728:       d = string2;                                                     \
                   3729:       dend = end_match_2;                                              \
                   3730:     }
                   3731: 
                   3732: 
                   3733: /* Test if at very beginning or at very end of the virtual concatenation
                   3734:    of `string1' and `string2'.  If only one string, it's `string2'.  */
                   3735: #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
                   3736: #define AT_STRINGS_END(d) ((d) == end2)
                   3737: 
                   3738: 
                   3739: /* Test if D points to a character which is word-constituent.  We have
                   3740:    two special cases to check for: if past the end of string1, look at
                   3741:    the first character in string2; and if before the beginning of
                   3742:    string2, look at the last character in string1.  */
                   3743: #define WORDCHAR_P(d)                                                  \
                   3744:   (SYNTAX ((d) == end1 ? *string2                                      \
                   3745:            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                  \
                   3746:    == Sword)
                   3747: 
                   3748: /* Disabled due to a compiler bug -- see comment at case wordbound */
                   3749: #if 0
                   3750: /* Test if the character before D and the one at D differ with respect
                   3751:    to being word-constituent.  */
                   3752: #define AT_WORD_BOUNDARY(d)                                            \
                   3753:   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                            \
                   3754:    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
                   3755: #endif
                   3756: 
                   3757: /* Free everything we malloc.  */
                   3758: #ifdef MATCH_MAY_ALLOCATE
                   3759: # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
                   3760: # define FREE_VARIABLES()                                              \
                   3761:   do {                                                                 \
                   3762:     REGEX_FREE_STACK (fail_stack.stack);                               \
                   3763:     FREE_VAR (regstart);                                               \
                   3764:     FREE_VAR (regend);                                                 \
                   3765:     FREE_VAR (old_regstart);                                           \
                   3766:     FREE_VAR (old_regend);                                             \
                   3767:     FREE_VAR (best_regstart);                                          \
                   3768:     FREE_VAR (best_regend);                                            \
                   3769:     FREE_VAR (reg_info);                                               \
                   3770:     FREE_VAR (reg_dummy);                                              \
                   3771:     FREE_VAR (reg_info_dummy);                                         \
                   3772:   } while (0)
                   3773: #else
                   3774: # define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
                   3775: #endif /* not MATCH_MAY_ALLOCATE */
                   3776: 
                   3777: /* These values must meet several constraints.  They must not be valid
                   3778:    register values; since we have a limit of 255 registers (because
                   3779:    we use only one byte in the pattern for the register number), we can
                   3780:    use numbers larger than 255.  They must differ by 1, because of
                   3781:    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
                   3782:    be larger than the value for the highest register, so we do not try
                   3783:    to actually save any registers when none are active.  */
                   3784: #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
                   3785: #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
1.1.1.2 ! misho    3786: 
1.1       misho    3787: /* Matching routines.  */
                   3788: 
                   3789: #ifndef emacs   /* Emacs never uses this.  */
                   3790: /* re_match is like re_match_2 except it takes only a single string.  */
                   3791: 
                   3792: int
                   3793: re_match (bufp, string, size, pos, regs)
                   3794:      struct re_pattern_buffer *bufp;
                   3795:      const char *string;
                   3796:      int size, pos;
                   3797:      struct re_registers *regs;
                   3798: {
                   3799:   int result = re_match_2_internal (bufp, NULL, 0, string, size,
                   3800:                                    pos, regs, size);
                   3801: # ifndef REGEX_MALLOC
                   3802: #  ifdef C_ALLOCA
                   3803:   alloca (0);
                   3804: #  endif
                   3805: # endif
                   3806:   return result;
                   3807: }
                   3808: # ifdef _LIBC
                   3809: weak_alias (__re_match, re_match)
                   3810: # endif
                   3811: #endif /* not emacs */
                   3812: 
                   3813: static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
                   3814:                                                    unsigned char *end,
                   3815:                                                register_info_type *reg_info));
                   3816: static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
                   3817:                                                  unsigned char *end,
                   3818:                                                register_info_type *reg_info));
                   3819: static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
                   3820:                                                        unsigned char *end,
                   3821:                                                register_info_type *reg_info));
                   3822: static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
                   3823:                                     int len, char *translate));
                   3824: 
                   3825: /* re_match_2 matches the compiled pattern in BUFP against the
                   3826:    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
                   3827:    and SIZE2, respectively).  We start matching at POS, and stop
                   3828:    matching at STOP.
                   3829: 
                   3830:    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
                   3831:    store offsets for the substring each group matched in REGS.  See the
                   3832:    documentation for exactly how many groups we fill.
                   3833: 
                   3834:    We return -1 if no match, -2 if an internal error (such as the
                   3835:    failure stack overflowing).  Otherwise, we return the length of the
                   3836:    matched substring.  */
                   3837: 
                   3838: int
                   3839: re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
                   3840:      struct re_pattern_buffer *bufp;
                   3841:      const char *string1, *string2;
                   3842:      int size1, size2;
                   3843:      int pos;
                   3844:      struct re_registers *regs;
                   3845:      int stop;
                   3846: {
                   3847:   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
                   3848:                                    pos, regs, stop);
                   3849: #ifndef REGEX_MALLOC
                   3850: # ifdef C_ALLOCA
                   3851:   alloca (0);
                   3852: # endif
                   3853: #endif
                   3854:   return result;
                   3855: }
                   3856: #ifdef _LIBC
                   3857: weak_alias (__re_match_2, re_match_2)
                   3858: #endif
                   3859: 
                   3860: /* This is a separate function so that we can force an alloca cleanup
                   3861:    afterwards.  */
                   3862: static int
                   3863: re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                   3864:      struct re_pattern_buffer *bufp;
                   3865:      const char *string1, *string2;
                   3866:      int size1, size2;
                   3867:      int pos;
                   3868:      struct re_registers *regs;
                   3869:      int stop;
                   3870: {
                   3871:   /* General temporaries.  */
                   3872:   int mcnt;
                   3873:   unsigned char *p1;
                   3874: 
                   3875:   /* Just past the end of the corresponding string.  */
                   3876:   const char *end1, *end2;
                   3877: 
                   3878:   /* Pointers into string1 and string2, just past the last characters in
                   3879:      each to consider matching.  */
                   3880:   const char *end_match_1, *end_match_2;
                   3881: 
                   3882:   /* Where we are in the data, and the end of the current string.  */
                   3883:   const char *d, *dend;
                   3884: 
                   3885:   /* Where we are in the pattern, and the end of the pattern.  */
                   3886:   unsigned char *p = bufp->buffer;
                   3887:   register unsigned char *pend = p + bufp->used;
                   3888: 
                   3889:   /* Mark the opcode just after a start_memory, so we can test for an
                   3890:      empty subpattern when we get to the stop_memory.  */
                   3891:   unsigned char *just_past_start_mem = 0;
                   3892: 
                   3893:   /* We use this to map every character in the string.  */
                   3894:   RE_TRANSLATE_TYPE translate = bufp->translate;
                   3895: 
                   3896:   /* Failure point stack.  Each place that can handle a failure further
                   3897:      down the line pushes a failure point on this stack.  It consists of
                   3898:      restart, regend, and reg_info for all registers corresponding to
                   3899:      the subexpressions we're currently inside, plus the number of such
                   3900:      registers, and, finally, two char *'s.  The first char * is where
                   3901:      to resume scanning the pattern; the second one is where to resume
                   3902:      scanning the strings.  If the latter is zero, the failure point is
                   3903:      a ``dummy''; if a failure happens and the failure point is a dummy,
                   3904:      it gets discarded and the next next one is tried.  */
                   3905: #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
                   3906:   fail_stack_type fail_stack;
                   3907: #endif
                   3908: #ifdef DEBUG
                   3909:   static unsigned failure_id;
                   3910:   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
                   3911: #endif
                   3912: 
                   3913: #ifdef REL_ALLOC
                   3914:   /* This holds the pointer to the failure stack, when
                   3915:      it is allocated relocatably.  */
                   3916:   fail_stack_elt_t *failure_stack_ptr;
                   3917: #endif
                   3918: 
                   3919:   /* We fill all the registers internally, independent of what we
                   3920:      return, for use in backreferences.  The number here includes
                   3921:      an element for register zero.  */
                   3922:   size_t num_regs = bufp->re_nsub + 1;
                   3923: 
                   3924:   /* The currently active registers.  */
                   3925:   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
                   3926:   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
                   3927: 
                   3928:   /* Information on the contents of registers. These are pointers into
                   3929:      the input strings; they record just what was matched (on this
                   3930:      attempt) by a subexpression part of the pattern, that is, the
                   3931:      regnum-th regstart pointer points to where in the pattern we began
                   3932:      matching and the regnum-th regend points to right after where we
                   3933:      stopped matching the regnum-th subexpression.  (The zeroth register
                   3934:      keeps track of what the whole pattern matches.)  */
                   3935: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
                   3936:   const char **regstart, **regend;
                   3937: #endif
                   3938: 
                   3939:   /* If a group that's operated upon by a repetition operator fails to
                   3940:      match anything, then the register for its start will need to be
                   3941:      restored because it will have been set to wherever in the string we
                   3942:      are when we last see its open-group operator.  Similarly for a
                   3943:      register's end.  */
                   3944: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
                   3945:   const char **old_regstart, **old_regend;
                   3946: #endif
                   3947: 
                   3948:   /* The is_active field of reg_info helps us keep track of which (possibly
                   3949:      nested) subexpressions we are currently in. The matched_something
                   3950:      field of reg_info[reg_num] helps us tell whether or not we have
                   3951:      matched any of the pattern so far this time through the reg_num-th
                   3952:      subexpression.  These two fields get reset each time through any
                   3953:      loop their register is in.  */
                   3954: #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
                   3955:   register_info_type *reg_info;
                   3956: #endif
                   3957: 
                   3958:   /* The following record the register info as found in the above
                   3959:      variables when we find a match better than any we've seen before.
                   3960:      This happens as we backtrack through the failure points, which in
                   3961:      turn happens only if we have not yet matched the entire string. */
                   3962:   unsigned best_regs_set = false;
                   3963: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
                   3964:   const char **best_regstart, **best_regend;
                   3965: #endif
                   3966: 
                   3967:   /* Logically, this is `best_regend[0]'.  But we don't want to have to
                   3968:      allocate space for that if we're not allocating space for anything
                   3969:      else (see below).  Also, we never need info about register 0 for
                   3970:      any of the other register vectors, and it seems rather a kludge to
                   3971:      treat `best_regend' differently than the rest.  So we keep track of
                   3972:      the end of the best match so far in a separate variable.  We
                   3973:      initialize this to NULL so that when we backtrack the first time
                   3974:      and need to test it, it's not garbage.  */
                   3975:   const char *match_end = NULL;
                   3976: 
                   3977:   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
                   3978:   int set_regs_matched_done = 0;
                   3979: 
                   3980:   /* Used when we pop values we don't care about.  */
                   3981: #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
                   3982:   const char **reg_dummy;
                   3983:   register_info_type *reg_info_dummy;
                   3984: #endif
                   3985: 
                   3986: #ifdef DEBUG
                   3987:   /* Counts the total number of registers pushed.  */
                   3988:   unsigned num_regs_pushed = 0;
                   3989: #endif
                   3990: 
                   3991:   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
                   3992: 
                   3993:   INIT_FAIL_STACK ();
                   3994: 
                   3995: #ifdef MATCH_MAY_ALLOCATE
                   3996:   /* Do not bother to initialize all the register variables if there are
                   3997:      no groups in the pattern, as it takes a fair amount of time.  If
                   3998:      there are groups, we include space for register 0 (the whole
                   3999:      pattern), even though we never use it, since it simplifies the
                   4000:      array indexing.  We should fix this.  */
                   4001:   if (bufp->re_nsub)
                   4002:     {
                   4003:       regstart = REGEX_TALLOC (num_regs, const char *);
                   4004:       regend = REGEX_TALLOC (num_regs, const char *);
                   4005:       old_regstart = REGEX_TALLOC (num_regs, const char *);
                   4006:       old_regend = REGEX_TALLOC (num_regs, const char *);
                   4007:       best_regstart = REGEX_TALLOC (num_regs, const char *);
                   4008:       best_regend = REGEX_TALLOC (num_regs, const char *);
                   4009:       reg_info = REGEX_TALLOC (num_regs, register_info_type);
                   4010:       reg_dummy = REGEX_TALLOC (num_regs, const char *);
                   4011:       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
                   4012: 
                   4013:       if (!(regstart && regend && old_regstart && old_regend && reg_info
                   4014:             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
                   4015:         {
                   4016:           FREE_VARIABLES ();
                   4017:           return -2;
                   4018:         }
                   4019:     }
                   4020:   else
                   4021:     {
                   4022:       /* We must initialize all our variables to NULL, so that
                   4023:          `FREE_VARIABLES' doesn't try to free them.  */
                   4024:       regstart = regend = old_regstart = old_regend = best_regstart
                   4025:         = best_regend = reg_dummy = NULL;
                   4026:       reg_info = reg_info_dummy = (register_info_type *) NULL;
                   4027:     }
                   4028: #endif /* MATCH_MAY_ALLOCATE */
                   4029: 
                   4030:   /* The starting position is bogus.  */
                   4031:   if (pos < 0 || pos > size1 + size2)
                   4032:     {
                   4033:       FREE_VARIABLES ();
                   4034:       return -1;
                   4035:     }
                   4036: 
                   4037:   /* Initialize subexpression text positions to -1 to mark ones that no
                   4038:      start_memory/stop_memory has been seen for. Also initialize the
                   4039:      register information struct.  */
                   4040:   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
                   4041:     {
                   4042:       regstart[mcnt] = regend[mcnt]
                   4043:         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
                   4044: 
                   4045:       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
                   4046:       IS_ACTIVE (reg_info[mcnt]) = 0;
                   4047:       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
                   4048:       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
                   4049:     }
                   4050: 
                   4051:   /* We move `string1' into `string2' if the latter's empty -- but not if
                   4052:      `string1' is null.  */
                   4053:   if (size2 == 0 && string1 != NULL)
                   4054:     {
                   4055:       string2 = string1;
                   4056:       size2 = size1;
                   4057:       string1 = 0;
                   4058:       size1 = 0;
                   4059:     }
                   4060:   end1 = string1 + size1;
                   4061:   end2 = string2 + size2;
                   4062: 
                   4063:   /* Compute where to stop matching, within the two strings.  */
                   4064:   if (stop <= size1)
                   4065:     {
                   4066:       end_match_1 = string1 + stop;
                   4067:       end_match_2 = string2;
                   4068:     }
                   4069:   else
                   4070:     {
                   4071:       end_match_1 = end1;
                   4072:       end_match_2 = string2 + stop - size1;
                   4073:     }
                   4074: 
                   4075:   /* `p' scans through the pattern as `d' scans through the data.
                   4076:      `dend' is the end of the input string that `d' points within.  `d'
                   4077:      is advanced into the following input string whenever necessary, but
                   4078:      this happens before fetching; therefore, at the beginning of the
                   4079:      loop, `d' can be pointing at the end of a string, but it cannot
                   4080:      equal `string2'.  */
                   4081:   if (size1 > 0 && pos <= size1)
                   4082:     {
                   4083:       d = string1 + pos;
                   4084:       dend = end_match_1;
                   4085:     }
                   4086:   else
                   4087:     {
                   4088:       d = string2 + pos - size1;
                   4089:       dend = end_match_2;
                   4090:     }
                   4091: 
                   4092:   DEBUG_PRINT1 ("The compiled pattern is:\n");
                   4093:   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
                   4094:   DEBUG_PRINT1 ("The string to match is: `");
                   4095:   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
                   4096:   DEBUG_PRINT1 ("'\n");
                   4097: 
                   4098:   /* This loops over pattern commands.  It exits by returning from the
                   4099:      function if the match is complete, or it drops through if the match
                   4100:      fails at this starting point in the input data.  */
                   4101:   for (;;)
                   4102:     {
                   4103: #ifdef _LIBC
                   4104:       DEBUG_PRINT2 ("\n%p: ", p);
                   4105: #else
                   4106:       DEBUG_PRINT2 ("\n0x%x: ", p);
                   4107: #endif
                   4108: 
                   4109:       if (p == pend)
                   4110:        { /* End of pattern means we might have succeeded.  */
                   4111:           DEBUG_PRINT1 ("end of pattern ... ");
                   4112: 
                   4113:          /* If we haven't matched the entire string, and we want the
                   4114:              longest match, try backtracking.  */
                   4115:           if (d != end_match_2)
                   4116:            {
                   4117:              /* 1 if this match ends in the same string (string1 or string2)
                   4118:                 as the best previous match.  */
                   4119:              boolean same_str_p = (FIRST_STRING_P (match_end)
                   4120:                                    == MATCHING_IN_FIRST_STRING);
                   4121:              /* 1 if this match is the best seen so far.  */
                   4122:              boolean best_match_p;
                   4123: 
                   4124:              /* AIX compiler got confused when this was combined
                   4125:                 with the previous declaration.  */
                   4126:              if (same_str_p)
                   4127:                best_match_p = d > match_end;
                   4128:              else
                   4129:                best_match_p = !MATCHING_IN_FIRST_STRING;
                   4130: 
                   4131:               DEBUG_PRINT1 ("backtracking.\n");
                   4132: 
                   4133:               if (!FAIL_STACK_EMPTY ())
                   4134:                 { /* More failure points to try.  */
                   4135: 
                   4136:                   /* If exceeds best match so far, save it.  */
                   4137:                   if (!best_regs_set || best_match_p)
                   4138:                     {
                   4139:                       best_regs_set = true;
                   4140:                       match_end = d;
                   4141: 
                   4142:                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
                   4143: 
                   4144:                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
                   4145:                         {
                   4146:                           best_regstart[mcnt] = regstart[mcnt];
                   4147:                           best_regend[mcnt] = regend[mcnt];
                   4148:                         }
                   4149:                     }
                   4150:                   goto fail;
                   4151:                 }
                   4152: 
                   4153:               /* If no failure points, don't restore garbage.  And if
                   4154:                  last match is real best match, don't restore second
                   4155:                  best one. */
                   4156:               else if (best_regs_set && !best_match_p)
                   4157:                 {
                   4158:                restore_best_regs:
                   4159:                   /* Restore best match.  It may happen that `dend ==
                   4160:                      end_match_1' while the restored d is in string2.
                   4161:                      For example, the pattern `x.*y.*z' against the
                   4162:                      strings `x-' and `y-z-', if the two strings are
                   4163:                      not consecutive in memory.  */
                   4164:                   DEBUG_PRINT1 ("Restoring best registers.\n");
                   4165: 
                   4166:                   d = match_end;
                   4167:                   dend = ((d >= string1 && d <= end1)
                   4168:                           ? end_match_1 : end_match_2);
                   4169: 
                   4170:                  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
                   4171:                    {
                   4172:                      regstart[mcnt] = best_regstart[mcnt];
                   4173:                      regend[mcnt] = best_regend[mcnt];
                   4174:                    }
                   4175:                 }
                   4176:             } /* d != end_match_2 */
                   4177: 
                   4178:        succeed_label:
                   4179:           DEBUG_PRINT1 ("Accepting match.\n");
                   4180: 
                   4181:           /* If caller wants register contents data back, do it.  */
                   4182:           if (regs && !bufp->no_sub)
                   4183:            {
                   4184:               /* Have the register data arrays been allocated?  */
                   4185:               if (bufp->regs_allocated == REGS_UNALLOCATED)
                   4186:                 { /* No.  So allocate them with malloc.  We need one
                   4187:                      extra element beyond `num_regs' for the `-1' marker
                   4188:                      GNU code uses.  */
                   4189:                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
                   4190:                   regs->start = TALLOC (regs->num_regs, regoff_t);
                   4191:                   regs->end = TALLOC (regs->num_regs, regoff_t);
                   4192:                   if (regs->start == NULL || regs->end == NULL)
                   4193:                    {
                   4194:                      FREE_VARIABLES ();
                   4195:                      return -2;
                   4196:                    }
                   4197:                   bufp->regs_allocated = REGS_REALLOCATE;
                   4198:                 }
                   4199:               else if (bufp->regs_allocated == REGS_REALLOCATE)
                   4200:                 { /* Yes.  If we need more elements than were already
                   4201:                      allocated, reallocate them.  If we need fewer, just
                   4202:                      leave it alone.  */
                   4203:                   if (regs->num_regs < num_regs + 1)
                   4204:                     {
                   4205:                       regs->num_regs = num_regs + 1;
                   4206:                       RETALLOC (regs->start, regs->num_regs, regoff_t);
                   4207:                       RETALLOC (regs->end, regs->num_regs, regoff_t);
                   4208:                       if (regs->start == NULL || regs->end == NULL)
                   4209:                        {
                   4210:                          FREE_VARIABLES ();
                   4211:                          return -2;
                   4212:                        }
                   4213:                     }
                   4214:                 }
                   4215:               else
                   4216:                {
                   4217:                  /* These braces fend off a "empty body in an else-statement"
                   4218:                     warning under GCC when assert expands to nothing.  */
                   4219:                  assert (bufp->regs_allocated == REGS_FIXED);
                   4220:                }
                   4221: 
                   4222:               /* Convert the pointer data in `regstart' and `regend' to
                   4223:                  indices.  Register zero has to be set differently,
                   4224:                  since we haven't kept track of any info for it.  */
                   4225:               if (regs->num_regs > 0)
                   4226:                 {
                   4227:                   regs->start[0] = pos;
                   4228:                   regs->end[0] = (MATCHING_IN_FIRST_STRING
                   4229:                                  ? ((regoff_t) (d - string1))
                   4230:                                  : ((regoff_t) (d - string2 + size1)));
                   4231:                 }
                   4232: 
                   4233:               /* Go through the first `min (num_regs, regs->num_regs)'
                   4234:                  registers, since that is all we initialized.  */
                   4235:              for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
                   4236:                   mcnt++)
                   4237:                {
                   4238:                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
                   4239:                     regs->start[mcnt] = regs->end[mcnt] = -1;
                   4240:                   else
                   4241:                     {
                   4242:                      regs->start[mcnt]
                   4243:                        = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
                   4244:                       regs->end[mcnt]
                   4245:                        = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
                   4246:                     }
                   4247:                }
                   4248: 
                   4249:               /* If the regs structure we return has more elements than
                   4250:                  were in the pattern, set the extra elements to -1.  If
                   4251:                  we (re)allocated the registers, this is the case,
                   4252:                  because we always allocate enough to have at least one
                   4253:                  -1 at the end.  */
                   4254:               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
                   4255:                 regs->start[mcnt] = regs->end[mcnt] = -1;
                   4256:            } /* regs && !bufp->no_sub */
                   4257: 
                   4258:           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
                   4259:                         nfailure_points_pushed, nfailure_points_popped,
                   4260:                         nfailure_points_pushed - nfailure_points_popped);
                   4261:           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
                   4262: 
                   4263:           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
                   4264:                            ? string1
                   4265:                            : string2 - size1);
                   4266: 
                   4267:           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
                   4268: 
                   4269:           FREE_VARIABLES ();
                   4270:           return mcnt;
                   4271:         }
                   4272: 
                   4273:       /* Otherwise match next pattern command.  */
                   4274:       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
                   4275:        {
                   4276:         /* Ignore these.  Used to ignore the n of succeed_n's which
                   4277:            currently have n == 0.  */
                   4278:         case no_op:
                   4279:           DEBUG_PRINT1 ("EXECUTING no_op.\n");
                   4280:           break;
                   4281: 
                   4282:        case succeed:
                   4283:           DEBUG_PRINT1 ("EXECUTING succeed.\n");
                   4284:          goto succeed_label;
                   4285: 
                   4286:         /* Match the next n pattern characters exactly.  The following
                   4287:            byte in the pattern defines n, and the n bytes after that
                   4288:            are the characters to match.  */
                   4289:        case exactn:
                   4290:          mcnt = *p++;
                   4291:           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
                   4292: 
                   4293:           /* This is written out as an if-else so we don't waste time
                   4294:              testing `translate' inside the loop.  */
                   4295:           if (translate)
                   4296:            {
                   4297:              do
                   4298:                {
                   4299:                  PREFETCH ();
                   4300:                  if ((unsigned char) translate[(unsigned char) *d++]
                   4301:                      != (unsigned char) *p++)
                   4302:                     goto fail;
                   4303:                }
                   4304:              while (--mcnt);
                   4305:            }
                   4306:          else
                   4307:            {
                   4308:              do
                   4309:                {
                   4310:                  PREFETCH ();
                   4311:                  if (*d++ != (char) *p++) goto fail;
                   4312:                }
                   4313:              while (--mcnt);
                   4314:            }
                   4315:          SET_REGS_MATCHED ();
                   4316:           break;
                   4317: 
                   4318: 
                   4319:         /* Match any character except possibly a newline or a null.  */
                   4320:        case anychar:
                   4321:           DEBUG_PRINT1 ("EXECUTING anychar.\n");
                   4322: 
                   4323:           PREFETCH ();
                   4324: 
                   4325:           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
                   4326:               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
                   4327:            goto fail;
                   4328: 
                   4329:           SET_REGS_MATCHED ();
                   4330:           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
                   4331:           d++;
                   4332:          break;
                   4333: 
                   4334: 
                   4335:        case charset:
                   4336:        case charset_not:
                   4337:          {
                   4338:            register unsigned char c;
                   4339:            boolean not = (re_opcode_t) *(p - 1) == charset_not;
                   4340: 
                   4341:             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
                   4342: 
                   4343:            PREFETCH ();
                   4344:            c = TRANSLATE (*d); /* The character to match.  */
                   4345: 
                   4346:             /* Cast to `unsigned' instead of `unsigned char' in case the
                   4347:                bit list is a full 32 bytes long.  */
                   4348:            if (c < (unsigned) (*p * BYTEWIDTH)
                   4349:                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                   4350:              not = !not;
                   4351: 
                   4352:            p += 1 + *p;
                   4353: 
                   4354:            if (!not) goto fail;
                   4355: 
                   4356:            SET_REGS_MATCHED ();
                   4357:             d++;
                   4358:            break;
                   4359:          }
                   4360: 
                   4361: 
                   4362:         /* The beginning of a group is represented by start_memory.
                   4363:            The arguments are the register number in the next byte, and the
                   4364:            number of groups inner to this one in the next.  The text
                   4365:            matched within the group is recorded (in the internal
                   4366:            registers data structure) under the register number.  */
                   4367:         case start_memory:
                   4368:          DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
                   4369: 
                   4370:           /* Find out if this group can match the empty string.  */
                   4371:          p1 = p;               /* To send to group_match_null_string_p.  */
                   4372: 
                   4373:           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
                   4374:             REG_MATCH_NULL_STRING_P (reg_info[*p])
                   4375:               = group_match_null_string_p (&p1, pend, reg_info);
                   4376: 
                   4377:           /* Save the position in the string where we were the last time
                   4378:              we were at this open-group operator in case the group is
                   4379:              operated upon by a repetition operator, e.g., with `(a*)*b'
                   4380:              against `ab'; then we want to ignore where we are now in
                   4381:              the string in case this attempt to match fails.  */
                   4382:           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
                   4383:                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
                   4384:                              : regstart[*p];
                   4385:          DEBUG_PRINT2 ("  old_regstart: %d\n",
                   4386:                         POINTER_TO_OFFSET (old_regstart[*p]));
                   4387: 
                   4388:           regstart[*p] = d;
                   4389:          DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
                   4390: 
                   4391:           IS_ACTIVE (reg_info[*p]) = 1;
                   4392:           MATCHED_SOMETHING (reg_info[*p]) = 0;
                   4393: 
                   4394:          /* Clear this whenever we change the register activity status.  */
                   4395:          set_regs_matched_done = 0;
                   4396: 
                   4397:           /* This is the new highest active register.  */
                   4398:           highest_active_reg = *p;
                   4399: 
                   4400:           /* If nothing was active before, this is the new lowest active
                   4401:              register.  */
                   4402:           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
                   4403:             lowest_active_reg = *p;
                   4404: 
                   4405:           /* Move past the register number and inner group count.  */
                   4406:           p += 2;
                   4407:          just_past_start_mem = p;
                   4408: 
                   4409:           break;
                   4410: 
                   4411: 
                   4412:         /* The stop_memory opcode represents the end of a group.  Its
                   4413:            arguments are the same as start_memory's: the register
                   4414:            number, and the number of inner groups.  */
                   4415:        case stop_memory:
                   4416:          DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
                   4417: 
                   4418:           /* We need to save the string position the last time we were at
                   4419:              this close-group operator in case the group is operated
                   4420:              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
                   4421:              against `aba'; then we want to ignore where we are now in
                   4422:              the string in case this attempt to match fails.  */
                   4423:           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
                   4424:                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
                   4425:                           : regend[*p];
                   4426:          DEBUG_PRINT2 ("      old_regend: %d\n",
                   4427:                         POINTER_TO_OFFSET (old_regend[*p]));
                   4428: 
                   4429:           regend[*p] = d;
                   4430:          DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
                   4431: 
                   4432:           /* This register isn't active anymore.  */
                   4433:           IS_ACTIVE (reg_info[*p]) = 0;
                   4434: 
                   4435:          /* Clear this whenever we change the register activity status.  */
                   4436:          set_regs_matched_done = 0;
                   4437: 
                   4438:           /* If this was the only register active, nothing is active
                   4439:              anymore.  */
                   4440:           if (lowest_active_reg == highest_active_reg)
                   4441:             {
                   4442:               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
                   4443:               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
                   4444:             }
                   4445:           else
                   4446:             { /* We must scan for the new highest active register, since
                   4447:                  it isn't necessarily one less than now: consider
                   4448:                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
                   4449:                  new highest active register is 1.  */
                   4450:               unsigned char r = *p - 1;
                   4451:               while (r > 0 && !IS_ACTIVE (reg_info[r]))
                   4452:                 r--;
                   4453: 
                   4454:               /* If we end up at register zero, that means that we saved
                   4455:                  the registers as the result of an `on_failure_jump', not
                   4456:                  a `start_memory', and we jumped to past the innermost
                   4457:                  `stop_memory'.  For example, in ((.)*) we save
                   4458:                  registers 1 and 2 as a result of the *, but when we pop
                   4459:                  back to the second ), we are at the stop_memory 1.
                   4460:                  Thus, nothing is active.  */
                   4461:              if (r == 0)
                   4462:                 {
                   4463:                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
                   4464:                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
                   4465:                 }
                   4466:               else
                   4467:                 highest_active_reg = r;
                   4468:             }
                   4469: 
                   4470:           /* If just failed to match something this time around with a
                   4471:              group that's operated on by a repetition operator, try to
                   4472:              force exit from the ``loop'', and restore the register
                   4473:              information for this group that we had before trying this
                   4474:              last match.  */
                   4475:           if ((!MATCHED_SOMETHING (reg_info[*p])
                   4476:                || just_past_start_mem == p - 1)
                   4477:              && (p + 2) < pend)
                   4478:             {
                   4479:               boolean is_a_jump_n = false;
                   4480: 
                   4481:               p1 = p + 2;
                   4482:               mcnt = 0;
                   4483:               switch ((re_opcode_t) *p1++)
                   4484:                 {
                   4485:                   case jump_n:
                   4486:                    is_a_jump_n = true;
                   4487:                   case pop_failure_jump:
                   4488:                  case maybe_pop_jump:
                   4489:                  case jump:
                   4490:                  case dummy_failure_jump:
                   4491:                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   4492:                    if (is_a_jump_n)
                   4493:                      p1 += 2;
                   4494:                     break;
                   4495: 
                   4496:                   default:
                   4497:                     /* do nothing */ ;
                   4498:                 }
                   4499:              p1 += mcnt;
                   4500: 
                   4501:               /* If the next operation is a jump backwards in the pattern
                   4502:                 to an on_failure_jump right before the start_memory
                   4503:                  corresponding to this stop_memory, exit from the loop
                   4504:                  by forcing a failure after pushing on the stack the
                   4505:                  on_failure_jump's jump in the pattern, and d.  */
                   4506:               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
                   4507:                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
                   4508:                {
                   4509:                   /* If this group ever matched anything, then restore
                   4510:                      what its registers were before trying this last
                   4511:                      failed match, e.g., with `(a*)*b' against `ab' for
                   4512:                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
                   4513:                      against `aba' for regend[3].
                   4514: 
                   4515:                      Also restore the registers for inner groups for,
                   4516:                      e.g., `((a*)(b*))*' against `aba' (register 3 would
                   4517:                      otherwise get trashed).  */
                   4518: 
                   4519:                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
                   4520:                    {
                   4521:                      unsigned r;
                   4522: 
                   4523:                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
                   4524: 
                   4525:                      /* Restore this and inner groups' (if any) registers.  */
                   4526:                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
                   4527:                           r++)
                   4528:                         {
                   4529:                           regstart[r] = old_regstart[r];
                   4530: 
                   4531:                           /* xx why this test?  */
                   4532:                           if (old_regend[r] >= regstart[r])
                   4533:                             regend[r] = old_regend[r];
                   4534:                         }
                   4535:                     }
                   4536:                  p1++;
                   4537:                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   4538:                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
                   4539: 
                   4540:                   goto fail;
                   4541:                 }
                   4542:             }
                   4543: 
                   4544:           /* Move past the register number and the inner group count.  */
                   4545:           p += 2;
                   4546:           break;
                   4547: 
                   4548: 
                   4549:        /* \<digit> has been turned into a `duplicate' command which is
                   4550:            followed by the numeric value of <digit> as the register number.  */
                   4551:         case duplicate:
                   4552:          {
                   4553:            register const char *d2, *dend2;
                   4554:            int regno = *p++;   /* Get which register to match against.  */
                   4555:            DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
                   4556: 
                   4557:            /* Can't back reference a group which we've never matched.  */
                   4558:             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
                   4559:               goto fail;
                   4560: 
                   4561:             /* Where in input to try to start matching.  */
                   4562:             d2 = regstart[regno];
                   4563: 
                   4564:             /* Where to stop matching; if both the place to start and
                   4565:                the place to stop matching are in the same string, then
                   4566:                set to the place to stop, otherwise, for now have to use
                   4567:                the end of the first string.  */
                   4568: 
                   4569:             dend2 = ((FIRST_STRING_P (regstart[regno])
                   4570:                      == FIRST_STRING_P (regend[regno]))
                   4571:                     ? regend[regno] : end_match_1);
                   4572:            for (;;)
                   4573:              {
                   4574:                /* If necessary, advance to next segment in register
                   4575:                    contents.  */
                   4576:                while (d2 == dend2)
                   4577:                  {
                   4578:                    if (dend2 == end_match_2) break;
                   4579:                    if (dend2 == regend[regno]) break;
                   4580: 
                   4581:                     /* End of string1 => advance to string2. */
                   4582:                     d2 = string2;
                   4583:                     dend2 = regend[regno];
                   4584:                  }
                   4585:                /* At end of register contents => success */
                   4586:                if (d2 == dend2) break;
                   4587: 
                   4588:                /* If necessary, advance to next segment in data.  */
                   4589:                PREFETCH ();
                   4590: 
                   4591:                /* How many characters left in this segment to match.  */
                   4592:                mcnt = dend - d;
                   4593: 
                   4594:                /* Want how many consecutive characters we can match in
                   4595:                    one shot, so, if necessary, adjust the count.  */
                   4596:                 if (mcnt > dend2 - d2)
                   4597:                  mcnt = dend2 - d2;
                   4598: 
                   4599:                /* Compare that many; failure if mismatch, else move
                   4600:                    past them.  */
                   4601:                if (translate
                   4602:                     ? bcmp_translate (d, d2, mcnt, translate)
                   4603:                     : memcmp (d, d2, mcnt))
                   4604:                  goto fail;
                   4605:                d += mcnt, d2 += mcnt;
                   4606: 
                   4607:                /* Do this because we've match some characters.  */
                   4608:                SET_REGS_MATCHED ();
                   4609:              }
                   4610:          }
                   4611:          break;
                   4612: 
                   4613: 
                   4614:         /* begline matches the empty string at the beginning of the string
                   4615:            (unless `not_bol' is set in `bufp'), and, if
                   4616:            `newline_anchor' is set, after newlines.  */
                   4617:        case begline:
                   4618:           DEBUG_PRINT1 ("EXECUTING begline.\n");
                   4619: 
                   4620:           if (AT_STRINGS_BEG (d))
                   4621:             {
                   4622:               if (!bufp->not_bol) break;
                   4623:             }
                   4624:           else if (d[-1] == '\n' && bufp->newline_anchor)
                   4625:             {
                   4626:               break;
                   4627:             }
                   4628:           /* In all other cases, we fail.  */
                   4629:           goto fail;
                   4630: 
                   4631: 
                   4632:         /* endline is the dual of begline.  */
                   4633:        case endline:
                   4634:           DEBUG_PRINT1 ("EXECUTING endline.\n");
                   4635: 
                   4636:           if (AT_STRINGS_END (d))
                   4637:             {
                   4638:               if (!bufp->not_eol) break;
                   4639:             }
                   4640: 
                   4641:           /* We have to ``prefetch'' the next character.  */
                   4642:           else if ((d == end1 ? *string2 : *d) == '\n'
                   4643:                    && bufp->newline_anchor)
                   4644:             {
                   4645:               break;
                   4646:             }
                   4647:           goto fail;
                   4648: 
                   4649: 
                   4650:        /* Match at the very beginning of the data.  */
                   4651:         case begbuf:
                   4652:           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
                   4653:           if (AT_STRINGS_BEG (d))
                   4654:             break;
                   4655:           goto fail;
                   4656: 
                   4657: 
                   4658:        /* Match at the very end of the data.  */
                   4659:         case endbuf:
                   4660:           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
                   4661:          if (AT_STRINGS_END (d))
                   4662:            break;
                   4663:           goto fail;
                   4664: 
                   4665: 
                   4666:         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
                   4667:            pushes NULL as the value for the string on the stack.  Then
                   4668:            `pop_failure_point' will keep the current value for the
                   4669:            string, instead of restoring it.  To see why, consider
                   4670:            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
                   4671:            then the . fails against the \n.  But the next thing we want
                   4672:            to do is match the \n against the \n; if we restored the
                   4673:            string value, we would be back at the foo.
                   4674: 
                   4675:            Because this is used only in specific cases, we don't need to
                   4676:            check all the things that `on_failure_jump' does, to make
                   4677:            sure the right things get saved on the stack.  Hence we don't
                   4678:            share its code.  The only reason to push anything on the
                   4679:            stack at all is that otherwise we would have to change
                   4680:            `anychar's code to do something besides goto fail in this
                   4681:            case; that seems worse than this.  */
                   4682:         case on_failure_keep_string_jump:
                   4683:           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
                   4684: 
                   4685:           EXTRACT_NUMBER_AND_INCR (mcnt, p);
                   4686: #ifdef _LIBC
                   4687:           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
                   4688: #else
                   4689:           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
                   4690: #endif
                   4691: 
                   4692:           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
                   4693:           break;
                   4694: 
                   4695: 
                   4696:        /* Uses of on_failure_jump:
                   4697: 
                   4698:            Each alternative starts with an on_failure_jump that points
                   4699:            to the beginning of the next alternative.  Each alternative
                   4700:            except the last ends with a jump that in effect jumps past
                   4701:            the rest of the alternatives.  (They really jump to the
                   4702:            ending jump of the following alternative, because tensioning
                   4703:            these jumps is a hassle.)
                   4704: 
                   4705:            Repeats start with an on_failure_jump that points past both
                   4706:            the repetition text and either the following jump or
                   4707:            pop_failure_jump back to this on_failure_jump.  */
                   4708:        case on_failure_jump:
                   4709:         on_failure:
                   4710:           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
                   4711: 
                   4712:           EXTRACT_NUMBER_AND_INCR (mcnt, p);
                   4713: #ifdef _LIBC
                   4714:           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
                   4715: #else
                   4716:           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
                   4717: #endif
                   4718: 
                   4719:           /* If this on_failure_jump comes right before a group (i.e.,
                   4720:              the original * applied to a group), save the information
                   4721:              for that group and all inner ones, so that if we fail back
                   4722:              to this point, the group's information will be correct.
                   4723:              For example, in \(a*\)*\1, we need the preceding group,
                   4724:              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
                   4725: 
                   4726:           /* We can't use `p' to check ahead because we push
                   4727:              a failure point to `p + mcnt' after we do this.  */
                   4728:           p1 = p;
                   4729: 
                   4730:           /* We need to skip no_op's before we look for the
                   4731:              start_memory in case this on_failure_jump is happening as
                   4732:              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
                   4733:              against aba.  */
                   4734:           while (p1 < pend && (re_opcode_t) *p1 == no_op)
                   4735:             p1++;
                   4736: 
                   4737:           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
                   4738:             {
                   4739:               /* We have a new highest active register now.  This will
                   4740:                  get reset at the start_memory we are about to get to,
                   4741:                  but we will have saved all the registers relevant to
                   4742:                  this repetition op, as described above.  */
                   4743:               highest_active_reg = *(p1 + 1) + *(p1 + 2);
                   4744:               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
                   4745:                 lowest_active_reg = *(p1 + 1);
                   4746:             }
                   4747: 
                   4748:           DEBUG_PRINT1 (":\n");
                   4749:           PUSH_FAILURE_POINT (p + mcnt, d, -2);
                   4750:           break;
                   4751: 
                   4752: 
                   4753:         /* A smart repeat ends with `maybe_pop_jump'.
                   4754:           We change it to either `pop_failure_jump' or `jump'.  */
                   4755:         case maybe_pop_jump:
                   4756:           EXTRACT_NUMBER_AND_INCR (mcnt, p);
                   4757:           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
                   4758:           {
                   4759:            register unsigned char *p2 = p;
                   4760: 
                   4761:             /* Compare the beginning of the repeat with what in the
                   4762:                pattern follows its end. If we can establish that there
                   4763:                is nothing that they would both match, i.e., that we
                   4764:                would have to backtrack because of (as in, e.g., `a*a')
                   4765:                then we can change to pop_failure_jump, because we'll
                   4766:                never have to backtrack.
                   4767: 
                   4768:                This is not true in the case of alternatives: in
                   4769:                `(a|ab)*' we do need to backtrack to the `ab' alternative
                   4770:                (e.g., if the string was `ab').  But instead of trying to
                   4771:                detect that here, the alternative has put on a dummy
                   4772:                failure point which is what we will end up popping.  */
                   4773: 
                   4774:            /* Skip over open/close-group commands.
                   4775:               If what follows this loop is a ...+ construct,
                   4776:               look at what begins its body, since we will have to
                   4777:               match at least one of that.  */
                   4778:            while (1)
                   4779:              {
                   4780:                if (p2 + 2 < pend
                   4781:                    && ((re_opcode_t) *p2 == stop_memory
                   4782:                        || (re_opcode_t) *p2 == start_memory))
                   4783:                  p2 += 3;
                   4784:                else if (p2 + 6 < pend
                   4785:                         && (re_opcode_t) *p2 == dummy_failure_jump)
                   4786:                  p2 += 6;
                   4787:                else
                   4788:                  break;
                   4789:              }
                   4790: 
                   4791:            p1 = p + mcnt;
                   4792:            /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
                   4793:               to the `maybe_finalize_jump' of this case.  Examine what
                   4794:               follows.  */
                   4795: 
                   4796:             /* If we're at the end of the pattern, we can change.  */
                   4797:             if (p2 == pend)
                   4798:              {
                   4799:                /* Consider what happens when matching ":\(.*\)"
                   4800:                   against ":/".  I don't really understand this code
                   4801:                   yet.  */
                   4802:                p[-3] = (unsigned char) pop_failure_jump;
                   4803:                 DEBUG_PRINT1
                   4804:                   ("  End of pattern: change to `pop_failure_jump'.\n");
                   4805:               }
                   4806: 
                   4807:             else if ((re_opcode_t) *p2 == exactn
                   4808:                     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
                   4809:              {
                   4810:                register unsigned char c
                   4811:                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
                   4812: 
                   4813:                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
                   4814:                   {
                   4815:                    p[-3] = (unsigned char) pop_failure_jump;
                   4816:                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
                   4817:                                   c, p1[5]);
                   4818:                   }
                   4819: 
                   4820:                else if ((re_opcode_t) p1[3] == charset
                   4821:                         || (re_opcode_t) p1[3] == charset_not)
                   4822:                  {
                   4823:                    int not = (re_opcode_t) p1[3] == charset_not;
                   4824: 
                   4825:                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
                   4826:                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                   4827:                      not = !not;
                   4828: 
                   4829:                     /* `not' is equal to 1 if c would match, which means
                   4830:                         that we can't change to pop_failure_jump.  */
                   4831:                    if (!not)
                   4832:                       {
                   4833:                        p[-3] = (unsigned char) pop_failure_jump;
                   4834:                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
                   4835:                       }
                   4836:                  }
                   4837:              }
                   4838:             else if ((re_opcode_t) *p2 == charset)
                   4839:              {
                   4840: #ifdef DEBUG
                   4841:                register unsigned char c
                   4842:                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
                   4843: #endif
                   4844: 
                   4845: #if 0
                   4846:                 if ((re_opcode_t) p1[3] == exactn
                   4847:                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
                   4848:                          && (p2[2 + p1[5] / BYTEWIDTH]
                   4849:                              & (1 << (p1[5] % BYTEWIDTH)))))
                   4850: #else
                   4851:                 if ((re_opcode_t) p1[3] == exactn
                   4852:                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
                   4853:                          && (p2[2 + p1[4] / BYTEWIDTH]
                   4854:                              & (1 << (p1[4] % BYTEWIDTH)))))
                   4855: #endif
                   4856:                   {
                   4857:                    p[-3] = (unsigned char) pop_failure_jump;
                   4858:                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
                   4859:                                   c, p1[5]);
                   4860:                   }
                   4861: 
                   4862:                else if ((re_opcode_t) p1[3] == charset_not)
                   4863:                  {
                   4864:                    int idx;
                   4865:                    /* We win if the charset_not inside the loop
                   4866:                       lists every character listed in the charset after.  */
                   4867:                    for (idx = 0; idx < (int) p2[1]; idx++)
                   4868:                      if (! (p2[2 + idx] == 0
                   4869:                             || (idx < (int) p1[4]
                   4870:                                 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
                   4871:                        break;
                   4872: 
                   4873:                    if (idx == p2[1])
                   4874:                       {
                   4875:                        p[-3] = (unsigned char) pop_failure_jump;
                   4876:                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
                   4877:                       }
                   4878:                  }
                   4879:                else if ((re_opcode_t) p1[3] == charset)
                   4880:                  {
                   4881:                    int idx;
                   4882:                    /* We win if the charset inside the loop
                   4883:                       has no overlap with the one after the loop.  */
                   4884:                    for (idx = 0;
                   4885:                         idx < (int) p2[1] && idx < (int) p1[4];
                   4886:                         idx++)
                   4887:                      if ((p2[2 + idx] & p1[5 + idx]) != 0)
                   4888:                        break;
                   4889: 
                   4890:                    if (idx == p2[1] || idx == p1[4])
                   4891:                       {
                   4892:                        p[-3] = (unsigned char) pop_failure_jump;
                   4893:                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
                   4894:                       }
                   4895:                  }
                   4896:              }
                   4897:          }
                   4898:          p -= 2;               /* Point at relative address again.  */
                   4899:          if ((re_opcode_t) p[-1] != pop_failure_jump)
                   4900:            {
                   4901:              p[-1] = (unsigned char) jump;
                   4902:               DEBUG_PRINT1 ("  Match => jump.\n");
                   4903:              goto unconditional_jump;
                   4904:            }
                   4905:         /* Note fall through.  */
                   4906: 
                   4907: 
                   4908:        /* The end of a simple repeat has a pop_failure_jump back to
                   4909:            its matching on_failure_jump, where the latter will push a
                   4910:            failure point.  The pop_failure_jump takes off failure
                   4911:            points put on by this pop_failure_jump's matching
                   4912:            on_failure_jump; we got through the pattern to here from the
                   4913:            matching on_failure_jump, so didn't fail.  */
                   4914:         case pop_failure_jump:
                   4915:           {
                   4916:             /* We need to pass separate storage for the lowest and
                   4917:                highest registers, even though we don't care about the
                   4918:                actual values.  Otherwise, we will restore only one
                   4919:                register from the stack, since lowest will == highest in
                   4920:                `pop_failure_point'.  */
                   4921:             active_reg_t dummy_low_reg, dummy_high_reg;
                   4922:             unsigned char *pdummy;
                   4923:             const char *sdummy;
                   4924: 
                   4925:             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
                   4926:             POP_FAILURE_POINT (sdummy, pdummy,
                   4927:                                dummy_low_reg, dummy_high_reg,
                   4928:                                reg_dummy, reg_dummy, reg_info_dummy);
                   4929:           }
                   4930:          /* Note fall through.  */
                   4931: 
                   4932:        unconditional_jump:
                   4933: #ifdef _LIBC
                   4934:          DEBUG_PRINT2 ("\n%p: ", p);
                   4935: #else
                   4936:          DEBUG_PRINT2 ("\n0x%x: ", p);
                   4937: #endif
                   4938:           /* Note fall through.  */
                   4939: 
                   4940:         /* Unconditionally jump (without popping any failure points).  */
                   4941:         case jump:
                   4942:          EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
                   4943:           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
                   4944:          p += mcnt;                            /* Do the jump.  */
                   4945: #ifdef _LIBC
                   4946:           DEBUG_PRINT2 ("(to %p).\n", p);
                   4947: #else
                   4948:           DEBUG_PRINT2 ("(to 0x%x).\n", p);
                   4949: #endif
                   4950:          break;
                   4951: 
                   4952: 
                   4953:         /* We need this opcode so we can detect where alternatives end
                   4954:            in `group_match_null_string_p' et al.  */
                   4955:         case jump_past_alt:
                   4956:           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
                   4957:           goto unconditional_jump;
                   4958: 
                   4959: 
                   4960:         /* Normally, the on_failure_jump pushes a failure point, which
                   4961:            then gets popped at pop_failure_jump.  We will end up at
                   4962:            pop_failure_jump, also, and with a pattern of, say, `a+', we
                   4963:            are skipping over the on_failure_jump, so we have to push
                   4964:            something meaningless for pop_failure_jump to pop.  */
                   4965:         case dummy_failure_jump:
                   4966:           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
                   4967:           /* It doesn't matter what we push for the string here.  What
                   4968:              the code at `fail' tests is the value for the pattern.  */
                   4969:           PUSH_FAILURE_POINT (NULL, NULL, -2);
                   4970:           goto unconditional_jump;
                   4971: 
                   4972: 
                   4973:         /* At the end of an alternative, we need to push a dummy failure
                   4974:            point in case we are followed by a `pop_failure_jump', because
                   4975:            we don't want the failure point for the alternative to be
                   4976:            popped.  For example, matching `(a|ab)*' against `aab'
                   4977:            requires that we match the `ab' alternative.  */
                   4978:         case push_dummy_failure:
                   4979:           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
                   4980:           /* See comments just above at `dummy_failure_jump' about the
                   4981:              two zeroes.  */
                   4982:           PUSH_FAILURE_POINT (NULL, NULL, -2);
                   4983:           break;
                   4984: 
                   4985:         /* Have to succeed matching what follows at least n times.
                   4986:            After that, handle like `on_failure_jump'.  */
                   4987:         case succeed_n:
                   4988:           EXTRACT_NUMBER (mcnt, p + 2);
                   4989:           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
                   4990: 
                   4991:           assert (mcnt >= 0);
                   4992:           /* Originally, this is how many times we HAVE to succeed.  */
                   4993:           if (mcnt > 0)
                   4994:             {
                   4995:                mcnt--;
                   4996:               p += 2;
                   4997:                STORE_NUMBER_AND_INCR (p, mcnt);
                   4998: #ifdef _LIBC
                   4999:                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
                   5000: #else
                   5001:                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
                   5002: #endif
                   5003:             }
                   5004:          else if (mcnt == 0)
                   5005:             {
                   5006: #ifdef _LIBC
                   5007:               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
                   5008: #else
                   5009:               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
                   5010: #endif
                   5011:              p[2] = (unsigned char) no_op;
                   5012:               p[3] = (unsigned char) no_op;
                   5013:               goto on_failure;
                   5014:             }
                   5015:           break;
                   5016: 
                   5017:         case jump_n:
                   5018:           EXTRACT_NUMBER (mcnt, p + 2);
                   5019:           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
                   5020: 
                   5021:           /* Originally, this is how many times we CAN jump.  */
                   5022:           if (mcnt)
                   5023:             {
                   5024:                mcnt--;
                   5025:                STORE_NUMBER (p + 2, mcnt);
                   5026: #ifdef _LIBC
                   5027:                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
                   5028: #else
                   5029:                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
                   5030: #endif
                   5031:               goto unconditional_jump;
                   5032:             }
                   5033:           /* If don't have to jump any more, skip over the rest of command.  */
                   5034:          else
                   5035:            p += 4;
                   5036:           break;
                   5037: 
                   5038:        case set_number_at:
                   5039:          {
                   5040:             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
                   5041: 
                   5042:             EXTRACT_NUMBER_AND_INCR (mcnt, p);
                   5043:             p1 = p + mcnt;
                   5044:             EXTRACT_NUMBER_AND_INCR (mcnt, p);
                   5045: #ifdef _LIBC
                   5046:             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
                   5047: #else
                   5048:             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
                   5049: #endif
                   5050:            STORE_NUMBER (p1, mcnt);
                   5051:             break;
                   5052:           }
                   5053: 
                   5054: #if 0
                   5055:        /* The DEC Alpha C compiler 3.x generates incorrect code for the
                   5056:           test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
                   5057:           AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
                   5058:           macro and introducing temporary variables works around the bug.  */
                   5059: 
                   5060:        case wordbound:
                   5061:          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
                   5062:          if (AT_WORD_BOUNDARY (d))
                   5063:            break;
                   5064:          goto fail;
                   5065: 
                   5066:        case notwordbound:
                   5067:          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
                   5068:          if (AT_WORD_BOUNDARY (d))
                   5069:            goto fail;
                   5070:          break;
                   5071: #else
                   5072:        case wordbound:
                   5073:        {
                   5074:          boolean prevchar, thischar;
                   5075: 
                   5076:          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
                   5077:          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
                   5078:            break;
                   5079: 
                   5080:          prevchar = WORDCHAR_P (d - 1);
                   5081:          thischar = WORDCHAR_P (d);
                   5082:          if (prevchar != thischar)
                   5083:            break;
                   5084:          goto fail;
                   5085:        }
                   5086: 
                   5087:       case notwordbound:
                   5088:        {
                   5089:          boolean prevchar, thischar;
                   5090: 
                   5091:          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
                   5092:          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
                   5093:            goto fail;
                   5094: 
                   5095:          prevchar = WORDCHAR_P (d - 1);
                   5096:          thischar = WORDCHAR_P (d);
                   5097:          if (prevchar != thischar)
                   5098:            goto fail;
                   5099:          break;
                   5100:        }
                   5101: #endif
                   5102: 
                   5103:        case wordbeg:
                   5104:           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
                   5105:          if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
                   5106:            break;
                   5107:           goto fail;
                   5108: 
                   5109:        case wordend:
                   5110:           DEBUG_PRINT1 ("EXECUTING wordend.\n");
                   5111:          if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
                   5112:               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
                   5113:            break;
                   5114:           goto fail;
                   5115: 
                   5116: #ifdef emacs
                   5117:        case before_dot:
                   5118:           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
                   5119:          if (PTR_CHAR_POS ((unsigned char *) d) >= point)
                   5120:            goto fail;
                   5121:          break;
                   5122: 
                   5123:        case at_dot:
                   5124:           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
                   5125:          if (PTR_CHAR_POS ((unsigned char *) d) != point)
                   5126:            goto fail;
                   5127:          break;
                   5128: 
                   5129:        case after_dot:
                   5130:           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
                   5131:           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
                   5132:            goto fail;
                   5133:          break;
                   5134: 
                   5135:        case syntaxspec:
                   5136:           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
                   5137:          mcnt = *p++;
                   5138:          goto matchsyntax;
                   5139: 
                   5140:         case wordchar:
                   5141:           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
                   5142:          mcnt = (int) Sword;
                   5143:         matchsyntax:
                   5144:          PREFETCH ();
                   5145:          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
                   5146:          d++;
                   5147:          if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
                   5148:            goto fail;
                   5149:           SET_REGS_MATCHED ();
                   5150:          break;
                   5151: 
                   5152:        case notsyntaxspec:
                   5153:           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
                   5154:          mcnt = *p++;
                   5155:          goto matchnotsyntax;
                   5156: 
                   5157:         case notwordchar:
                   5158:           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
                   5159:          mcnt = (int) Sword;
                   5160:         matchnotsyntax:
                   5161:          PREFETCH ();
                   5162:          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
                   5163:          d++;
                   5164:          if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
                   5165:            goto fail;
                   5166:          SET_REGS_MATCHED ();
                   5167:           break;
                   5168: 
                   5169: #else /* not emacs */
                   5170:        case wordchar:
                   5171:           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
                   5172:          PREFETCH ();
                   5173:           if (!WORDCHAR_P (d))
                   5174:             goto fail;
                   5175:          SET_REGS_MATCHED ();
                   5176:           d++;
                   5177:          break;
                   5178: 
                   5179:        case notwordchar:
                   5180:           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
                   5181:          PREFETCH ();
                   5182:          if (WORDCHAR_P (d))
                   5183:             goto fail;
                   5184:           SET_REGS_MATCHED ();
                   5185:           d++;
                   5186:          break;
                   5187: #endif /* not emacs */
                   5188: 
                   5189:         default:
                   5190:           abort ();
                   5191:        }
                   5192:       continue;  /* Successfully executed one pattern command; keep going.  */
                   5193: 
                   5194: 
                   5195:     /* We goto here if a matching operation fails. */
                   5196:     fail:
                   5197:       if (!FAIL_STACK_EMPTY ())
                   5198:        { /* A restart point is known.  Restore to that state.  */
                   5199:           DEBUG_PRINT1 ("\nFAIL:\n");
                   5200:           POP_FAILURE_POINT (d, p,
                   5201:                              lowest_active_reg, highest_active_reg,
                   5202:                              regstart, regend, reg_info);
                   5203: 
                   5204:           /* If this failure point is a dummy, try the next one.  */
                   5205:           if (!p)
                   5206:            goto fail;
                   5207: 
                   5208:           /* If we failed to the end of the pattern, don't examine *p.  */
                   5209:          assert (p <= pend);
                   5210:           if (p < pend)
                   5211:             {
                   5212:               boolean is_a_jump_n = false;
                   5213: 
                   5214:               /* If failed to a backwards jump that's part of a repetition
                   5215:                  loop, need to pop this failure point and use the next one.  */
                   5216:               switch ((re_opcode_t) *p)
                   5217:                 {
                   5218:                 case jump_n:
                   5219:                   is_a_jump_n = true;
                   5220:                 case maybe_pop_jump:
                   5221:                 case pop_failure_jump:
                   5222:                 case jump:
                   5223:                   p1 = p + 1;
                   5224:                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   5225:                   p1 += mcnt;
                   5226: 
                   5227:                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
                   5228:                       || (!is_a_jump_n
                   5229:                           && (re_opcode_t) *p1 == on_failure_jump))
                   5230:                     goto fail;
                   5231:                   break;
                   5232:                 default:
                   5233:                   /* do nothing */ ;
                   5234:                 }
                   5235:             }
                   5236: 
                   5237:           if (d >= string1 && d <= end1)
                   5238:            dend = end_match_1;
                   5239:         }
                   5240:       else
                   5241:         break;   /* Matching at this starting point really fails.  */
                   5242:     } /* for (;;) */
                   5243: 
                   5244:   if (best_regs_set)
                   5245:     goto restore_best_regs;
                   5246: 
                   5247:   FREE_VARIABLES ();
                   5248: 
                   5249:   return -1;                           /* Failure to match.  */
                   5250: } /* re_match_2 */
1.1.1.2 ! misho    5251: 
1.1       misho    5252: /* Subroutine definitions for re_match_2.  */
                   5253: 
                   5254: 
                   5255: /* We are passed P pointing to a register number after a start_memory.
                   5256: 
                   5257:    Return true if the pattern up to the corresponding stop_memory can
                   5258:    match the empty string, and false otherwise.
                   5259: 
                   5260:    If we find the matching stop_memory, sets P to point to one past its number.
                   5261:    Otherwise, sets P to an undefined byte less than or equal to END.
                   5262: 
                   5263:    We don't handle duplicates properly (yet).  */
                   5264: 
                   5265: static boolean
                   5266: group_match_null_string_p (p, end, reg_info)
                   5267:     unsigned char **p, *end;
                   5268:     register_info_type *reg_info;
                   5269: {
                   5270:   int mcnt;
                   5271:   /* Point to after the args to the start_memory.  */
                   5272:   unsigned char *p1 = *p + 2;
                   5273: 
                   5274:   while (p1 < end)
                   5275:     {
                   5276:       /* Skip over opcodes that can match nothing, and return true or
                   5277:         false, as appropriate, when we get to one that can't, or to the
                   5278:          matching stop_memory.  */
                   5279: 
                   5280:       switch ((re_opcode_t) *p1)
                   5281:         {
                   5282:         /* Could be either a loop or a series of alternatives.  */
                   5283:         case on_failure_jump:
                   5284:           p1++;
                   5285:           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   5286: 
                   5287:           /* If the next operation is not a jump backwards in the
                   5288:             pattern.  */
                   5289: 
                   5290:          if (mcnt >= 0)
                   5291:            {
                   5292:               /* Go through the on_failure_jumps of the alternatives,
                   5293:                  seeing if any of the alternatives cannot match nothing.
                   5294:                  The last alternative starts with only a jump,
                   5295:                  whereas the rest start with on_failure_jump and end
                   5296:                  with a jump, e.g., here is the pattern for `a|b|c':
                   5297: 
                   5298:                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
                   5299:                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
                   5300:                  /exactn/1/c
                   5301: 
                   5302:                  So, we have to first go through the first (n-1)
                   5303:                  alternatives and then deal with the last one separately.  */
                   5304: 
                   5305: 
                   5306:               /* Deal with the first (n-1) alternatives, which start
                   5307:                  with an on_failure_jump (see above) that jumps to right
                   5308:                  past a jump_past_alt.  */
                   5309: 
                   5310:               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
                   5311:                 {
                   5312:                   /* `mcnt' holds how many bytes long the alternative
                   5313:                      is, including the ending `jump_past_alt' and
                   5314:                      its number.  */
                   5315: 
                   5316:                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
                   5317:                                                      reg_info))
                   5318:                     return false;
                   5319: 
                   5320:                   /* Move to right after this alternative, including the
                   5321:                     jump_past_alt.  */
                   5322:                   p1 += mcnt;
                   5323: 
                   5324:                   /* Break if it's the beginning of an n-th alternative
                   5325:                      that doesn't begin with an on_failure_jump.  */
                   5326:                   if ((re_opcode_t) *p1 != on_failure_jump)
                   5327:                     break;
                   5328: 
                   5329:                  /* Still have to check that it's not an n-th
                   5330:                     alternative that starts with an on_failure_jump.  */
                   5331:                  p1++;
                   5332:                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   5333:                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
                   5334:                     {
                   5335:                      /* Get to the beginning of the n-th alternative.  */
                   5336:                       p1 -= 3;
                   5337:                       break;
                   5338:                     }
                   5339:                 }
                   5340: 
                   5341:               /* Deal with the last alternative: go back and get number
                   5342:                  of the `jump_past_alt' just before it.  `mcnt' contains
                   5343:                  the length of the alternative.  */
                   5344:               EXTRACT_NUMBER (mcnt, p1 - 2);
                   5345: 
                   5346:               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
                   5347:                 return false;
                   5348: 
                   5349:               p1 += mcnt;      /* Get past the n-th alternative.  */
                   5350:             } /* if mcnt > 0 */
                   5351:           break;
                   5352: 
                   5353: 
                   5354:         case stop_memory:
                   5355:          assert (p1[1] == **p);
                   5356:           *p = p1 + 2;
                   5357:           return true;
                   5358: 
                   5359: 
                   5360:         default:
                   5361:           if (!common_op_match_null_string_p (&p1, end, reg_info))
                   5362:             return false;
                   5363:         }
                   5364:     } /* while p1 < end */
                   5365: 
                   5366:   return false;
                   5367: } /* group_match_null_string_p */
                   5368: 
                   5369: 
                   5370: /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
                   5371:    It expects P to be the first byte of a single alternative and END one
                   5372:    byte past the last. The alternative can contain groups.  */
                   5373: 
                   5374: static boolean
                   5375: alt_match_null_string_p (p, end, reg_info)
                   5376:     unsigned char *p, *end;
                   5377:     register_info_type *reg_info;
                   5378: {
                   5379:   int mcnt;
                   5380:   unsigned char *p1 = p;
                   5381: 
                   5382:   while (p1 < end)
                   5383:     {
                   5384:       /* Skip over opcodes that can match nothing, and break when we get
                   5385:          to one that can't.  */
                   5386: 
                   5387:       switch ((re_opcode_t) *p1)
                   5388:         {
                   5389:        /* It's a loop.  */
                   5390:         case on_failure_jump:
                   5391:           p1++;
                   5392:           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   5393:           p1 += mcnt;
                   5394:           break;
                   5395: 
                   5396:        default:
                   5397:           if (!common_op_match_null_string_p (&p1, end, reg_info))
                   5398:             return false;
                   5399:         }
                   5400:     }  /* while p1 < end */
                   5401: 
                   5402:   return true;
                   5403: } /* alt_match_null_string_p */
                   5404: 
                   5405: 
                   5406: /* Deals with the ops common to group_match_null_string_p and
                   5407:    alt_match_null_string_p.
                   5408: 
                   5409:    Sets P to one after the op and its arguments, if any.  */
                   5410: 
                   5411: static boolean
                   5412: common_op_match_null_string_p (p, end, reg_info)
                   5413:     unsigned char **p, *end;
                   5414:     register_info_type *reg_info;
                   5415: {
                   5416:   int mcnt;
                   5417:   boolean ret;
                   5418:   int reg_no;
                   5419:   unsigned char *p1 = *p;
                   5420: 
                   5421:   switch ((re_opcode_t) *p1++)
                   5422:     {
                   5423:     case no_op:
                   5424:     case begline:
                   5425:     case endline:
                   5426:     case begbuf:
                   5427:     case endbuf:
                   5428:     case wordbeg:
                   5429:     case wordend:
                   5430:     case wordbound:
                   5431:     case notwordbound:
                   5432: #ifdef emacs
                   5433:     case before_dot:
                   5434:     case at_dot:
                   5435:     case after_dot:
                   5436: #endif
                   5437:       break;
                   5438: 
                   5439:     case start_memory:
                   5440:       reg_no = *p1;
                   5441:       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
                   5442:       ret = group_match_null_string_p (&p1, end, reg_info);
                   5443: 
                   5444:       /* Have to set this here in case we're checking a group which
                   5445:          contains a group and a back reference to it.  */
                   5446: 
                   5447:       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
                   5448:         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
                   5449: 
                   5450:       if (!ret)
                   5451:         return false;
                   5452:       break;
                   5453: 
                   5454:     /* If this is an optimized succeed_n for zero times, make the jump.  */
                   5455:     case jump:
                   5456:       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   5457:       if (mcnt >= 0)
                   5458:         p1 += mcnt;
                   5459:       else
                   5460:         return false;
                   5461:       break;
                   5462: 
                   5463:     case succeed_n:
                   5464:       /* Get to the number of times to succeed.  */
                   5465:       p1 += 2;
                   5466:       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   5467: 
                   5468:       if (mcnt == 0)
                   5469:         {
                   5470:           p1 -= 4;
                   5471:           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                   5472:           p1 += mcnt;
                   5473:         }
                   5474:       else
                   5475:         return false;
                   5476:       break;
                   5477: 
                   5478:     case duplicate:
                   5479:       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
                   5480:         return false;
                   5481:       break;
                   5482: 
                   5483:     case set_number_at:
                   5484:       p1 += 4;
                   5485: 
                   5486:     default:
                   5487:       /* All other opcodes mean we cannot match the empty string.  */
                   5488:       return false;
                   5489:   }
                   5490: 
                   5491:   *p = p1;
                   5492:   return true;
                   5493: } /* common_op_match_null_string_p */
                   5494: 
                   5495: 
                   5496: /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
                   5497:    bytes; nonzero otherwise.  */
                   5498: 
                   5499: static int
                   5500: bcmp_translate (s1, s2, len, translate)
                   5501:      const char *s1, *s2;
                   5502:      register int len;
                   5503:      RE_TRANSLATE_TYPE translate;
                   5504: {
                   5505:   register const unsigned char *p1 = (const unsigned char *) s1;
                   5506:   register const unsigned char *p2 = (const unsigned char *) s2;
                   5507:   while (len)
                   5508:     {
                   5509:       if (translate[*p1++] != translate[*p2++]) return 1;
                   5510:       len--;
                   5511:     }
                   5512:   return 0;
                   5513: }
1.1.1.2 ! misho    5514: 
1.1       misho    5515: /* Entry points for GNU code.  */
                   5516: 
                   5517: /* re_compile_pattern is the GNU regular expression compiler: it
                   5518:    compiles PATTERN (of length SIZE) and puts the result in BUFP.
                   5519:    Returns 0 if the pattern was valid, otherwise an error string.
                   5520: 
                   5521:    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
                   5522:    are set in BUFP on entry.
                   5523: 
                   5524:    We call regex_compile to do the actual compilation.  */
                   5525: 
                   5526: const char *
                   5527: re_compile_pattern (pattern, length, bufp)
                   5528:      const char *pattern;
                   5529:      size_t length;
                   5530:      struct re_pattern_buffer *bufp;
                   5531: {
                   5532:   reg_errcode_t ret;
                   5533: 
                   5534:   /* GNU code is written to assume at least RE_NREGS registers will be set
                   5535:      (and at least one extra will be -1).  */
                   5536:   bufp->regs_allocated = REGS_UNALLOCATED;
                   5537: 
                   5538:   /* And GNU code determines whether or not to get register information
                   5539:      by passing null for the REGS argument to re_match, etc., not by
                   5540:      setting no_sub.  */
                   5541:   bufp->no_sub = 0;
                   5542: 
                   5543:   /* Match anchors at newline.  */
                   5544:   bufp->newline_anchor = 1;
                   5545: 
                   5546:   ret = regex_compile (pattern, length, re_syntax_options, bufp);
                   5547: 
                   5548:   if (!ret)
                   5549:     return NULL;
                   5550:   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
                   5551: }
                   5552: #ifdef _LIBC
                   5553: weak_alias (__re_compile_pattern, re_compile_pattern)
                   5554: #endif
1.1.1.2 ! misho    5555: 
1.1       misho    5556: /* Entry points compatible with 4.2 BSD regex library.  We don't define
                   5557:    them unless specifically requested.  */
                   5558: 
                   5559: #if defined _REGEX_RE_COMP || defined _LIBC
                   5560: 
                   5561: /* BSD has one and only one pattern buffer.  */
                   5562: static struct re_pattern_buffer re_comp_buf;
                   5563: 
                   5564: char *
                   5565: #ifdef _LIBC
                   5566: /* Make these definitions weak in libc, so POSIX programs can redefine
                   5567:    these names if they don't use our functions, and still use
                   5568:    regcomp/regexec below without link errors.  */
                   5569: weak_function
                   5570: #endif
                   5571: re_comp (s)
                   5572:     const char *s;
                   5573: {
                   5574:   reg_errcode_t ret;
                   5575: 
                   5576:   if (!s)
                   5577:     {
                   5578:       if (!re_comp_buf.buffer)
                   5579:        return gettext ("No previous regular expression");
                   5580:       return 0;
                   5581:     }
                   5582: 
                   5583:   if (!re_comp_buf.buffer)
                   5584:     {
                   5585:       re_comp_buf.buffer = (unsigned char *) malloc (200);
                   5586:       if (re_comp_buf.buffer == NULL)
                   5587:         return (char *) gettext (re_error_msgid
                   5588:                                 + re_error_msgid_idx[(int) REG_ESPACE]);
                   5589:       re_comp_buf.allocated = 200;
                   5590: 
                   5591:       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
                   5592:       if (re_comp_buf.fastmap == NULL)
                   5593:        return (char *) gettext (re_error_msgid
                   5594:                                 + re_error_msgid_idx[(int) REG_ESPACE]);
                   5595:     }
                   5596: 
                   5597:   /* Since `re_exec' always passes NULL for the `regs' argument, we
                   5598:      don't need to initialize the pattern buffer fields which affect it.  */
                   5599: 
                   5600:   /* Match anchors at newlines.  */
                   5601:   re_comp_buf.newline_anchor = 1;
                   5602: 
                   5603:   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
                   5604: 
                   5605:   if (!ret)
                   5606:     return NULL;
                   5607: 
                   5608:   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
                   5609:   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
                   5610: }
                   5611: 
                   5612: 
                   5613: int
                   5614: #ifdef _LIBC
                   5615: weak_function
                   5616: #endif
                   5617: re_exec (s)
                   5618:     const char *s;
                   5619: {
                   5620:   const int len = strlen (s);
                   5621:   return
                   5622:     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
                   5623: }
                   5624: 
                   5625: #endif /* _REGEX_RE_COMP */
1.1.1.2 ! misho    5626: 
1.1       misho    5627: /* POSIX.2 functions.  Don't define these for Emacs.  */
                   5628: 
                   5629: #ifndef emacs
                   5630: 
                   5631: /* regcomp takes a regular expression as a string and compiles it.
                   5632: 
                   5633:    PREG is a regex_t *.  We do not expect any fields to be initialized,
                   5634:    since POSIX says we shouldn't.  Thus, we set
                   5635: 
                   5636:      `buffer' to the compiled pattern;
                   5637:      `used' to the length of the compiled pattern;
                   5638:      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
                   5639:        REG_EXTENDED bit in CFLAGS is set; otherwise, to
                   5640:        RE_SYNTAX_POSIX_BASIC;
                   5641:      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
                   5642:      `fastmap' to an allocated space for the fastmap;
                   5643:      `fastmap_accurate' to zero;
                   5644:      `re_nsub' to the number of subexpressions in PATTERN.
                   5645: 
                   5646:    PATTERN is the address of the pattern string.
                   5647: 
                   5648:    CFLAGS is a series of bits which affect compilation.
                   5649: 
                   5650:      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
                   5651:      use POSIX basic syntax.
                   5652: 
                   5653:      If REG_NEWLINE is set, then . and [^...] don't match newline.
                   5654:      Also, regexec will try a match beginning after every newline.
                   5655: 
                   5656:      If REG_ICASE is set, then we considers upper- and lowercase
                   5657:      versions of letters to be equivalent when matching.
                   5658: 
                   5659:      If REG_NOSUB is set, then when PREG is passed to regexec, that
                   5660:      routine will report only success or failure, and nothing about the
                   5661:      registers.
                   5662: 
                   5663:    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
                   5664:    the return codes and their meanings.)  */
                   5665: 
                   5666: int
                   5667: regcomp (preg, pattern, cflags)
                   5668:     regex_t *preg;
                   5669:     const char *pattern;
                   5670:     int cflags;
                   5671: {
                   5672:   reg_errcode_t ret;
                   5673:   reg_syntax_t syntax
                   5674:     = (cflags & REG_EXTENDED) ?
                   5675:       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
                   5676: 
                   5677:   /* regex_compile will allocate the space for the compiled pattern.  */
                   5678:   preg->buffer = 0;
                   5679:   preg->allocated = 0;
                   5680:   preg->used = 0;
                   5681: 
                   5682:   /* Try to allocate space for the fastmap.  */
                   5683:   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
                   5684: 
                   5685:   if (cflags & REG_ICASE)
                   5686:     {
                   5687:       unsigned i;
                   5688: 
                   5689:       preg->translate
                   5690:        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
                   5691:                                      * sizeof (*(RE_TRANSLATE_TYPE)0));
                   5692:       if (preg->translate == NULL)
                   5693:         return (int) REG_ESPACE;
                   5694: 
                   5695:       /* Map uppercase characters to corresponding lowercase ones.  */
                   5696:       for (i = 0; i < CHAR_SET_SIZE; i++)
                   5697:         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
                   5698:     }
                   5699:   else
                   5700:     preg->translate = NULL;
                   5701: 
                   5702:   /* If REG_NEWLINE is set, newlines are treated differently.  */
                   5703:   if (cflags & REG_NEWLINE)
                   5704:     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
                   5705:       syntax &= ~RE_DOT_NEWLINE;
                   5706:       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
                   5707:       /* It also changes the matching behavior.  */
                   5708:       preg->newline_anchor = 1;
                   5709:     }
                   5710:   else
                   5711:     preg->newline_anchor = 0;
                   5712: 
                   5713:   preg->no_sub = !!(cflags & REG_NOSUB);
                   5714: 
                   5715:   /* POSIX says a null character in the pattern terminates it, so we
                   5716:      can use strlen here in compiling the pattern.  */
                   5717:   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
                   5718: 
                   5719:   /* POSIX doesn't distinguish between an unmatched open-group and an
                   5720:      unmatched close-group: both are REG_EPAREN.  */
                   5721:   if (ret == REG_ERPAREN) ret = REG_EPAREN;
                   5722: 
                   5723:   if (ret == REG_NOERROR && preg->fastmap)
                   5724:     {
                   5725:       /* Compute the fastmap now, since regexec cannot modify the pattern
                   5726:         buffer.  */
                   5727:       if (re_compile_fastmap (preg) == -2)
                   5728:        {
                   5729:          /* Some error occured while computing the fastmap, just forget
                   5730:             about it.  */
                   5731:          free (preg->fastmap);
                   5732:          preg->fastmap = NULL;
                   5733:        }
                   5734:     }
                   5735: 
                   5736:   return (int) ret;
                   5737: }
                   5738: #ifdef _LIBC
                   5739: weak_alias (__regcomp, regcomp)
                   5740: #endif
                   5741: 
                   5742: 
                   5743: /* regexec searches for a given pattern, specified by PREG, in the
                   5744:    string STRING.
                   5745: 
                   5746:    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
                   5747:    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
                   5748:    least NMATCH elements, and we set them to the offsets of the
                   5749:    corresponding matched substrings.
                   5750: 
                   5751:    EFLAGS specifies `execution flags' which affect matching: if
                   5752:    REG_NOTBOL is set, then ^ does not match at the beginning of the
                   5753:    string; if REG_NOTEOL is set, then $ does not match at the end.
                   5754: 
                   5755:    We return 0 if we find a match and REG_NOMATCH if not.  */
                   5756: 
                   5757: int
                   5758: regexec (preg, string, nmatch, pmatch, eflags)
                   5759:     const regex_t *preg;
                   5760:     const char *string;
                   5761:     size_t nmatch;
                   5762:     regmatch_t pmatch[];
                   5763:     int eflags;
                   5764: {
                   5765:   int ret;
                   5766:   struct re_registers regs;
                   5767:   regex_t private_preg;
                   5768:   int len = strlen (string);
                   5769:   boolean want_reg_info = !preg->no_sub && nmatch > 0;
                   5770: 
                   5771:   private_preg = *preg;
                   5772: 
                   5773:   private_preg.not_bol = !!(eflags & REG_NOTBOL);
                   5774:   private_preg.not_eol = !!(eflags & REG_NOTEOL);
                   5775: 
                   5776:   /* The user has told us exactly how many registers to return
                   5777:      information about, via `nmatch'.  We have to pass that on to the
                   5778:      matching routines.  */
                   5779:   private_preg.regs_allocated = REGS_FIXED;
                   5780: 
                   5781:   if (want_reg_info)
                   5782:     {
                   5783:       regs.num_regs = nmatch;
                   5784:       regs.start = TALLOC (nmatch * 2, regoff_t);
                   5785:       if (regs.start == NULL)
                   5786:         return (int) REG_NOMATCH;
                   5787:       regs.end = regs.start + nmatch;
                   5788:     }
                   5789: 
                   5790:   /* Perform the searching operation.  */
                   5791:   ret = re_search (&private_preg, string, len,
                   5792:                    /* start: */ 0, /* range: */ len,
                   5793:                    want_reg_info ? &regs : (struct re_registers *) 0);
                   5794: 
                   5795:   /* Copy the register information to the POSIX structure.  */
                   5796:   if (want_reg_info)
                   5797:     {
                   5798:       if (ret >= 0)
                   5799:         {
                   5800:           unsigned r;
                   5801: 
                   5802:           for (r = 0; r < nmatch; r++)
                   5803:             {
                   5804:               pmatch[r].rm_so = regs.start[r];
                   5805:               pmatch[r].rm_eo = regs.end[r];
                   5806:             }
                   5807:         }
                   5808: 
                   5809:       /* If we needed the temporary register info, free the space now.  */
                   5810:       free (regs.start);
                   5811:     }
                   5812: 
                   5813:   /* We want zero return to mean success, unlike `re_search'.  */
                   5814:   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
                   5815: }
                   5816: #ifdef _LIBC
                   5817: weak_alias (__regexec, regexec)
                   5818: #endif
                   5819: 
                   5820: 
                   5821: /* Returns a message corresponding to an error code, ERRCODE, returned
                   5822:    from either regcomp or regexec.   We don't use PREG here.  */
                   5823: 
                   5824: size_t
                   5825: regerror (err, preg, errbuf, errbuf_size)
                   5826:     int err;
                   5827:     const regex_t *preg;
                   5828:     char *errbuf;
                   5829:     size_t errbuf_size;
                   5830: {
                   5831:   const char *msg;
                   5832:   size_t msg_size;
                   5833: 
                   5834:   if (err < 0
                   5835:       || err >= (int) (sizeof (re_error_msgid_idx)
                   5836:                           / sizeof (re_error_msgid_idx[0])))
                   5837:     /* Only error codes returned by the rest of the code should be passed
                   5838:        to this routine.  If we are given anything else, or if other regex
                   5839:        code generates an invalid error code, then the program has a bug.
                   5840:        Dump core so we can fix it.  */
                   5841:     abort ();
                   5842: 
                   5843:   msg = gettext (re_error_msgid + re_error_msgid_idx[err]);
                   5844: 
                   5845:   msg_size = strlen (msg) + 1; /* Includes the null.  */
                   5846: 
                   5847:   if (errbuf_size != 0)
                   5848:     {
                   5849:       if (msg_size > errbuf_size)
                   5850:         {
                   5851: #if defined HAVE_MEMPCPY || defined _LIBC
                   5852:          *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
                   5853: #else
                   5854:           memcpy (errbuf, msg, errbuf_size - 1);
                   5855:           errbuf[errbuf_size - 1] = 0;
                   5856: #endif
                   5857:         }
                   5858:       else
                   5859:         memcpy (errbuf, msg, msg_size);
                   5860:     }
                   5861: 
                   5862:   return msg_size;
                   5863: }
                   5864: #ifdef _LIBC
                   5865: weak_alias (__regerror, regerror)
                   5866: #endif
                   5867: 
                   5868: 
                   5869: /* Free dynamically allocated space used by PREG.  */
                   5870: 
                   5871: void
                   5872: regfree (preg)
                   5873:     regex_t *preg;
                   5874: {
                   5875:   if (preg->buffer != NULL)
                   5876:     free (preg->buffer);
                   5877:   preg->buffer = NULL;
                   5878: 
                   5879:   preg->allocated = 0;
                   5880:   preg->used = 0;
                   5881: 
                   5882:   if (preg->fastmap != NULL)
                   5883:     free (preg->fastmap);
                   5884:   preg->fastmap = NULL;
                   5885:   preg->fastmap_accurate = 0;
                   5886: 
                   5887:   if (preg->translate != NULL)
                   5888:     free (preg->translate);
                   5889:   preg->translate = NULL;
                   5890: }
                   5891: #ifdef _LIBC
                   5892: weak_alias (__regfree, regfree)
                   5893: #endif
                   5894: 
                   5895: #endif /* not emacs  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>