Annotation of embedaddon/pcre/doc/pcreapi.3, revision 1.1

1.1     ! misho       1: .TH PCREAPI 3
        !             2: .SH NAME
        !             3: PCRE - Perl-compatible regular expressions
        !             4: .SH "PCRE NATIVE API BASIC FUNCTIONS"
        !             5: .rs
        !             6: .sp
        !             7: .B #include <pcre.h>
        !             8: .PP
        !             9: .SM
        !            10: .B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
        !            11: .ti +5n
        !            12: .B const char **\fIerrptr\fP, int *\fIerroffset\fP,
        !            13: .ti +5n
        !            14: .B const unsigned char *\fItableptr\fP);
        !            15: .PP
        !            16: .B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
        !            17: .ti +5n
        !            18: .B int *\fIerrorcodeptr\fP,
        !            19: .ti +5n
        !            20: .B const char **\fIerrptr\fP, int *\fIerroffset\fP,
        !            21: .ti +5n
        !            22: .B const unsigned char *\fItableptr\fP);
        !            23: .PP
        !            24: .B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
        !            25: .ti +5n
        !            26: .B const char **\fIerrptr\fP);
        !            27: .PP
        !            28: .B void pcre_free_study(pcre_extra *\fIextra\fP);
        !            29: .PP
        !            30: .B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
        !            31: .ti +5n
        !            32: .B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
        !            33: .ti +5n
        !            34: .B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
        !            35: .
        !            36: .
        !            37: .SH "PCRE NATIVE API AUXILIARY FUNCTIONS"
        !            38: .rs
        !            39: .sp
        !            40: .B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
        !            41: .PP
        !            42: .B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP);
        !            43: .PP
        !            44: .B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
        !            45: .ti +5n
        !            46: .B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
        !            47: .PP
        !            48: .B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
        !            49: .ti +5n
        !            50: .B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
        !            51: .ti +5n
        !            52: .B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
        !            53: .ti +5n
        !            54: .B int *\fIworkspace\fP, int \fIwscount\fP);
        !            55: .PP
        !            56: .B int pcre_copy_named_substring(const pcre *\fIcode\fP,
        !            57: .ti +5n
        !            58: .B const char *\fIsubject\fP, int *\fIovector\fP,
        !            59: .ti +5n
        !            60: .B int \fIstringcount\fP, const char *\fIstringname\fP,
        !            61: .ti +5n
        !            62: .B char *\fIbuffer\fP, int \fIbuffersize\fP);
        !            63: .PP
        !            64: .B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
        !            65: .ti +5n
        !            66: .B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
        !            67: .ti +5n
        !            68: .B int \fIbuffersize\fP);
        !            69: .PP
        !            70: .B int pcre_get_named_substring(const pcre *\fIcode\fP,
        !            71: .ti +5n
        !            72: .B const char *\fIsubject\fP, int *\fIovector\fP,
        !            73: .ti +5n
        !            74: .B int \fIstringcount\fP, const char *\fIstringname\fP,
        !            75: .ti +5n
        !            76: .B const char **\fIstringptr\fP);
        !            77: .PP
        !            78: .B int pcre_get_stringnumber(const pcre *\fIcode\fP,
        !            79: .ti +5n
        !            80: .B const char *\fIname\fP);
        !            81: .PP
        !            82: .B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
        !            83: .ti +5n
        !            84: .B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
        !            85: .PP
        !            86: .B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
        !            87: .ti +5n
        !            88: .B int \fIstringcount\fP, int \fIstringnumber\fP,
        !            89: .ti +5n
        !            90: .B const char **\fIstringptr\fP);
        !            91: .PP
        !            92: .B int pcre_get_substring_list(const char *\fIsubject\fP,
        !            93: .ti +5n
        !            94: .B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
        !            95: .PP
        !            96: .B void pcre_free_substring(const char *\fIstringptr\fP);
        !            97: .PP
        !            98: .B void pcre_free_substring_list(const char **\fIstringptr\fP);
        !            99: .PP
        !           100: .B const unsigned char *pcre_maketables(void);
        !           101: .PP
        !           102: .B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
        !           103: .ti +5n
        !           104: .B int \fIwhat\fP, void *\fIwhere\fP);
        !           105: .PP
        !           106: .B int pcre_info(const pcre *\fIcode\fP, int *\fIoptptr\fP, int
        !           107: .B *\fIfirstcharptr\fP);
        !           108: .PP
        !           109: .B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
        !           110: .PP
        !           111: .B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
        !           112: .PP
        !           113: .B char *pcre_version(void);
        !           114: .
        !           115: .
        !           116: .SH "PCRE NATIVE API INDIRECTED FUNCTIONS"
        !           117: .rs
        !           118: .sp
        !           119: .B void *(*pcre_malloc)(size_t);
        !           120: .PP
        !           121: .B void (*pcre_free)(void *);
        !           122: .PP
        !           123: .B void *(*pcre_stack_malloc)(size_t);
        !           124: .PP
        !           125: .B void (*pcre_stack_free)(void *);
        !           126: .PP
        !           127: .B int (*pcre_callout)(pcre_callout_block *);
        !           128: .
        !           129: .
        !           130: .SH "PCRE API OVERVIEW"
        !           131: .rs
        !           132: .sp
        !           133: PCRE has its own native API, which is described in this document. There are
        !           134: also some wrapper functions that correspond to the POSIX regular expression
        !           135: API, but they do not give access to all the functionality. They are described
        !           136: in the
        !           137: .\" HREF
        !           138: \fBpcreposix\fP
        !           139: .\"
        !           140: documentation. Both of these APIs define a set of C function calls. A C++
        !           141: wrapper is also distributed with PCRE. It is documented in the
        !           142: .\" HREF
        !           143: \fBpcrecpp\fP
        !           144: .\"
        !           145: page.
        !           146: .P
        !           147: The native API C function prototypes are defined in the header file
        !           148: \fBpcre.h\fP, and on Unix systems the library itself is called \fBlibpcre\fP.
        !           149: It can normally be accessed by adding \fB-lpcre\fP to the command for linking
        !           150: an application that uses PCRE. The header file defines the macros PCRE_MAJOR
        !           151: and PCRE_MINOR to contain the major and minor release numbers for the library.
        !           152: Applications can use these to include support for different releases of PCRE.
        !           153: .P
        !           154: In a Windows environment, if you want to statically link an application program
        !           155: against a non-dll \fBpcre.a\fP file, you must define PCRE_STATIC before
        !           156: including \fBpcre.h\fP or \fBpcrecpp.h\fP, because otherwise the
        !           157: \fBpcre_malloc()\fP and \fBpcre_free()\fP exported functions will be declared
        !           158: \fB__declspec(dllimport)\fP, with unwanted results.
        !           159: .P
        !           160: The functions \fBpcre_compile()\fP, \fBpcre_compile2()\fP, \fBpcre_study()\fP,
        !           161: and \fBpcre_exec()\fP are used for compiling and matching regular expressions
        !           162: in a Perl-compatible manner. A sample program that demonstrates the simplest
        !           163: way of using them is provided in the file called \fIpcredemo.c\fP in the PCRE
        !           164: source distribution. A listing of this program is given in the
        !           165: .\" HREF
        !           166: \fBpcredemo\fP
        !           167: .\"
        !           168: documentation, and the
        !           169: .\" HREF
        !           170: \fBpcresample\fP
        !           171: .\"
        !           172: documentation describes how to compile and run it.
        !           173: .P
        !           174: Just-in-time compiler support is an optional feature of PCRE that can be built
        !           175: in appropriate hardware environments. It greatly speeds up the matching
        !           176: performance of many patterns. Simple programs can easily request that it be
        !           177: used if available, by setting an option that is ignored when it is not
        !           178: relevant. More complicated programs might need to make use of the functions
        !           179: \fBpcre_jit_stack_alloc()\fP, \fBpcre_jit_stack_free()\fP, and
        !           180: \fBpcre_assign_jit_stack()\fP in order to control the JIT code's memory usage.
        !           181: These functions are discussed in the
        !           182: .\" HREF
        !           183: \fBpcrejit\fP
        !           184: .\"
        !           185: documentation.
        !           186: .P
        !           187: A second matching function, \fBpcre_dfa_exec()\fP, which is not
        !           188: Perl-compatible, is also provided. This uses a different algorithm for the
        !           189: matching. The alternative algorithm finds all possible matches (at a given
        !           190: point in the subject), and scans the subject just once (unless there are
        !           191: lookbehind assertions). However, this algorithm does not return captured
        !           192: substrings. A description of the two matching algorithms and their advantages
        !           193: and disadvantages is given in the
        !           194: .\" HREF
        !           195: \fBpcrematching\fP
        !           196: .\"
        !           197: documentation.
        !           198: .P
        !           199: In addition to the main compiling and matching functions, there are convenience
        !           200: functions for extracting captured substrings from a subject string that is
        !           201: matched by \fBpcre_exec()\fP. They are:
        !           202: .sp
        !           203:   \fBpcre_copy_substring()\fP
        !           204:   \fBpcre_copy_named_substring()\fP
        !           205:   \fBpcre_get_substring()\fP
        !           206:   \fBpcre_get_named_substring()\fP
        !           207:   \fBpcre_get_substring_list()\fP
        !           208:   \fBpcre_get_stringnumber()\fP
        !           209:   \fBpcre_get_stringtable_entries()\fP
        !           210: .sp
        !           211: \fBpcre_free_substring()\fP and \fBpcre_free_substring_list()\fP are also
        !           212: provided, to free the memory used for extracted strings.
        !           213: .P
        !           214: The function \fBpcre_maketables()\fP is used to build a set of character tables
        !           215: in the current locale for passing to \fBpcre_compile()\fP, \fBpcre_exec()\fP,
        !           216: or \fBpcre_dfa_exec()\fP. This is an optional facility that is provided for
        !           217: specialist use. Most commonly, no special tables are passed, in which case
        !           218: internal tables that are generated when PCRE is built are used.
        !           219: .P
        !           220: The function \fBpcre_fullinfo()\fP is used to find out information about a
        !           221: compiled pattern; \fBpcre_info()\fP is an obsolete version that returns only
        !           222: some of the available information, but is retained for backwards compatibility.
        !           223: The function \fBpcre_version()\fP returns a pointer to a string containing the
        !           224: version of PCRE and its date of release.
        !           225: .P
        !           226: The function \fBpcre_refcount()\fP maintains a reference count in a data block
        !           227: containing a compiled pattern. This is provided for the benefit of
        !           228: object-oriented applications.
        !           229: .P
        !           230: The global variables \fBpcre_malloc\fP and \fBpcre_free\fP initially contain
        !           231: the entry points of the standard \fBmalloc()\fP and \fBfree()\fP functions,
        !           232: respectively. PCRE calls the memory management functions via these variables,
        !           233: so a calling program can replace them if it wishes to intercept the calls. This
        !           234: should be done before calling any PCRE functions.
        !           235: .P
        !           236: The global variables \fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP are also
        !           237: indirections to memory management functions. These special functions are used
        !           238: only when PCRE is compiled to use the heap for remembering data, instead of
        !           239: recursive function calls, when running the \fBpcre_exec()\fP function. See the
        !           240: .\" HREF
        !           241: \fBpcrebuild\fP
        !           242: .\"
        !           243: documentation for details of how to do this. It is a non-standard way of
        !           244: building PCRE, for use in environments that have limited stacks. Because of the
        !           245: greater use of memory management, it runs more slowly. Separate functions are
        !           246: provided so that special-purpose external code can be used for this case. When
        !           247: used, these functions are always called in a stack-like manner (last obtained,
        !           248: first freed), and always for memory blocks of the same size. There is a
        !           249: discussion about PCRE's stack usage in the
        !           250: .\" HREF
        !           251: \fBpcrestack\fP
        !           252: .\"
        !           253: documentation.
        !           254: .P
        !           255: The global variable \fBpcre_callout\fP initially contains NULL. It can be set
        !           256: by the caller to a "callout" function, which PCRE will then call at specified
        !           257: points during a matching operation. Details are given in the
        !           258: .\" HREF
        !           259: \fBpcrecallout\fP
        !           260: .\"
        !           261: documentation.
        !           262: .
        !           263: .
        !           264: .\" HTML <a name="newlines"></a>
        !           265: .SH NEWLINES
        !           266: .rs
        !           267: .sp
        !           268: PCRE supports five different conventions for indicating line breaks in
        !           269: strings: a single CR (carriage return) character, a single LF (linefeed)
        !           270: character, the two-character sequence CRLF, any of the three preceding, or any
        !           271: Unicode newline sequence. The Unicode newline sequences are the three just
        !           272: mentioned, plus the single characters VT (vertical tab, U+000B), FF (formfeed,
        !           273: U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
        !           274: (paragraph separator, U+2029).
        !           275: .P
        !           276: Each of the first three conventions is used by at least one operating system as
        !           277: its standard newline sequence. When PCRE is built, a default can be specified.
        !           278: If none is specified, the default is LF, which is the Unix standard. When PCRE is run, the
        !           279: default can be overridden, either when a pattern is compiled, or when it is
        !           280: matched.
        !           281: .P
        !           282: At compile time, the newline convention can be specified by the \fIoptions\fP
        !           283: argument of \fBpcre_compile()\fP, or it can be specified by special text at the
        !           284: start of the pattern itself; this overrides any other settings. See the
        !           285: .\" HREF
        !           286: \fBpcrepattern\fP
        !           287: .\"
        !           288: page for details of the special character sequences.
        !           289: .P
        !           290: In the PCRE documentation the word "newline" is used to mean "the character or
        !           291: pair of characters that indicate a line break". The choice of newline
        !           292: convention affects the handling of the dot, circumflex, and dollar
        !           293: metacharacters, the handling of #-comments in /x mode, and, when CRLF is a
        !           294: recognized line ending sequence, the match position advancement for a
        !           295: non-anchored pattern. There is more detail about this in the
        !           296: .\" HTML <a href="#execoptions">
        !           297: .\" </a>
        !           298: section on \fBpcre_exec()\fP options
        !           299: .\"
        !           300: below.
        !           301: .P
        !           302: The choice of newline convention does not affect the interpretation of
        !           303: the \en or \er escape sequences, nor does it affect what \eR matches, which is
        !           304: controlled in a similar way, but by separate options.
        !           305: .
        !           306: .
        !           307: .SH MULTITHREADING
        !           308: .rs
        !           309: .sp
        !           310: The PCRE functions can be used in multi-threading applications, with the
        !           311: proviso that the memory management functions pointed to by \fBpcre_malloc\fP,
        !           312: \fBpcre_free\fP, \fBpcre_stack_malloc\fP, and \fBpcre_stack_free\fP, and the
        !           313: callout function pointed to by \fBpcre_callout\fP, are shared by all threads.
        !           314: .P
        !           315: The compiled form of a regular expression is not altered during matching, so
        !           316: the same compiled pattern can safely be used by several threads at once.
        !           317: .P
        !           318: If the just-in-time optimization feature is being used, it needs separate
        !           319: memory stack areas for each thread. See the
        !           320: .\" HREF
        !           321: \fBpcrejit\fP
        !           322: .\"
        !           323: documentation for more details.
        !           324: .
        !           325: .
        !           326: .SH "SAVING PRECOMPILED PATTERNS FOR LATER USE"
        !           327: .rs
        !           328: .sp
        !           329: The compiled form of a regular expression can be saved and re-used at a later
        !           330: time, possibly by a different program, and even on a host other than the one on
        !           331: which it was compiled. Details are given in the
        !           332: .\" HREF
        !           333: \fBpcreprecompile\fP
        !           334: .\"
        !           335: documentation. However, compiling a regular expression with one version of PCRE
        !           336: for use with a different version is not guaranteed to work and may cause
        !           337: crashes.
        !           338: .
        !           339: .
        !           340: .SH "CHECKING BUILD-TIME OPTIONS"
        !           341: .rs
        !           342: .sp
        !           343: .B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
        !           344: .PP
        !           345: The function \fBpcre_config()\fP makes it possible for a PCRE client to
        !           346: discover which optional features have been compiled into the PCRE library. The
        !           347: .\" HREF
        !           348: \fBpcrebuild\fP
        !           349: .\"
        !           350: documentation has more details about these optional features.
        !           351: .P
        !           352: The first argument for \fBpcre_config()\fP is an integer, specifying which
        !           353: information is required; the second argument is a pointer to a variable into
        !           354: which the information is placed. The following information is available:
        !           355: .sp
        !           356:   PCRE_CONFIG_UTF8
        !           357: .sp
        !           358: The output is an integer that is set to one if UTF-8 support is available;
        !           359: otherwise it is set to zero.
        !           360: .sp
        !           361:   PCRE_CONFIG_UNICODE_PROPERTIES
        !           362: .sp
        !           363: The output is an integer that is set to one if support for Unicode character
        !           364: properties is available; otherwise it is set to zero.
        !           365: .sp
        !           366:   PCRE_CONFIG_JIT
        !           367: .sp
        !           368: The output is an integer that is set to one if support for just-in-time
        !           369: compiling is available; otherwise it is set to zero.
        !           370: .sp
        !           371:   PCRE_CONFIG_NEWLINE
        !           372: .sp
        !           373: The output is an integer whose value specifies the default character sequence
        !           374: that is recognized as meaning "newline". The five values that are supported
        !           375: are: 10 for LF, 13 for CR, 3338 for CRLF, -2 for ANYCRLF, and -1 for ANY.
        !           376: Though they are derived from ASCII, the same values are returned in EBCDIC
        !           377: environments. The default should normally correspond to the standard sequence
        !           378: for your operating system.
        !           379: .sp
        !           380:   PCRE_CONFIG_BSR
        !           381: .sp
        !           382: The output is an integer whose value indicates what character sequences the \eR
        !           383: escape sequence matches by default. A value of 0 means that \eR matches any
        !           384: Unicode line ending sequence; a value of 1 means that \eR matches only CR, LF,
        !           385: or CRLF. The default can be overridden when a pattern is compiled or matched.
        !           386: .sp
        !           387:   PCRE_CONFIG_LINK_SIZE
        !           388: .sp
        !           389: The output is an integer that contains the number of bytes used for internal
        !           390: linkage in compiled regular expressions. The value is 2, 3, or 4. Larger values
        !           391: allow larger regular expressions to be compiled, at the expense of slower
        !           392: matching. The default value of 2 is sufficient for all but the most massive
        !           393: patterns, since it allows the compiled pattern to be up to 64K in size.
        !           394: .sp
        !           395:   PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
        !           396: .sp
        !           397: The output is an integer that contains the threshold above which the POSIX
        !           398: interface uses \fBmalloc()\fP for output vectors. Further details are given in
        !           399: the
        !           400: .\" HREF
        !           401: \fBpcreposix\fP
        !           402: .\"
        !           403: documentation.
        !           404: .sp
        !           405:   PCRE_CONFIG_MATCH_LIMIT
        !           406: .sp
        !           407: The output is a long integer that gives the default limit for the number of
        !           408: internal matching function calls in a \fBpcre_exec()\fP execution. Further
        !           409: details are given with \fBpcre_exec()\fP below.
        !           410: .sp
        !           411:   PCRE_CONFIG_MATCH_LIMIT_RECURSION
        !           412: .sp
        !           413: The output is a long integer that gives the default limit for the depth of
        !           414: recursion when calling the internal matching function in a \fBpcre_exec()\fP
        !           415: execution. Further details are given with \fBpcre_exec()\fP below.
        !           416: .sp
        !           417:   PCRE_CONFIG_STACKRECURSE
        !           418: .sp
        !           419: The output is an integer that is set to one if internal recursion when running
        !           420: \fBpcre_exec()\fP is implemented by recursive function calls that use the stack
        !           421: to remember their state. This is the usual way that PCRE is compiled. The
        !           422: output is zero if PCRE was compiled to use blocks of data on the heap instead
        !           423: of recursive function calls. In this case, \fBpcre_stack_malloc\fP and
        !           424: \fBpcre_stack_free\fP are called to manage memory blocks on the heap, thus
        !           425: avoiding the use of the stack.
        !           426: .
        !           427: .
        !           428: .SH "COMPILING A PATTERN"
        !           429: .rs
        !           430: .sp
        !           431: .B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
        !           432: .ti +5n
        !           433: .B const char **\fIerrptr\fP, int *\fIerroffset\fP,
        !           434: .ti +5n
        !           435: .B const unsigned char *\fItableptr\fP);
        !           436: .sp
        !           437: .B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
        !           438: .ti +5n
        !           439: .B int *\fIerrorcodeptr\fP,
        !           440: .ti +5n
        !           441: .B const char **\fIerrptr\fP, int *\fIerroffset\fP,
        !           442: .ti +5n
        !           443: .B const unsigned char *\fItableptr\fP);
        !           444: .P
        !           445: Either of the functions \fBpcre_compile()\fP or \fBpcre_compile2()\fP can be
        !           446: called to compile a pattern into an internal form. The only difference between
        !           447: the two interfaces is that \fBpcre_compile2()\fP has an additional argument,
        !           448: \fIerrorcodeptr\fP, via which a numerical error code can be returned. To avoid
        !           449: too much repetition, we refer just to \fBpcre_compile()\fP below, but the
        !           450: information applies equally to \fBpcre_compile2()\fP.
        !           451: .P
        !           452: The pattern is a C string terminated by a binary zero, and is passed in the
        !           453: \fIpattern\fP argument. A pointer to a single block of memory that is obtained
        !           454: via \fBpcre_malloc\fP is returned. This contains the compiled code and related
        !           455: data. The \fBpcre\fP type is defined for the returned block; this is a typedef
        !           456: for a structure whose contents are not externally defined. It is up to the
        !           457: caller to free the memory (via \fBpcre_free\fP) when it is no longer required.
        !           458: .P
        !           459: Although the compiled code of a PCRE regex is relocatable, that is, it does not
        !           460: depend on memory location, the complete \fBpcre\fP data block is not
        !           461: fully relocatable, because it may contain a copy of the \fItableptr\fP
        !           462: argument, which is an address (see below).
        !           463: .P
        !           464: The \fIoptions\fP argument contains various bit settings that affect the
        !           465: compilation. It should be zero if no options are required. The available
        !           466: options are described below. Some of them (in particular, those that are
        !           467: compatible with Perl, but some others as well) can also be set and unset from
        !           468: within the pattern (see the detailed description in the
        !           469: .\" HREF
        !           470: \fBpcrepattern\fP
        !           471: .\"
        !           472: documentation). For those options that can be different in different parts of
        !           473: the pattern, the contents of the \fIoptions\fP argument specify their
        !           474: settings at the start of compilation and execution. The PCRE_ANCHORED,
        !           475: PCRE_BSR_\fIxxx\fP, PCRE_NEWLINE_\fIxxx\fP, PCRE_NO_UTF8_CHECK, and
        !           476: PCRE_NO_START_OPT options can be set at the time of matching as well as at
        !           477: compile time.
        !           478: .P
        !           479: If \fIerrptr\fP is NULL, \fBpcre_compile()\fP returns NULL immediately.
        !           480: Otherwise, if compilation of a pattern fails, \fBpcre_compile()\fP returns
        !           481: NULL, and sets the variable pointed to by \fIerrptr\fP to point to a textual
        !           482: error message. This is a static string that is part of the library. You must
        !           483: not try to free it. Normally, the offset from the start of the pattern to the
        !           484: byte that was being processed when the error was discovered is placed in the
        !           485: variable pointed to by \fIerroffset\fP, which must not be NULL (if it is, an
        !           486: immediate error is given). However, for an invalid UTF-8 string, the offset is
        !           487: that of the first byte of the failing character. Also, some errors are not
        !           488: detected until checks are carried out when the whole pattern has been scanned;
        !           489: in these cases the offset passed back is the length of the pattern.
        !           490: .P
        !           491: Note that the offset is in bytes, not characters, even in UTF-8 mode. It may
        !           492: sometimes point into the middle of a UTF-8 character.
        !           493: .P
        !           494: If \fBpcre_compile2()\fP is used instead of \fBpcre_compile()\fP, and the
        !           495: \fIerrorcodeptr\fP argument is not NULL, a non-zero error code number is
        !           496: returned via this argument in the event of an error. This is in addition to the
        !           497: textual error message. Error codes and messages are listed below.
        !           498: .P
        !           499: If the final argument, \fItableptr\fP, is NULL, PCRE uses a default set of
        !           500: character tables that are built when PCRE is compiled, using the default C
        !           501: locale. Otherwise, \fItableptr\fP must be an address that is the result of a
        !           502: call to \fBpcre_maketables()\fP. This value is stored with the compiled
        !           503: pattern, and used again by \fBpcre_exec()\fP, unless another table pointer is
        !           504: passed to it. For more discussion, see the section on locale support below.
        !           505: .P
        !           506: This code fragment shows a typical straightforward call to \fBpcre_compile()\fP:
        !           507: .sp
        !           508:   pcre *re;
        !           509:   const char *error;
        !           510:   int erroffset;
        !           511:   re = pcre_compile(
        !           512:     "^A.*Z",          /* the pattern */
        !           513:     0,                /* default options */
        !           514:     &error,           /* for error message */
        !           515:     &erroffset,       /* for error offset */
        !           516:     NULL);            /* use default character tables */
        !           517: .sp
        !           518: The following names for option bits are defined in the \fBpcre.h\fP header
        !           519: file:
        !           520: .sp
        !           521:   PCRE_ANCHORED
        !           522: .sp
        !           523: If this bit is set, the pattern is forced to be "anchored", that is, it is
        !           524: constrained to match only at the first matching point in the string that is
        !           525: being searched (the "subject string"). This effect can also be achieved by
        !           526: appropriate constructs in the pattern itself, which is the only way to do it in
        !           527: Perl.
        !           528: .sp
        !           529:   PCRE_AUTO_CALLOUT
        !           530: .sp
        !           531: If this bit is set, \fBpcre_compile()\fP automatically inserts callout items,
        !           532: all with number 255, before each pattern item. For discussion of the callout
        !           533: facility, see the
        !           534: .\" HREF
        !           535: \fBpcrecallout\fP
        !           536: .\"
        !           537: documentation.
        !           538: .sp
        !           539:   PCRE_BSR_ANYCRLF
        !           540:   PCRE_BSR_UNICODE
        !           541: .sp
        !           542: These options (which are mutually exclusive) control what the \eR escape
        !           543: sequence matches. The choice is either to match only CR, LF, or CRLF, or to
        !           544: match any Unicode newline sequence. The default is specified when PCRE is
        !           545: built. It can be overridden from within the pattern, or by setting an option
        !           546: when a compiled pattern is matched.
        !           547: .sp
        !           548:   PCRE_CASELESS
        !           549: .sp
        !           550: If this bit is set, letters in the pattern match both upper and lower case
        !           551: letters. It is equivalent to Perl's /i option, and it can be changed within a
        !           552: pattern by a (?i) option setting. In UTF-8 mode, PCRE always understands the
        !           553: concept of case for characters whose values are less than 128, so caseless
        !           554: matching is always possible. For characters with higher values, the concept of
        !           555: case is supported if PCRE is compiled with Unicode property support, but not
        !           556: otherwise. If you want to use caseless matching for characters 128 and above,
        !           557: you must ensure that PCRE is compiled with Unicode property support as well as
        !           558: with UTF-8 support.
        !           559: .sp
        !           560:   PCRE_DOLLAR_ENDONLY
        !           561: .sp
        !           562: If this bit is set, a dollar metacharacter in the pattern matches only at the
        !           563: end of the subject string. Without this option, a dollar also matches
        !           564: immediately before a newline at the end of the string (but not before any other
        !           565: newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set.
        !           566: There is no equivalent to this option in Perl, and no way to set it within a
        !           567: pattern.
        !           568: .sp
        !           569:   PCRE_DOTALL
        !           570: .sp
        !           571: If this bit is set, a dot metacharacter in the pattern matches a character of
        !           572: any value, including one that indicates a newline. However, it only ever
        !           573: matches one character, even if newlines are coded as CRLF. Without this option,
        !           574: a dot does not match when the current position is at a newline. This option is
        !           575: equivalent to Perl's /s option, and it can be changed within a pattern by a
        !           576: (?s) option setting. A negative class such as [^a] always matches newline
        !           577: characters, independent of the setting of this option.
        !           578: .sp
        !           579:   PCRE_DUPNAMES
        !           580: .sp
        !           581: If this bit is set, names used to identify capturing subpatterns need not be
        !           582: unique. This can be helpful for certain types of pattern when it is known that
        !           583: only one instance of the named subpattern can ever be matched. There are more
        !           584: details of named subpatterns below; see also the
        !           585: .\" HREF
        !           586: \fBpcrepattern\fP
        !           587: .\"
        !           588: documentation.
        !           589: .sp
        !           590:   PCRE_EXTENDED
        !           591: .sp
        !           592: If this bit is set, whitespace data characters in the pattern are totally
        !           593: ignored except when escaped or inside a character class. Whitespace does not
        !           594: include the VT character (code 11). In addition, characters between an
        !           595: unescaped # outside a character class and the next newline, inclusive, are also
        !           596: ignored. This is equivalent to Perl's /x option, and it can be changed within a
        !           597: pattern by a (?x) option setting.
        !           598: .P
        !           599: Which characters are interpreted as newlines is controlled by the options
        !           600: passed to \fBpcre_compile()\fP or by a special sequence at the start of the
        !           601: pattern, as described in the section entitled
        !           602: .\" HTML <a href="pcrepattern.html#newlines">
        !           603: .\" </a>
        !           604: "Newline conventions"
        !           605: .\"
        !           606: in the \fBpcrepattern\fP documentation. Note that the end of this type of
        !           607: comment is a literal newline sequence in the pattern; escape sequences that
        !           608: happen to represent a newline do not count.
        !           609: .P
        !           610: This option makes it possible to include comments inside complicated patterns.
        !           611: Note, however, that this applies only to data characters. Whitespace characters
        !           612: may never appear within special character sequences in a pattern, for example
        !           613: within the sequence (?( that introduces a conditional subpattern.
        !           614: .sp
        !           615:   PCRE_EXTRA
        !           616: .sp
        !           617: This option was invented in order to turn on additional functionality of PCRE
        !           618: that is incompatible with Perl, but it is currently of very little use. When
        !           619: set, any backslash in a pattern that is followed by a letter that has no
        !           620: special meaning causes an error, thus reserving these combinations for future
        !           621: expansion. By default, as in Perl, a backslash followed by a letter with no
        !           622: special meaning is treated as a literal. (Perl can, however, be persuaded to
        !           623: give an error for this, by running it with the -w option.) There are at present
        !           624: no other features controlled by this option. It can also be set by a (?X)
        !           625: option setting within a pattern.
        !           626: .sp
        !           627:   PCRE_FIRSTLINE
        !           628: .sp
        !           629: If this option is set, an unanchored pattern is required to match before or at
        !           630: the first newline in the subject string, though the matched text may continue
        !           631: over the newline.
        !           632: .sp
        !           633:   PCRE_JAVASCRIPT_COMPAT
        !           634: .sp
        !           635: If this option is set, PCRE's behaviour is changed in some ways so that it is
        !           636: compatible with JavaScript rather than Perl. The changes are as follows:
        !           637: .P
        !           638: (1) A lone closing square bracket in a pattern causes a compile-time error,
        !           639: because this is illegal in JavaScript (by default it is treated as a data
        !           640: character). Thus, the pattern AB]CD becomes illegal when this option is set.
        !           641: .P
        !           642: (2) At run time, a back reference to an unset subpattern group matches an empty
        !           643: string (by default this causes the current matching alternative to fail). A
        !           644: pattern such as (\e1)(a) succeeds when this option is set (assuming it can find
        !           645: an "a" in the subject), whereas it fails by default, for Perl compatibility.
        !           646: .P
        !           647: (3) \eU matches an upper case "U" character; by default \eU causes a compile
        !           648: time error (Perl uses \eU to upper case subsequent characters).
        !           649: .P
        !           650: (4) \eu matches a lower case "u" character unless it is followed by four
        !           651: hexadecimal digits, in which case the hexadecimal number defines the code point
        !           652: to match. By default, \eu causes a compile time error (Perl uses it to upper
        !           653: case the following character).
        !           654: .P
        !           655: (5) \ex matches a lower case "x" character unless it is followed by two
        !           656: hexadecimal digits, in which case the hexadecimal number defines the code point
        !           657: to match. By default, as in Perl, a hexadecimal number is always expected after
        !           658: \ex, but it may have zero, one, or two digits (so, for example, \exz matches a
        !           659: binary zero character followed by z).
        !           660: .sp
        !           661:   PCRE_MULTILINE
        !           662: .sp
        !           663: By default, PCRE treats the subject string as consisting of a single line of
        !           664: characters (even if it actually contains newlines). The "start of line"
        !           665: metacharacter (^) matches only at the start of the string, while the "end of
        !           666: line" metacharacter ($) matches only at the end of the string, or before a
        !           667: terminating newline (unless PCRE_DOLLAR_ENDONLY is set). This is the same as
        !           668: Perl.
        !           669: .P
        !           670: When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs
        !           671: match immediately following or immediately before internal newlines in the
        !           672: subject string, respectively, as well as at the very start and end. This is
        !           673: equivalent to Perl's /m option, and it can be changed within a pattern by a
        !           674: (?m) option setting. If there are no newlines in a subject string, or no
        !           675: occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect.
        !           676: .sp
        !           677:   PCRE_NEWLINE_CR
        !           678:   PCRE_NEWLINE_LF
        !           679:   PCRE_NEWLINE_CRLF
        !           680:   PCRE_NEWLINE_ANYCRLF
        !           681:   PCRE_NEWLINE_ANY
        !           682: .sp
        !           683: These options override the default newline definition that was chosen when PCRE
        !           684: was built. Setting the first or the second specifies that a newline is
        !           685: indicated by a single character (CR or LF, respectively). Setting
        !           686: PCRE_NEWLINE_CRLF specifies that a newline is indicated by the two-character
        !           687: CRLF sequence. Setting PCRE_NEWLINE_ANYCRLF specifies that any of the three
        !           688: preceding sequences should be recognized. Setting PCRE_NEWLINE_ANY specifies
        !           689: that any Unicode newline sequence should be recognized. The Unicode newline
        !           690: sequences are the three just mentioned, plus the single characters VT (vertical
        !           691: tab, U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS (line
        !           692: separator, U+2028), and PS (paragraph separator, U+2029). The last two are
        !           693: recognized only in UTF-8 mode.
        !           694: .P
        !           695: The newline setting in the options word uses three bits that are treated
        !           696: as a number, giving eight possibilities. Currently only six are used (default
        !           697: plus the five values above). This means that if you set more than one newline
        !           698: option, the combination may or may not be sensible. For example,
        !           699: PCRE_NEWLINE_CR with PCRE_NEWLINE_LF is equivalent to PCRE_NEWLINE_CRLF, but
        !           700: other combinations may yield unused numbers and cause an error.
        !           701: .P
        !           702: The only time that a line break in a pattern is specially recognized when
        !           703: compiling is when PCRE_EXTENDED is set. CR and LF are whitespace characters,
        !           704: and so are ignored in this mode. Also, an unescaped # outside a character class
        !           705: indicates a comment that lasts until after the next line break sequence. In
        !           706: other circumstances, line break sequences in patterns are treated as literal
        !           707: data.
        !           708: .P
        !           709: The newline option that is set at compile time becomes the default that is used
        !           710: for \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, but it can be overridden.
        !           711: .sp
        !           712:   PCRE_NO_AUTO_CAPTURE
        !           713: .sp
        !           714: If this option is set, it disables the use of numbered capturing parentheses in
        !           715: the pattern. Any opening parenthesis that is not followed by ? behaves as if it
        !           716: were followed by ?: but named parentheses can still be used for capturing (and
        !           717: they acquire numbers in the usual way). There is no equivalent of this option
        !           718: in Perl.
        !           719: .sp
        !           720:   PCRE_NO_START_OPTIMIZE
        !           721: .sp
        !           722: This is an option that acts at matching time; that is, it is really an option
        !           723: for \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. If it is set at compile time,
        !           724: it is remembered with the compiled pattern and assumed at matching time. For
        !           725: details see the discussion of PCRE_NO_START_OPTIMIZE
        !           726: .\" HTML <a href="#execoptions">
        !           727: .\" </a>
        !           728: below.
        !           729: .\"
        !           730: .sp
        !           731:   PCRE_UCP
        !           732: .sp
        !           733: This option changes the way PCRE processes \eB, \eb, \eD, \ed, \eS, \es, \eW,
        !           734: \ew, and some of the POSIX character classes. By default, only ASCII characters
        !           735: are recognized, but if PCRE_UCP is set, Unicode properties are used instead to
        !           736: classify characters. More details are given in the section on
        !           737: .\" HTML <a href="pcrepattern.html#genericchartypes">
        !           738: .\" </a>
        !           739: generic character types
        !           740: .\"
        !           741: in the
        !           742: .\" HREF
        !           743: \fBpcrepattern\fP
        !           744: .\"
        !           745: page. If you set PCRE_UCP, matching one of the items it affects takes much
        !           746: longer. The option is available only if PCRE has been compiled with Unicode
        !           747: property support.
        !           748: .sp
        !           749:   PCRE_UNGREEDY
        !           750: .sp
        !           751: This option inverts the "greediness" of the quantifiers so that they are not
        !           752: greedy by default, but become greedy if followed by "?". It is not compatible
        !           753: with Perl. It can also be set by a (?U) option setting within the pattern.
        !           754: .sp
        !           755:   PCRE_UTF8
        !           756: .sp
        !           757: This option causes PCRE to regard both the pattern and the subject as strings
        !           758: of UTF-8 characters instead of single-byte character strings. However, it is
        !           759: available only when PCRE is built to include UTF-8 support. If not, the use
        !           760: of this option provokes an error. Details of how this option changes the
        !           761: behaviour of PCRE are given in the
        !           762: .\" HREF
        !           763: \fBpcreunicode\fP
        !           764: .\"
        !           765: page.
        !           766: .sp
        !           767:   PCRE_NO_UTF8_CHECK
        !           768: .sp
        !           769: When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is
        !           770: automatically checked. There is a discussion about the
        !           771: .\" HTML <a href="pcre.html#utf8strings">
        !           772: .\" </a>
        !           773: validity of UTF-8 strings
        !           774: .\"
        !           775: in the main
        !           776: .\" HREF
        !           777: \fBpcre\fP
        !           778: .\"
        !           779: page. If an invalid UTF-8 sequence of bytes is found, \fBpcre_compile()\fP
        !           780: returns an error. If you already know that your pattern is valid, and you want
        !           781: to skip this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK
        !           782: option. When it is set, the effect of passing an invalid UTF-8 string as a
        !           783: pattern is undefined. It may cause your program to crash. Note that this option
        !           784: can also be passed to \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, to suppress
        !           785: the UTF-8 validity checking of subject strings.
        !           786: .
        !           787: .
        !           788: .SH "COMPILATION ERROR CODES"
        !           789: .rs
        !           790: .sp
        !           791: The following table lists the error codes that may be returned by
        !           792: \fBpcre_compile2()\fP, along with the error messages that may be returned by
        !           793: both compiling functions. As PCRE has developed, some error codes have fallen
        !           794: out of use. To avoid confusion, they have not been re-used.
        !           795: .sp
        !           796:    0  no error
        !           797:    1  \e at end of pattern
        !           798:    2  \ec at end of pattern
        !           799:    3  unrecognized character follows \e
        !           800:    4  numbers out of order in {} quantifier
        !           801:    5  number too big in {} quantifier
        !           802:    6  missing terminating ] for character class
        !           803:    7  invalid escape sequence in character class
        !           804:    8  range out of order in character class
        !           805:    9  nothing to repeat
        !           806:   10  [this code is not in use]
        !           807:   11  internal error: unexpected repeat
        !           808:   12  unrecognized character after (? or (?-
        !           809:   13  POSIX named classes are supported only within a class
        !           810:   14  missing )
        !           811:   15  reference to non-existent subpattern
        !           812:   16  erroffset passed as NULL
        !           813:   17  unknown option bit(s) set
        !           814:   18  missing ) after comment
        !           815:   19  [this code is not in use]
        !           816:   20  regular expression is too large
        !           817:   21  failed to get memory
        !           818:   22  unmatched parentheses
        !           819:   23  internal error: code overflow
        !           820:   24  unrecognized character after (?<
        !           821:   25  lookbehind assertion is not fixed length
        !           822:   26  malformed number or name after (?(
        !           823:   27  conditional group contains more than two branches
        !           824:   28  assertion expected after (?(
        !           825:   29  (?R or (?[+-]digits must be followed by )
        !           826:   30  unknown POSIX class name
        !           827:   31  POSIX collating elements are not supported
        !           828:   32  this version of PCRE is not compiled with PCRE_UTF8 support
        !           829:   33  [this code is not in use]
        !           830:   34  character value in \ex{...} sequence is too large
        !           831:   35  invalid condition (?(0)
        !           832:   36  \eC not allowed in lookbehind assertion
        !           833:   37  PCRE does not support \eL, \el, \eN{name}, \eU, or \eu
        !           834:   38  number after (?C is > 255
        !           835:   39  closing ) for (?C expected
        !           836:   40  recursive call could loop indefinitely
        !           837:   41  unrecognized character after (?P
        !           838:   42  syntax error in subpattern name (missing terminator)
        !           839:   43  two named subpatterns have the same name
        !           840:   44  invalid UTF-8 string
        !           841:   45  support for \eP, \ep, and \eX has not been compiled
        !           842:   46  malformed \eP or \ep sequence
        !           843:   47  unknown property name after \eP or \ep
        !           844:   48  subpattern name is too long (maximum 32 characters)
        !           845:   49  too many named subpatterns (maximum 10000)
        !           846:   50  [this code is not in use]
        !           847:   51  octal value is greater than \e377 (not in UTF-8 mode)
        !           848:   52  internal error: overran compiling workspace
        !           849:   53  internal error: previously-checked referenced subpattern
        !           850:         not found
        !           851:   54  DEFINE group contains more than one branch
        !           852:   55  repeating a DEFINE group is not allowed
        !           853:   56  inconsistent NEWLINE options
        !           854:   57  \eg is not followed by a braced, angle-bracketed, or quoted
        !           855:         name/number or by a plain number
        !           856:   58  a numbered reference must not be zero
        !           857:   59  an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)
        !           858:   60  (*VERB) not recognized
        !           859:   61  number is too big
        !           860:   62  subpattern name expected
        !           861:   63  digit expected after (?+
        !           862:   64  ] is an invalid data character in JavaScript compatibility mode
        !           863:   65  different names for subpatterns of the same number are
        !           864:         not allowed
        !           865:   66  (*MARK) must have an argument
        !           866:   67  this version of PCRE is not compiled with PCRE_UCP support
        !           867:   68  \ec must be followed by an ASCII character
        !           868:   69  \ek is not followed by a braced, angle-bracketed, or quoted name
        !           869: .sp
        !           870: The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
        !           871: be used if the limits were changed when PCRE was built.
        !           872: .
        !           873: .
        !           874: .\" HTML <a name="studyingapattern"></a>
        !           875: .SH "STUDYING A PATTERN"
        !           876: .rs
        !           877: .sp
        !           878: .B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
        !           879: .ti +5n
        !           880: .B const char **\fIerrptr\fP);
        !           881: .PP
        !           882: If a compiled pattern is going to be used several times, it is worth spending
        !           883: more time analyzing it in order to speed up the time taken for matching. The
        !           884: function \fBpcre_study()\fP takes a pointer to a compiled pattern as its first
        !           885: argument. If studying the pattern produces additional information that will
        !           886: help speed up matching, \fBpcre_study()\fP returns a pointer to a
        !           887: \fBpcre_extra\fP block, in which the \fIstudy_data\fP field points to the
        !           888: results of the study.
        !           889: .P
        !           890: The returned value from \fBpcre_study()\fP can be passed directly to
        !           891: \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. However, a \fBpcre_extra\fP block
        !           892: also contains other fields that can be set by the caller before the block is
        !           893: passed; these are described
        !           894: .\" HTML <a href="#extradata">
        !           895: .\" </a>
        !           896: below
        !           897: .\"
        !           898: in the section on matching a pattern.
        !           899: .P
        !           900: If studying the pattern does not produce any useful information,
        !           901: \fBpcre_study()\fP returns NULL. In that circumstance, if the calling program
        !           902: wants to pass any of the other fields to \fBpcre_exec()\fP or
        !           903: \fBpcre_dfa_exec()\fP, it must set up its own \fBpcre_extra\fP block.
        !           904: .P
        !           905: The second argument of \fBpcre_study()\fP contains option bits. There is only
        !           906: one option: PCRE_STUDY_JIT_COMPILE. If this is set, and the just-in-time
        !           907: compiler is available, the pattern is further compiled into machine code that
        !           908: executes much faster than the \fBpcre_exec()\fP matching function. If
        !           909: the just-in-time compiler is not available, this option is ignored. All other
        !           910: bits in the \fIoptions\fP argument must be zero.
        !           911: .P
        !           912: JIT compilation is a heavyweight optimization. It can take some time for
        !           913: patterns to be analyzed, and for one-off matches and simple patterns the
        !           914: benefit of faster execution might be offset by a much slower study time.
        !           915: Not all patterns can be optimized by the JIT compiler. For those that cannot be
        !           916: handled, matching automatically falls back to the \fBpcre_exec()\fP
        !           917: interpreter. For more details, see the
        !           918: .\" HREF
        !           919: \fBpcrejit\fP
        !           920: .\"
        !           921: documentation.
        !           922: .P
        !           923: The third argument for \fBpcre_study()\fP is a pointer for an error message. If
        !           924: studying succeeds (even if no data is returned), the variable it points to is
        !           925: set to NULL. Otherwise it is set to point to a textual error message. This is a
        !           926: static string that is part of the library. You must not try to free it. You
        !           927: should test the error pointer for NULL after calling \fBpcre_study()\fP, to be
        !           928: sure that it has run successfully.
        !           929: .P
        !           930: When you are finished with a pattern, you can free the memory used for the
        !           931: study data by calling \fBpcre_free_study()\fP. This function was added to the
        !           932: API for release 8.20. For earlier versions, the memory could be freed with
        !           933: \fBpcre_free()\fP, just like the pattern itself. This will still work in cases
        !           934: where PCRE_STUDY_JIT_COMPILE is not used, but it is advisable to change to the
        !           935: new function when convenient.
        !           936: .P
        !           937: This is a typical way in which \fBpcre_study()\fP is used (except that in a
        !           938: real application there should be tests for errors):
        !           939: .sp
        !           940:   int rc;
        !           941:   pcre *re;
        !           942:   pcre_extra *sd;
        !           943:   re = pcre_compile("pattern", 0, &error, &erroroffset, NULL);
        !           944:   sd = pcre_study(
        !           945:     re,             /* result of pcre_compile() */
        !           946:     0,              /* no options */
        !           947:     &error);        /* set to NULL or points to a message */
        !           948:   rc = pcre_exec(   /* see below for details of pcre_exec() options */
        !           949:     re, sd, "subject", 7, 0, 0, ovector, 30);
        !           950:   ...
        !           951:   pcre_free_study(sd);
        !           952:   pcre_free(re);
        !           953: .sp
        !           954: Studying a pattern does two things: first, a lower bound for the length of
        !           955: subject string that is needed to match the pattern is computed. This does not
        !           956: mean that there are any strings of that length that match, but it does
        !           957: guarantee that no shorter strings match. The value is used by
        !           958: \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP to avoid wasting time by trying to
        !           959: match strings that are shorter than the lower bound. You can find out the value
        !           960: in a calling program via the \fBpcre_fullinfo()\fP function.
        !           961: .P
        !           962: Studying a pattern is also useful for non-anchored patterns that do not have a
        !           963: single fixed starting character. A bitmap of possible starting bytes is
        !           964: created. This speeds up finding a position in the subject at which to start
        !           965: matching.
        !           966: .P
        !           967: These two optimizations apply to both \fBpcre_exec()\fP and
        !           968: \fBpcre_dfa_exec()\fP. However, they are not used by \fBpcre_exec()\fP if
        !           969: \fBpcre_study()\fP is called with the PCRE_STUDY_JIT_COMPILE option, and
        !           970: just-in-time compiling is successful. The optimizations can be disabled by
        !           971: setting the PCRE_NO_START_OPTIMIZE option when calling \fBpcre_exec()\fP or
        !           972: \fBpcre_dfa_exec()\fP. You might want to do this if your pattern contains
        !           973: callouts or (*MARK) (which cannot be handled by the JIT compiler), and you want
        !           974: to make use of these facilities in cases where matching fails. See the
        !           975: discussion of PCRE_NO_START_OPTIMIZE
        !           976: .\" HTML <a href="#execoptions">
        !           977: .\" </a>
        !           978: below.
        !           979: .\"
        !           980: .
        !           981: .
        !           982: .\" HTML <a name="localesupport"></a>
        !           983: .SH "LOCALE SUPPORT"
        !           984: .rs
        !           985: .sp
        !           986: PCRE handles caseless matching, and determines whether characters are letters,
        !           987: digits, or whatever, by reference to a set of tables, indexed by character
        !           988: value. When running in UTF-8 mode, this applies only to characters with codes
        !           989: less than 128. By default, higher-valued codes never match escapes such as \ew
        !           990: or \ed, but they can be tested with \ep if PCRE is built with Unicode character
        !           991: property support. Alternatively, the PCRE_UCP option can be set at compile
        !           992: time; this causes \ew and friends to use Unicode property support instead of
        !           993: built-in tables. The use of locales with Unicode is discouraged. If you are
        !           994: handling characters with codes greater than 127, you should either use UTF-8
        !           995: and Unicode, or use locales, but not try to mix the two.
        !           996: .P
        !           997: PCRE contains an internal set of tables that are used when the final argument
        !           998: of \fBpcre_compile()\fP is NULL. These are sufficient for many applications.
        !           999: Normally, the internal tables recognize only ASCII characters. However, when
        !          1000: PCRE is built, it is possible to cause the internal tables to be rebuilt in the
        !          1001: default "C" locale of the local system, which may cause them to be different.
        !          1002: .P
        !          1003: The internal tables can always be overridden by tables supplied by the
        !          1004: application that calls PCRE. These may be created in a different locale from
        !          1005: the default. As more and more applications change to using Unicode, the need
        !          1006: for this locale support is expected to die away.
        !          1007: .P
        !          1008: External tables are built by calling the \fBpcre_maketables()\fP function,
        !          1009: which has no arguments, in the relevant locale. The result can then be passed
        !          1010: to \fBpcre_compile()\fP or \fBpcre_exec()\fP as often as necessary. For
        !          1011: example, to build and use tables that are appropriate for the French locale
        !          1012: (where accented characters with values greater than 127 are treated as letters),
        !          1013: the following code could be used:
        !          1014: .sp
        !          1015:   setlocale(LC_CTYPE, "fr_FR");
        !          1016:   tables = pcre_maketables();
        !          1017:   re = pcre_compile(..., tables);
        !          1018: .sp
        !          1019: The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
        !          1020: are using Windows, the name for the French locale is "french".
        !          1021: .P
        !          1022: When \fBpcre_maketables()\fP runs, the tables are built in memory that is
        !          1023: obtained via \fBpcre_malloc\fP. It is the caller's responsibility to ensure
        !          1024: that the memory containing the tables remains available for as long as it is
        !          1025: needed.
        !          1026: .P
        !          1027: The pointer that is passed to \fBpcre_compile()\fP is saved with the compiled
        !          1028: pattern, and the same tables are used via this pointer by \fBpcre_study()\fP
        !          1029: and normally also by \fBpcre_exec()\fP. Thus, by default, for any single
        !          1030: pattern, compilation, studying and matching all happen in the same locale, but
        !          1031: different patterns can be compiled in different locales.
        !          1032: .P
        !          1033: It is possible to pass a table pointer or NULL (indicating the use of the
        !          1034: internal tables) to \fBpcre_exec()\fP. Although not intended for this purpose,
        !          1035: this facility could be used to match a pattern in a different locale from the
        !          1036: one in which it was compiled. Passing table pointers at run time is discussed
        !          1037: below in the section on matching a pattern.
        !          1038: .
        !          1039: .
        !          1040: .\" HTML <a name="infoaboutpattern"></a>
        !          1041: .SH "INFORMATION ABOUT A PATTERN"
        !          1042: .rs
        !          1043: .sp
        !          1044: .B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
        !          1045: .ti +5n
        !          1046: .B int \fIwhat\fP, void *\fIwhere\fP);
        !          1047: .PP
        !          1048: The \fBpcre_fullinfo()\fP function returns information about a compiled
        !          1049: pattern. It replaces the obsolete \fBpcre_info()\fP function, which is
        !          1050: nevertheless retained for backwards compatibility (and is documented below).
        !          1051: .P
        !          1052: The first argument for \fBpcre_fullinfo()\fP is a pointer to the compiled
        !          1053: pattern. The second argument is the result of \fBpcre_study()\fP, or NULL if
        !          1054: the pattern was not studied. The third argument specifies which piece of
        !          1055: information is required, and the fourth argument is a pointer to a variable
        !          1056: to receive the data. The yield of the function is zero for success, or one of
        !          1057: the following negative numbers:
        !          1058: .sp
        !          1059:   PCRE_ERROR_NULL       the argument \fIcode\fP was NULL
        !          1060:                         the argument \fIwhere\fP was NULL
        !          1061:   PCRE_ERROR_BADMAGIC   the "magic number" was not found
        !          1062:   PCRE_ERROR_BADOPTION  the value of \fIwhat\fP was invalid
        !          1063: .sp
        !          1064: The "magic number" is placed at the start of each compiled pattern as a simple
        !          1065: check against passing an arbitrary memory pointer. Here is a typical call of
        !          1066: \fBpcre_fullinfo()\fP, to obtain the length of the compiled pattern:
        !          1067: .sp
        !          1068:   int rc;
        !          1069:   size_t length;
        !          1070:   rc = pcre_fullinfo(
        !          1071:     re,               /* result of pcre_compile() */
        !          1072:     sd,               /* result of pcre_study(), or NULL */
        !          1073:     PCRE_INFO_SIZE,   /* what is required */
        !          1074:     &length);         /* where to put the data */
        !          1075: .sp
        !          1076: The possible values for the third argument are defined in \fBpcre.h\fP, and are
        !          1077: as follows:
        !          1078: .sp
        !          1079:   PCRE_INFO_BACKREFMAX
        !          1080: .sp
        !          1081: Return the number of the highest back reference in the pattern. The fourth
        !          1082: argument should point to an \fBint\fP variable. Zero is returned if there are
        !          1083: no back references.
        !          1084: .sp
        !          1085:   PCRE_INFO_CAPTURECOUNT
        !          1086: .sp
        !          1087: Return the number of capturing subpatterns in the pattern. The fourth argument
        !          1088: should point to an \fBint\fP variable.
        !          1089: .sp
        !          1090:   PCRE_INFO_DEFAULT_TABLES
        !          1091: .sp
        !          1092: Return a pointer to the internal default character tables within PCRE. The
        !          1093: fourth argument should point to an \fBunsigned char *\fP variable. This
        !          1094: information call is provided for internal use by the \fBpcre_study()\fP
        !          1095: function. External callers can cause PCRE to use its internal tables by passing
        !          1096: a NULL table pointer.
        !          1097: .sp
        !          1098:   PCRE_INFO_FIRSTBYTE
        !          1099: .sp
        !          1100: Return information about the first byte of any matched string, for a
        !          1101: non-anchored pattern. The fourth argument should point to an \fBint\fP
        !          1102: variable. (This option used to be called PCRE_INFO_FIRSTCHAR; the old name is
        !          1103: still recognized for backwards compatibility.)
        !          1104: .P
        !          1105: If there is a fixed first byte, for example, from a pattern such as
        !          1106: (cat|cow|coyote), its value is returned. Otherwise, if either
        !          1107: .sp
        !          1108: (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
        !          1109: starts with "^", or
        !          1110: .sp
        !          1111: (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
        !          1112: (if it were set, the pattern would be anchored),
        !          1113: .sp
        !          1114: -1 is returned, indicating that the pattern matches only at the start of a
        !          1115: subject string or after any newline within the string. Otherwise -2 is
        !          1116: returned. For anchored patterns, -2 is returned.
        !          1117: .sp
        !          1118:   PCRE_INFO_FIRSTTABLE
        !          1119: .sp
        !          1120: If the pattern was studied, and this resulted in the construction of a 256-bit
        !          1121: table indicating a fixed set of bytes for the first byte in any matching
        !          1122: string, a pointer to the table is returned. Otherwise NULL is returned. The
        !          1123: fourth argument should point to an \fBunsigned char *\fP variable.
        !          1124: .sp
        !          1125:   PCRE_INFO_HASCRORLF
        !          1126: .sp
        !          1127: Return 1 if the pattern contains any explicit matches for CR or LF characters,
        !          1128: otherwise 0. The fourth argument should point to an \fBint\fP variable. An
        !          1129: explicit match is either a literal CR or LF character, or \er or \en.
        !          1130: .sp
        !          1131:   PCRE_INFO_JCHANGED
        !          1132: .sp
        !          1133: Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise
        !          1134: 0. The fourth argument should point to an \fBint\fP variable. (?J) and
        !          1135: (?-J) set and unset the local PCRE_DUPNAMES option, respectively.
        !          1136: .sp
        !          1137:   PCRE_INFO_JIT
        !          1138: .sp
        !          1139: Return 1 if the pattern was studied with the PCRE_STUDY_JIT_COMPILE option, and
        !          1140: just-in-time compiling was successful. The fourth argument should point to an
        !          1141: \fBint\fP variable. A return value of 0 means that JIT support is not available
        !          1142: in this version of PCRE, or that the pattern was not studied with the
        !          1143: PCRE_STUDY_JIT_COMPILE option, or that the JIT compiler could not handle this
        !          1144: particular pattern. See the
        !          1145: .\" HREF
        !          1146: \fBpcrejit\fP
        !          1147: .\"
        !          1148: documentation for details of what can and cannot be handled.
        !          1149: .sp
        !          1150:   PCRE_INFO_JITSIZE
        !          1151: .sp
        !          1152: If the pattern was successfully studied with the PCRE_STUDY_JIT_COMPILE option,
        !          1153: return the size of the JIT compiled code, otherwise return zero. The fourth
        !          1154: argument should point to a \fBsize_t\fP variable.
        !          1155: .sp
        !          1156:   PCRE_INFO_LASTLITERAL
        !          1157: .sp
        !          1158: Return the value of the rightmost literal byte that must exist in any matched
        !          1159: string, other than at its start, if such a byte has been recorded. The fourth
        !          1160: argument should point to an \fBint\fP variable. If there is no such byte, -1 is
        !          1161: returned. For anchored patterns, a last literal byte is recorded only if it
        !          1162: follows something of variable length. For example, for the pattern
        !          1163: /^a\ed+z\ed+/ the returned value is "z", but for /^a\edz\ed/ the returned value
        !          1164: is -1.
        !          1165: .sp
        !          1166:   PCRE_INFO_MINLENGTH
        !          1167: .sp
        !          1168: If the pattern was studied and a minimum length for matching subject strings
        !          1169: was computed, its value is returned. Otherwise the returned value is -1. The
        !          1170: value is a number of characters, not bytes (this may be relevant in UTF-8
        !          1171: mode). The fourth argument should point to an \fBint\fP variable. A
        !          1172: non-negative value is a lower bound to the length of any matching string. There
        !          1173: may not be any strings of that length that do actually match, but every string
        !          1174: that does match is at least that long.
        !          1175: .sp
        !          1176:   PCRE_INFO_NAMECOUNT
        !          1177:   PCRE_INFO_NAMEENTRYSIZE
        !          1178:   PCRE_INFO_NAMETABLE
        !          1179: .sp
        !          1180: PCRE supports the use of named as well as numbered capturing parentheses. The
        !          1181: names are just an additional way of identifying the parentheses, which still
        !          1182: acquire numbers. Several convenience functions such as
        !          1183: \fBpcre_get_named_substring()\fP are provided for extracting captured
        !          1184: substrings by name. It is also possible to extract the data directly, by first
        !          1185: converting the name to a number in order to access the correct pointers in the
        !          1186: output vector (described with \fBpcre_exec()\fP below). To do the conversion,
        !          1187: you need to use the name-to-number map, which is described by these three
        !          1188: values.
        !          1189: .P
        !          1190: The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT gives
        !          1191: the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each
        !          1192: entry; both of these return an \fBint\fP value. The entry size depends on the
        !          1193: length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first
        !          1194: entry of the table (a pointer to \fBchar\fP). The first two bytes of each entry
        !          1195: are the number of the capturing parenthesis, most significant byte first. The
        !          1196: rest of the entry is the corresponding name, zero terminated.
        !          1197: .P
        !          1198: The names are in alphabetical order. Duplicate names may appear if (?| is used
        !          1199: to create multiple groups with the same number, as described in the
        !          1200: .\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
        !          1201: .\" </a>
        !          1202: section on duplicate subpattern numbers
        !          1203: .\"
        !          1204: in the
        !          1205: .\" HREF
        !          1206: \fBpcrepattern\fP
        !          1207: .\"
        !          1208: page. Duplicate names for subpatterns with different numbers are permitted only
        !          1209: if PCRE_DUPNAMES is set. In all cases of duplicate names, they appear in the
        !          1210: table in the order in which they were found in the pattern. In the absence of
        !          1211: (?| this is the order of increasing number; when (?| is used this is not
        !          1212: necessarily the case because later subpatterns may have lower numbers.
        !          1213: .P
        !          1214: As a simple example of the name/number table, consider the following pattern
        !          1215: (assume PCRE_EXTENDED is set, so white space - including newlines - is
        !          1216: ignored):
        !          1217: .sp
        !          1218: .\" JOIN
        !          1219:   (?<date> (?<year>(\ed\ed)?\ed\ed) -
        !          1220:   (?<month>\ed\ed) - (?<day>\ed\ed) )
        !          1221: .sp
        !          1222: There are four named subpatterns, so the table has four entries, and each entry
        !          1223: in the table is eight bytes long. The table is as follows, with non-printing
        !          1224: bytes shown in hexadecimal, and undefined bytes shown as ??:
        !          1225: .sp
        !          1226:   00 01 d  a  t  e  00 ??
        !          1227:   00 05 d  a  y  00 ?? ??
        !          1228:   00 04 m  o  n  t  h  00
        !          1229:   00 02 y  e  a  r  00 ??
        !          1230: .sp
        !          1231: When writing code to extract data from named subpatterns using the
        !          1232: name-to-number map, remember that the length of the entries is likely to be
        !          1233: different for each compiled pattern.
        !          1234: .sp
        !          1235:   PCRE_INFO_OKPARTIAL
        !          1236: .sp
        !          1237: Return 1 if the pattern can be used for partial matching with
        !          1238: \fBpcre_exec()\fP, otherwise 0. The fourth argument should point to an
        !          1239: \fBint\fP variable. From release 8.00, this always returns 1, because the
        !          1240: restrictions that previously applied to partial matching have been lifted. The
        !          1241: .\" HREF
        !          1242: \fBpcrepartial\fP
        !          1243: .\"
        !          1244: documentation gives details of partial matching.
        !          1245: .sp
        !          1246:   PCRE_INFO_OPTIONS
        !          1247: .sp
        !          1248: Return a copy of the options with which the pattern was compiled. The fourth
        !          1249: argument should point to an \fBunsigned long int\fP variable. These option bits
        !          1250: are those specified in the call to \fBpcre_compile()\fP, modified by any
        !          1251: top-level option settings at the start of the pattern itself. In other words,
        !          1252: they are the options that will be in force when matching starts. For example,
        !          1253: if the pattern /(?im)abc(?-i)d/ is compiled with the PCRE_EXTENDED option, the
        !          1254: result is PCRE_CASELESS, PCRE_MULTILINE, and PCRE_EXTENDED.
        !          1255: .P
        !          1256: A pattern is automatically anchored by PCRE if all of its top-level
        !          1257: alternatives begin with one of the following:
        !          1258: .sp
        !          1259:   ^     unless PCRE_MULTILINE is set
        !          1260:   \eA    always
        !          1261:   \eG    always
        !          1262: .\" JOIN
        !          1263:   .*    if PCRE_DOTALL is set and there are no back
        !          1264:           references to the subpattern in which .* appears
        !          1265: .sp
        !          1266: For such patterns, the PCRE_ANCHORED bit is set in the options returned by
        !          1267: \fBpcre_fullinfo()\fP.
        !          1268: .sp
        !          1269:   PCRE_INFO_SIZE
        !          1270: .sp
        !          1271: Return the size of the compiled pattern. The fourth argument should point to a
        !          1272: \fBsize_t\fP variable. This value does not include the size of the \fBpcre\fP
        !          1273: structure that is returned by \fBpcre_compile()\fP. The value that is passed as
        !          1274: the argument to \fBpcre_malloc()\fP when \fBpcre_compile()\fP is getting memory
        !          1275: in which to place the compiled data is the value returned by this option plus
        !          1276: the size of the \fBpcre\fP structure. Studying a compiled pattern, with or
        !          1277: without JIT, does not alter the value returned by this option.
        !          1278: .sp
        !          1279:   PCRE_INFO_STUDYSIZE
        !          1280: .sp
        !          1281: Return the size of the data block pointed to by the \fIstudy_data\fP field in a
        !          1282: \fBpcre_extra\fP block. If \fBpcre_extra\fP is NULL, or there is no study data,
        !          1283: zero is returned. The fourth argument should point to a \fBsize_t\fP variable.
        !          1284: The \fIstudy_data\fP field is set by \fBpcre_study()\fP to record information
        !          1285: that will speed up matching (see the section entitled
        !          1286: .\" HTML <a href="#studyingapattern">
        !          1287: .\" </a>
        !          1288: "Studying a pattern"
        !          1289: .\"
        !          1290: above). The format of the \fIstudy_data\fP block is private, but its length
        !          1291: is made available via this option so that it can be saved and restored (see the
        !          1292: .\" HREF
        !          1293: \fBpcreprecompile\fP
        !          1294: .\"
        !          1295: documentation for details).
        !          1296: .
        !          1297: .
        !          1298: .SH "OBSOLETE INFO FUNCTION"
        !          1299: .rs
        !          1300: .sp
        !          1301: .B int pcre_info(const pcre *\fIcode\fP, int *\fIoptptr\fP, int
        !          1302: .B *\fIfirstcharptr\fP);
        !          1303: .PP
        !          1304: The \fBpcre_info()\fP function is now obsolete because its interface is too
        !          1305: restrictive to return all the available data about a compiled pattern. New
        !          1306: programs should use \fBpcre_fullinfo()\fP instead. The yield of
        !          1307: \fBpcre_info()\fP is the number of capturing subpatterns, or one of the
        !          1308: following negative numbers:
        !          1309: .sp
        !          1310:   PCRE_ERROR_NULL       the argument \fIcode\fP was NULL
        !          1311:   PCRE_ERROR_BADMAGIC   the "magic number" was not found
        !          1312: .sp
        !          1313: If the \fIoptptr\fP argument is not NULL, a copy of the options with which the
        !          1314: pattern was compiled is placed in the integer it points to (see
        !          1315: PCRE_INFO_OPTIONS above).
        !          1316: .P
        !          1317: If the pattern is not anchored and the \fIfirstcharptr\fP argument is not NULL,
        !          1318: it is used to pass back information about the first character of any matched
        !          1319: string (see PCRE_INFO_FIRSTBYTE above).
        !          1320: .
        !          1321: .
        !          1322: .SH "REFERENCE COUNTS"
        !          1323: .rs
        !          1324: .sp
        !          1325: .B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
        !          1326: .PP
        !          1327: The \fBpcre_refcount()\fP function is used to maintain a reference count in the
        !          1328: data block that contains a compiled pattern. It is provided for the benefit of
        !          1329: applications that operate in an object-oriented manner, where different parts
        !          1330: of the application may be using the same compiled pattern, but you want to free
        !          1331: the block when they are all done.
        !          1332: .P
        !          1333: When a pattern is compiled, the reference count field is initialized to zero.
        !          1334: It is changed only by calling this function, whose action is to add the
        !          1335: \fIadjust\fP value (which may be positive or negative) to it. The yield of the
        !          1336: function is the new value. However, the value of the count is constrained to
        !          1337: lie between 0 and 65535, inclusive. If the new value is outside these limits,
        !          1338: it is forced to the appropriate limit value.
        !          1339: .P
        !          1340: Except when it is zero, the reference count is not correctly preserved if a
        !          1341: pattern is compiled on one host and then transferred to a host whose byte-order
        !          1342: is different. (This seems a highly unlikely scenario.)
        !          1343: .
        !          1344: .
        !          1345: .SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION"
        !          1346: .rs
        !          1347: .sp
        !          1348: .B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
        !          1349: .ti +5n
        !          1350: .B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
        !          1351: .ti +5n
        !          1352: .B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
        !          1353: .P
        !          1354: The function \fBpcre_exec()\fP is called to match a subject string against a
        !          1355: compiled pattern, which is passed in the \fIcode\fP argument. If the
        !          1356: pattern was studied, the result of the study should be passed in the
        !          1357: \fIextra\fP argument. You can call \fBpcre_exec()\fP with the same \fIcode\fP
        !          1358: and \fIextra\fP arguments as many times as you like, in order to match
        !          1359: different subject strings with the same pattern.
        !          1360: .P
        !          1361: This function is the main matching facility of the library, and it operates in
        !          1362: a Perl-like manner. For specialist use there is also an alternative matching
        !          1363: function, which is described
        !          1364: .\" HTML <a href="#dfamatch">
        !          1365: .\" </a>
        !          1366: below
        !          1367: .\"
        !          1368: in the section about the \fBpcre_dfa_exec()\fP function.
        !          1369: .P
        !          1370: In most applications, the pattern will have been compiled (and optionally
        !          1371: studied) in the same process that calls \fBpcre_exec()\fP. However, it is
        !          1372: possible to save compiled patterns and study data, and then use them later
        !          1373: in different processes, possibly even on different hosts. For a discussion
        !          1374: about this, see the
        !          1375: .\" HREF
        !          1376: \fBpcreprecompile\fP
        !          1377: .\"
        !          1378: documentation.
        !          1379: .P
        !          1380: Here is an example of a simple call to \fBpcre_exec()\fP:
        !          1381: .sp
        !          1382:   int rc;
        !          1383:   int ovector[30];
        !          1384:   rc = pcre_exec(
        !          1385:     re,             /* result of pcre_compile() */
        !          1386:     NULL,           /* we didn't study the pattern */
        !          1387:     "some string",  /* the subject string */
        !          1388:     11,             /* the length of the subject string */
        !          1389:     0,              /* start at offset 0 in the subject */
        !          1390:     0,              /* default options */
        !          1391:     ovector,        /* vector of integers for substring information */
        !          1392:     30);            /* number of elements (NOT size in bytes) */
        !          1393: .
        !          1394: .
        !          1395: .\" HTML <a name="extradata"></a>
        !          1396: .SS "Extra data for \fBpcre_exec()\fR"
        !          1397: .rs
        !          1398: .sp
        !          1399: If the \fIextra\fP argument is not NULL, it must point to a \fBpcre_extra\fP
        !          1400: data block. The \fBpcre_study()\fP function returns such a block (when it
        !          1401: doesn't return NULL), but you can also create one for yourself, and pass
        !          1402: additional information in it. The \fBpcre_extra\fP block contains the following
        !          1403: fields (not necessarily in this order):
        !          1404: .sp
        !          1405:   unsigned long int \fIflags\fP;
        !          1406:   void *\fIstudy_data\fP;
        !          1407:   void *\fIexecutable_jit\fP;
        !          1408:   unsigned long int \fImatch_limit\fP;
        !          1409:   unsigned long int \fImatch_limit_recursion\fP;
        !          1410:   void *\fIcallout_data\fP;
        !          1411:   const unsigned char *\fItables\fP;
        !          1412:   unsigned char **\fImark\fP;
        !          1413: .sp
        !          1414: The \fIflags\fP field is a bitmap that specifies which of the other fields
        !          1415: are set. The flag bits are:
        !          1416: .sp
        !          1417:   PCRE_EXTRA_STUDY_DATA
        !          1418:   PCRE_EXTRA_EXECUTABLE_JIT
        !          1419:   PCRE_EXTRA_MATCH_LIMIT
        !          1420:   PCRE_EXTRA_MATCH_LIMIT_RECURSION
        !          1421:   PCRE_EXTRA_CALLOUT_DATA
        !          1422:   PCRE_EXTRA_TABLES
        !          1423:   PCRE_EXTRA_MARK
        !          1424: .sp
        !          1425: Other flag bits should be set to zero. The \fIstudy_data\fP field and sometimes
        !          1426: the \fIexecutable_jit\fP field are set in the \fBpcre_extra\fP block that is
        !          1427: returned by \fBpcre_study()\fP, together with the appropriate flag bits. You
        !          1428: should not set these yourself, but you may add to the block by setting the
        !          1429: other fields and their corresponding flag bits.
        !          1430: .P
        !          1431: The \fImatch_limit\fP field provides a means of preventing PCRE from using up a
        !          1432: vast amount of resources when running patterns that are not going to match,
        !          1433: but which have a very large number of possibilities in their search trees. The
        !          1434: classic example is a pattern that uses nested unlimited repeats.
        !          1435: .P
        !          1436: Internally, \fBpcre_exec()\fP uses a function called \fBmatch()\fP, which it
        !          1437: calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is
        !          1438: imposed on the number of times this function is called during a match, which
        !          1439: has the effect of limiting the amount of backtracking that can take place. For
        !          1440: patterns that are not anchored, the count restarts from zero for each position
        !          1441: in the subject string.
        !          1442: .P
        !          1443: When \fBpcre_exec()\fP is called with a pattern that was successfully studied
        !          1444: with the PCRE_STUDY_JIT_COMPILE option, the way that the matching is executed
        !          1445: is entirely different. However, there is still the possibility of runaway
        !          1446: matching that goes on for a very long time, and so the \fImatch_limit\fP value
        !          1447: is also used in this case (but in a different way) to limit how long the
        !          1448: matching can continue.
        !          1449: .P
        !          1450: The default value for the limit can be set when PCRE is built; the default
        !          1451: default is 10 million, which handles all but the most extreme cases. You can
        !          1452: override the default by supplying \fBpcre_exec()\fP with a \fBpcre_extra\fP
        !          1453: block in which \fImatch_limit\fP is set, and PCRE_EXTRA_MATCH_LIMIT is set in
        !          1454: the \fIflags\fP field. If the limit is exceeded, \fBpcre_exec()\fP returns
        !          1455: PCRE_ERROR_MATCHLIMIT.
        !          1456: .P
        !          1457: The \fImatch_limit_recursion\fP field is similar to \fImatch_limit\fP, but
        !          1458: instead of limiting the total number of times that \fBmatch()\fP is called, it
        !          1459: limits the depth of recursion. The recursion depth is a smaller number than the
        !          1460: total number of calls, because not all calls to \fBmatch()\fP are recursive.
        !          1461: This limit is of use only if it is set smaller than \fImatch_limit\fP.
        !          1462: .P
        !          1463: Limiting the recursion depth limits the amount of machine stack that can be
        !          1464: used, or, when PCRE has been compiled to use memory on the heap instead of the
        !          1465: stack, the amount of heap memory that can be used. This limit is not relevant,
        !          1466: and is ignored, if the pattern was successfully studied with
        !          1467: PCRE_STUDY_JIT_COMPILE.
        !          1468: .P
        !          1469: The default value for \fImatch_limit_recursion\fP can be set when PCRE is
        !          1470: built; the default default is the same value as the default for
        !          1471: \fImatch_limit\fP. You can override the default by supplying \fBpcre_exec()\fP
        !          1472: with a \fBpcre_extra\fP block in which \fImatch_limit_recursion\fP is set, and
        !          1473: PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the \fIflags\fP field. If the limit
        !          1474: is exceeded, \fBpcre_exec()\fP returns PCRE_ERROR_RECURSIONLIMIT.
        !          1475: .P
        !          1476: The \fIcallout_data\fP field is used in conjunction with the "callout" feature,
        !          1477: and is described in the
        !          1478: .\" HREF
        !          1479: \fBpcrecallout\fP
        !          1480: .\"
        !          1481: documentation.
        !          1482: .P
        !          1483: The \fItables\fP field is used to pass a character tables pointer to
        !          1484: \fBpcre_exec()\fP; this overrides the value that is stored with the compiled
        !          1485: pattern. A non-NULL value is stored with the compiled pattern only if custom
        !          1486: tables were supplied to \fBpcre_compile()\fP via its \fItableptr\fP argument.
        !          1487: If NULL is passed to \fBpcre_exec()\fP using this mechanism, it forces PCRE's
        !          1488: internal tables to be used. This facility is helpful when re-using patterns
        !          1489: that have been saved after compiling with an external set of tables, because
        !          1490: the external tables might be at a different address when \fBpcre_exec()\fP is
        !          1491: called. See the
        !          1492: .\" HREF
        !          1493: \fBpcreprecompile\fP
        !          1494: .\"
        !          1495: documentation for a discussion of saving compiled patterns for later use.
        !          1496: .P
        !          1497: If PCRE_EXTRA_MARK is set in the \fIflags\fP field, the \fImark\fP field must
        !          1498: be set to point to an \fBunsigned char *\fP variable. If the pattern contains any
        !          1499: backtracking control verbs such as (*MARK:NAME), and the execution ends up with
        !          1500: a name to pass back, a pointer to the name string (zero terminated) is placed
        !          1501: in the variable pointed to by the \fImark\fP field. The names are within the
        !          1502: compiled pattern; if you wish to retain such a name you must copy it before
        !          1503: freeing the memory of a compiled pattern. If there is no name to pass back, the
        !          1504: variable pointed to by the \fImark\fP field is set to NULL. For details of the
        !          1505: backtracking control verbs, see the section entitled
        !          1506: .\" HTML <a href="pcrepattern#backtrackcontrol">
        !          1507: .\" </a>
        !          1508: "Backtracking control"
        !          1509: .\"
        !          1510: in the
        !          1511: .\" HREF
        !          1512: \fBpcrepattern\fP
        !          1513: .\"
        !          1514: documentation.
        !          1515: .
        !          1516: .
        !          1517: .\" HTML <a name="execoptions"></a>
        !          1518: .SS "Option bits for \fBpcre_exec()\fP"
        !          1519: .rs
        !          1520: .sp
        !          1521: The unused bits of the \fIoptions\fP argument for \fBpcre_exec()\fP must be
        !          1522: zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP,
        !          1523: PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
        !          1524: PCRE_NO_START_OPTIMIZE, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_SOFT, and
        !          1525: PCRE_PARTIAL_HARD.
        !          1526: .P
        !          1527: If the pattern was successfully studied with the PCRE_STUDY_JIT_COMPILE option,
        !          1528: the only supported options for JIT execution are PCRE_NO_UTF8_CHECK,
        !          1529: PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, and PCRE_NOTEMPTY_ATSTART. Note in
        !          1530: particular that partial matching is not supported. If an unsupported option is
        !          1531: used, JIT execution is disabled and the normal interpretive code in
        !          1532: \fBpcre_exec()\fP is run.
        !          1533: .sp
        !          1534:   PCRE_ANCHORED
        !          1535: .sp
        !          1536: The PCRE_ANCHORED option limits \fBpcre_exec()\fP to matching at the first
        !          1537: matching position. If a pattern was compiled with PCRE_ANCHORED, or turned out
        !          1538: to be anchored by virtue of its contents, it cannot be made unanchored at
        !          1539: matching time.
        !          1540: .sp
        !          1541:   PCRE_BSR_ANYCRLF
        !          1542:   PCRE_BSR_UNICODE
        !          1543: .sp
        !          1544: These options (which are mutually exclusive) control what the \eR escape
        !          1545: sequence matches. The choice is either to match only CR, LF, or CRLF, or to
        !          1546: match any Unicode newline sequence. These options override the choice that was
        !          1547: made or defaulted when the pattern was compiled.
        !          1548: .sp
        !          1549:   PCRE_NEWLINE_CR
        !          1550:   PCRE_NEWLINE_LF
        !          1551:   PCRE_NEWLINE_CRLF
        !          1552:   PCRE_NEWLINE_ANYCRLF
        !          1553:   PCRE_NEWLINE_ANY
        !          1554: .sp
        !          1555: These options override the newline definition that was chosen or defaulted when
        !          1556: the pattern was compiled. For details, see the description of
        !          1557: \fBpcre_compile()\fP above. During matching, the newline choice affects the
        !          1558: behaviour of the dot, circumflex, and dollar metacharacters. It may also alter
        !          1559: the way the match position is advanced after a match failure for an unanchored
        !          1560: pattern.
        !          1561: .P
        !          1562: When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF, or PCRE_NEWLINE_ANY is set, and a
        !          1563: match attempt for an unanchored pattern fails when the current position is at a
        !          1564: CRLF sequence, and the pattern contains no explicit matches for CR or LF
        !          1565: characters, the match position is advanced by two characters instead of one, in
        !          1566: other words, to after the CRLF.
        !          1567: .P
        !          1568: The above rule is a compromise that makes the most common cases work as
        !          1569: expected. For example, if the pattern is .+A (and the PCRE_DOTALL option is not
        !          1570: set), it does not match the string "\er\enA" because, after failing at the
        !          1571: start, it skips both the CR and the LF before retrying. However, the pattern
        !          1572: [\er\en]A does match that string, because it contains an explicit CR or LF
        !          1573: reference, and so advances only by one character after the first failure.
        !          1574: .P
        !          1575: An explicit match for CR or LF is either a literal appearance of one of those
        !          1576: characters, or one of the \er or \en escape sequences. Implicit matches such as
        !          1577: [^X] do not count, nor does \es (which includes CR and LF in the characters
        !          1578: that it matches).
        !          1579: .P
        !          1580: Notwithstanding the above, anomalous effects may still occur when CRLF is a
        !          1581: valid newline sequence and explicit \er or \en escapes appear in the pattern.
        !          1582: .sp
        !          1583:   PCRE_NOTBOL
        !          1584: .sp
        !          1585: This option specifies that the first character of the subject string is not the
        !          1586: beginning of a line, so the circumflex metacharacter should not match before
        !          1587: it. Setting this without PCRE_MULTILINE (at compile time) causes circumflex
        !          1588: never to match. This option affects only the behaviour of the circumflex
        !          1589: metacharacter. It does not affect \eA.
        !          1590: .sp
        !          1591:   PCRE_NOTEOL
        !          1592: .sp
        !          1593: This option specifies that the end of the subject string is not the end of a
        !          1594: line, so the dollar metacharacter should not match it nor (except in multiline
        !          1595: mode) a newline immediately before it. Setting this without PCRE_MULTILINE (at
        !          1596: compile time) causes dollar never to match. This option affects only the
        !          1597: behaviour of the dollar metacharacter. It does not affect \eZ or \ez.
        !          1598: .sp
        !          1599:   PCRE_NOTEMPTY
        !          1600: .sp
        !          1601: An empty string is not considered to be a valid match if this option is set. If
        !          1602: there are alternatives in the pattern, they are tried. If all the alternatives
        !          1603: match the empty string, the entire match fails. For example, if the pattern
        !          1604: .sp
        !          1605:   a?b?
        !          1606: .sp
        !          1607: is applied to a string not beginning with "a" or "b", it matches an empty
        !          1608: string at the start of the subject. With PCRE_NOTEMPTY set, this match is not
        !          1609: valid, so PCRE searches further into the string for occurrences of "a" or "b".
        !          1610: .sp
        !          1611:   PCRE_NOTEMPTY_ATSTART
        !          1612: .sp
        !          1613: This is like PCRE_NOTEMPTY, except that an empty string match that is not at
        !          1614: the start of the subject is permitted. If the pattern is anchored, such a match
        !          1615: can occur only if the pattern contains \eK.
        !          1616: .P
        !          1617: Perl has no direct equivalent of PCRE_NOTEMPTY or PCRE_NOTEMPTY_ATSTART, but it
        !          1618: does make a special case of a pattern match of the empty string within its
        !          1619: \fBsplit()\fP function, and when using the /g modifier. It is possible to
        !          1620: emulate Perl's behaviour after matching a null string by first trying the match
        !          1621: again at the same offset with PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED, and then
        !          1622: if that fails, by advancing the starting offset (see below) and trying an
        !          1623: ordinary match again. There is some code that demonstrates how to do this in
        !          1624: the
        !          1625: .\" HREF
        !          1626: \fBpcredemo\fP
        !          1627: .\"
        !          1628: sample program. In the most general case, you have to check to see if the
        !          1629: newline convention recognizes CRLF as a newline, and if so, and the current
        !          1630: character is CR followed by LF, advance the starting offset by two characters
        !          1631: instead of one.
        !          1632: .sp
        !          1633:   PCRE_NO_START_OPTIMIZE
        !          1634: .sp
        !          1635: There are a number of optimizations that \fBpcre_exec()\fP uses at the start of
        !          1636: a match, in order to speed up the process. For example, if it is known that an
        !          1637: unanchored match must start with a specific character, it searches the subject
        !          1638: for that character, and fails immediately if it cannot find it, without
        !          1639: actually running the main matching function. This means that a special item
        !          1640: such as (*COMMIT) at the start of a pattern is not considered until after a
        !          1641: suitable starting point for the match has been found. When callouts or (*MARK)
        !          1642: items are in use, these "start-up" optimizations can cause them to be skipped
        !          1643: if the pattern is never actually used. The start-up optimizations are in effect
        !          1644: a pre-scan of the subject that takes place before the pattern is run.
        !          1645: .P
        !          1646: The PCRE_NO_START_OPTIMIZE option disables the start-up optimizations, possibly
        !          1647: causing performance to suffer, but ensuring that in cases where the result is
        !          1648: "no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK)
        !          1649: are considered at every possible starting position in the subject string. If
        !          1650: PCRE_NO_START_OPTIMIZE is set at compile time, it cannot be unset at matching
        !          1651: time.
        !          1652: .P
        !          1653: Setting PCRE_NO_START_OPTIMIZE can change the outcome of a matching operation.
        !          1654: Consider the pattern
        !          1655: .sp
        !          1656:   (*COMMIT)ABC
        !          1657: .sp
        !          1658: When this is compiled, PCRE records the fact that a match must start with the
        !          1659: character "A". Suppose the subject string is "DEFABC". The start-up
        !          1660: optimization scans along the subject, finds "A" and runs the first match
        !          1661: attempt from there. The (*COMMIT) item means that the pattern must match the
        !          1662: current starting position, which in this case, it does. However, if the same
        !          1663: match is run with PCRE_NO_START_OPTIMIZE set, the initial scan along the
        !          1664: subject string does not happen. The first match attempt is run starting from
        !          1665: "D" and when this fails, (*COMMIT) prevents any further matches being tried, so
        !          1666: the overall result is "no match". If the pattern is studied, more start-up
        !          1667: optimizations may be used. For example, a minimum length for the subject may be
        !          1668: recorded. Consider the pattern
        !          1669: .sp
        !          1670:   (*MARK:A)(X|Y)
        !          1671: .sp
        !          1672: The minimum length for a match is one character. If the subject is "ABC", there
        !          1673: will be attempts to match "ABC", "BC", "C", and then finally an empty string.
        !          1674: If the pattern is studied, the final attempt does not take place, because PCRE
        !          1675: knows that the subject is too short, and so the (*MARK) is never encountered.
        !          1676: In this case, studying the pattern does not affect the overall match result,
        !          1677: which is still "no match", but it does affect the auxiliary information that is
        !          1678: returned.
        !          1679: .sp
        !          1680:   PCRE_NO_UTF8_CHECK
        !          1681: .sp
        !          1682: When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8
        !          1683: string is automatically checked when \fBpcre_exec()\fP is subsequently called.
        !          1684: The value of \fIstartoffset\fP is also checked to ensure that it points to the
        !          1685: start of a UTF-8 character. There is a discussion about the validity of UTF-8
        !          1686: strings in the
        !          1687: .\" HTML <a href="pcre.html#utf8strings">
        !          1688: .\" </a>
        !          1689: section on UTF-8 support
        !          1690: .\"
        !          1691: in the main
        !          1692: .\" HREF
        !          1693: \fBpcre\fP
        !          1694: .\"
        !          1695: page. If an invalid UTF-8 sequence of bytes is found, \fBpcre_exec()\fP returns
        !          1696: the error PCRE_ERROR_BADUTF8 or, if PCRE_PARTIAL_HARD is set and the problem is
        !          1697: a truncated UTF-8 character at the end of the subject, PCRE_ERROR_SHORTUTF8. In
        !          1698: both cases, information about the precise nature of the error may also be
        !          1699: returned (see the descriptions of these errors in the section entitled \fIError
        !          1700: return values from\fP \fBpcre_exec()\fP
        !          1701: .\" HTML <a href="#errorlist">
        !          1702: .\" </a>
        !          1703: below).
        !          1704: .\"
        !          1705: If \fIstartoffset\fP contains a value that does not point to the start of a
        !          1706: UTF-8 character (or to the end of the subject), PCRE_ERROR_BADUTF8_OFFSET is
        !          1707: returned.
        !          1708: .P
        !          1709: If you already know that your subject is valid, and you want to skip these
        !          1710: checks for performance reasons, you can set the PCRE_NO_UTF8_CHECK option when
        !          1711: calling \fBpcre_exec()\fP. You might want to do this for the second and
        !          1712: subsequent calls to \fBpcre_exec()\fP if you are making repeated calls to find
        !          1713: all the matches in a single subject string. However, you should be sure that
        !          1714: the value of \fIstartoffset\fP points to the start of a UTF-8 character (or the
        !          1715: end of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
        !          1716: invalid UTF-8 string as a subject or an invalid value of \fIstartoffset\fP is
        !          1717: undefined. Your program may crash.
        !          1718: .sp
        !          1719:   PCRE_PARTIAL_HARD
        !          1720:   PCRE_PARTIAL_SOFT
        !          1721: .sp
        !          1722: These options turn on the partial matching feature. For backwards
        !          1723: compatibility, PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial match
        !          1724: occurs if the end of the subject string is reached successfully, but there are
        !          1725: not enough subject characters to complete the match. If this happens when
        !          1726: PCRE_PARTIAL_SOFT (but not PCRE_PARTIAL_HARD) is set, matching continues by
        !          1727: testing any remaining alternatives. Only if no complete match can be found is
        !          1728: PCRE_ERROR_PARTIAL returned instead of PCRE_ERROR_NOMATCH. In other words,
        !          1729: PCRE_PARTIAL_SOFT says that the caller is prepared to handle a partial match,
        !          1730: but only if no complete match can be found.
        !          1731: .P
        !          1732: If PCRE_PARTIAL_HARD is set, it overrides PCRE_PARTIAL_SOFT. In this case, if a
        !          1733: partial match is found, \fBpcre_exec()\fP immediately returns
        !          1734: PCRE_ERROR_PARTIAL, without considering any other alternatives. In other words,
        !          1735: when PCRE_PARTIAL_HARD is set, a partial match is considered to be more
        !          1736: important than an alternative complete match.
        !          1737: .P
        !          1738: In both cases, the portion of the string that was inspected when the partial
        !          1739: match was found is set as the first matching string. There is a more detailed
        !          1740: discussion of partial and multi-segment matching, with examples, in the
        !          1741: .\" HREF
        !          1742: \fBpcrepartial\fP
        !          1743: .\"
        !          1744: documentation.
        !          1745: .
        !          1746: .
        !          1747: .SS "The string to be matched by \fBpcre_exec()\fP"
        !          1748: .rs
        !          1749: .sp
        !          1750: The subject string is passed to \fBpcre_exec()\fP as a pointer in
        !          1751: \fIsubject\fP, a length (in bytes) in \fIlength\fP, and a starting byte offset
        !          1752: in \fIstartoffset\fP. If this is negative or greater than the length of the
        !          1753: subject, \fBpcre_exec()\fP returns PCRE_ERROR_BADOFFSET. When the starting
        !          1754: offset is zero, the search for a match starts at the beginning of the subject,
        !          1755: and this is by far the most common case. In UTF-8 mode, the byte offset must
        !          1756: point to the start of a UTF-8 character (or the end of the subject). Unlike the
        !          1757: pattern string, the subject may contain binary zero bytes.
        !          1758: .P
        !          1759: A non-zero starting offset is useful when searching for another match in the
        !          1760: same subject by calling \fBpcre_exec()\fP again after a previous success.
        !          1761: Setting \fIstartoffset\fP differs from just passing over a shortened string and
        !          1762: setting PCRE_NOTBOL in the case of a pattern that begins with any kind of
        !          1763: lookbehind. For example, consider the pattern
        !          1764: .sp
        !          1765:   \eBiss\eB
        !          1766: .sp
        !          1767: which finds occurrences of "iss" in the middle of words. (\eB matches only if
        !          1768: the current position in the subject is not a word boundary.) When applied to
        !          1769: the string "Mississippi" the first call to \fBpcre_exec()\fP finds the first
        !          1770: occurrence. If \fBpcre_exec()\fP is called again with just the remainder of the
        !          1771: subject, namely "issippi", it does not match, because \eB is always false at the
        !          1772: start of the subject, which is deemed to be a word boundary. However, if
        !          1773: \fBpcre_exec()\fP is passed the entire string again, but with \fIstartoffset\fP
        !          1774: set to 4, it finds the second occurrence of "iss" because it is able to look
        !          1775: behind the starting point to discover that it is preceded by a letter.
        !          1776: .P
        !          1777: Finding all the matches in a subject is tricky when the pattern can match an
        !          1778: empty string. It is possible to emulate Perl's /g behaviour by first trying the
        !          1779: match again at the same offset, with the PCRE_NOTEMPTY_ATSTART and
        !          1780: PCRE_ANCHORED options, and then if that fails, advancing the starting offset
        !          1781: and trying an ordinary match again. There is some code that demonstrates how to
        !          1782: do this in the
        !          1783: .\" HREF
        !          1784: \fBpcredemo\fP
        !          1785: .\"
        !          1786: sample program. In the most general case, you have to check to see if the
        !          1787: newline convention recognizes CRLF as a newline, and if so, and the current
        !          1788: character is CR followed by LF, advance the starting offset by two characters
        !          1789: instead of one.
        !          1790: .P
        !          1791: If a non-zero starting offset is passed when the pattern is anchored, one
        !          1792: attempt to match at the given offset is made. This can only succeed if the
        !          1793: pattern does not require the match to be at the start of the subject.
        !          1794: .
        !          1795: .
        !          1796: .SS "How \fBpcre_exec()\fP returns captured substrings"
        !          1797: .rs
        !          1798: .sp
        !          1799: In general, a pattern matches a certain portion of the subject, and in
        !          1800: addition, further substrings from the subject may be picked out by parts of the
        !          1801: pattern. Following the usage in Jeffrey Friedl's book, this is called
        !          1802: "capturing" in what follows, and the phrase "capturing subpattern" is used for
        !          1803: a fragment of a pattern that picks out a substring. PCRE supports several other
        !          1804: kinds of parenthesized subpattern that do not cause substrings to be captured.
        !          1805: .P
        !          1806: Captured substrings are returned to the caller via a vector of integers whose
        !          1807: address is passed in \fIovector\fP. The number of elements in the vector is
        !          1808: passed in \fIovecsize\fP, which must be a non-negative number. \fBNote\fP: this
        !          1809: argument is NOT the size of \fIovector\fP in bytes.
        !          1810: .P
        !          1811: The first two-thirds of the vector is used to pass back captured substrings,
        !          1812: each substring using a pair of integers. The remaining third of the vector is
        !          1813: used as workspace by \fBpcre_exec()\fP while matching capturing subpatterns,
        !          1814: and is not available for passing back information. The number passed in
        !          1815: \fIovecsize\fP should always be a multiple of three. If it is not, it is
        !          1816: rounded down.
        !          1817: .P
        !          1818: When a match is successful, information about captured substrings is returned
        !          1819: in pairs of integers, starting at the beginning of \fIovector\fP, and
        !          1820: continuing up to two-thirds of its length at the most. The first element of
        !          1821: each pair is set to the byte offset of the first character in a substring, and
        !          1822: the second is set to the byte offset of the first character after the end of a
        !          1823: substring. \fBNote\fP: these values are always byte offsets, even in UTF-8
        !          1824: mode. They are not character counts.
        !          1825: .P
        !          1826: The first pair of integers, \fIovector[0]\fP and \fIovector[1]\fP, identify the
        !          1827: portion of the subject string matched by the entire pattern. The next pair is
        !          1828: used for the first capturing subpattern, and so on. The value returned by
        !          1829: \fBpcre_exec()\fP is one more than the highest numbered pair that has been set.
        !          1830: For example, if two substrings have been captured, the returned value is 3. If
        !          1831: there are no capturing subpatterns, the return value from a successful match is
        !          1832: 1, indicating that just the first pair of offsets has been set.
        !          1833: .P
        !          1834: If a capturing subpattern is matched repeatedly, it is the last portion of the
        !          1835: string that it matched that is returned.
        !          1836: .P
        !          1837: If the vector is too small to hold all the captured substring offsets, it is
        !          1838: used as far as possible (up to two-thirds of its length), and the function
        !          1839: returns a value of zero. If neither the actual string matched nor any captured
        !          1840: substrings are of interest, \fBpcre_exec()\fP may be called with \fIovector\fP
        !          1841: passed as NULL and \fIovecsize\fP as zero. However, if the pattern contains
        !          1842: back references and the \fIovector\fP is not big enough to remember the related
        !          1843: substrings, PCRE has to get additional memory for use during matching. Thus it
        !          1844: is usually advisable to supply an \fIovector\fP of reasonable size.
        !          1845: .P
        !          1846: There are some cases where zero is returned (indicating vector overflow) when
        !          1847: in fact the vector is exactly the right size for the final match. For example,
        !          1848: consider the pattern
        !          1849: .sp
        !          1850:   (a)(?:(b)c|bd)
        !          1851: .sp
        !          1852: If a vector of 6 elements (allowing for only 1 captured substring) is given
        !          1853: with subject string "abd", \fBpcre_exec()\fP will try to set the second
        !          1854: captured string, thereby recording a vector overflow, before failing to match
        !          1855: "c" and backing up to try the second alternative. The zero return, however,
        !          1856: does correctly indicate that the maximum number of slots (namely 2) have been
        !          1857: filled. In similar cases where there is temporary overflow, but the final
        !          1858: number of used slots is actually less than the maximum, a non-zero value is
        !          1859: returned.
        !          1860: .P
        !          1861: The \fBpcre_fullinfo()\fP function can be used to find out how many capturing
        !          1862: subpatterns there are in a compiled pattern. The smallest size for
        !          1863: \fIovector\fP that will allow for \fIn\fP captured substrings, in addition to
        !          1864: the offsets of the substring matched by the whole pattern, is (\fIn\fP+1)*3.
        !          1865: .P
        !          1866: It is possible for capturing subpattern number \fIn+1\fP to match some part of
        !          1867: the subject when subpattern \fIn\fP has not been used at all. For example, if
        !          1868: the string "abc" is matched against the pattern (a|(z))(bc) the return from the
        !          1869: function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this
        !          1870: happens, both values in the offset pairs corresponding to unused subpatterns
        !          1871: are set to -1.
        !          1872: .P
        !          1873: Offset values that correspond to unused subpatterns at the end of the
        !          1874: expression are also set to -1. For example, if the string "abc" is matched
        !          1875: against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched. The
        !          1876: return from the function is 2, because the highest used capturing subpattern
        !          1877: number is 1, and the offsets for the second and third capturing subpatterns
        !          1878: (assuming the vector is large enough, of course) are set to -1.
        !          1879: .P
        !          1880: \fBNote\fP: Elements in the first two-thirds of \fIovector\fP that do not
        !          1881: correspond to capturing parentheses in the pattern are never changed. That is,
        !          1882: if a pattern contains \fIn\fP capturing parentheses, no more than
        !          1883: \fIovector[0]\fP to \fIovector[2n+1]\fP are set by \fBpcre_exec()\fP. The other
        !          1884: elements (in the first two-thirds) retain whatever values they previously had.
        !          1885: .P
        !          1886: Some convenience functions are provided for extracting the captured substrings
        !          1887: as separate strings. These are described below.
        !          1888: .
        !          1889: .
        !          1890: .\" HTML <a name="errorlist"></a>
        !          1891: .SS "Error return values from \fBpcre_exec()\fP"
        !          1892: .rs
        !          1893: .sp
        !          1894: If \fBpcre_exec()\fP fails, it returns a negative number. The following are
        !          1895: defined in the header file:
        !          1896: .sp
        !          1897:   PCRE_ERROR_NOMATCH        (-1)
        !          1898: .sp
        !          1899: The subject string did not match the pattern.
        !          1900: .sp
        !          1901:   PCRE_ERROR_NULL           (-2)
        !          1902: .sp
        !          1903: Either \fIcode\fP or \fIsubject\fP was passed as NULL, or \fIovector\fP was
        !          1904: NULL and \fIovecsize\fP was not zero.
        !          1905: .sp
        !          1906:   PCRE_ERROR_BADOPTION      (-3)
        !          1907: .sp
        !          1908: An unrecognized bit was set in the \fIoptions\fP argument.
        !          1909: .sp
        !          1910:   PCRE_ERROR_BADMAGIC       (-4)
        !          1911: .sp
        !          1912: PCRE stores a 4-byte "magic number" at the start of the compiled code, to catch
        !          1913: the case when it is passed a junk pointer and to detect when a pattern that was
        !          1914: compiled in an environment of one endianness is run in an environment with the
        !          1915: other endianness. This is the error that PCRE gives when the magic number is
        !          1916: not present.
        !          1917: .sp
        !          1918:   PCRE_ERROR_UNKNOWN_OPCODE (-5)
        !          1919: .sp
        !          1920: While running the pattern match, an unknown item was encountered in the
        !          1921: compiled pattern. This error could be caused by a bug in PCRE or by overwriting
        !          1922: of the compiled pattern.
        !          1923: .sp
        !          1924:   PCRE_ERROR_NOMEMORY       (-6)
        !          1925: .sp
        !          1926: If a pattern contains back references, but the \fIovector\fP that is passed to
        !          1927: \fBpcre_exec()\fP is not big enough to remember the referenced substrings, PCRE
        !          1928: gets a block of memory at the start of matching to use for this purpose. If the
        !          1929: call via \fBpcre_malloc()\fP fails, this error is given. The memory is
        !          1930: automatically freed at the end of matching.
        !          1931: .P
        !          1932: This error is also given if \fBpcre_stack_malloc()\fP fails in
        !          1933: \fBpcre_exec()\fP. This can happen only when PCRE has been compiled with
        !          1934: \fB--disable-stack-for-recursion\fP.
        !          1935: .sp
        !          1936:   PCRE_ERROR_NOSUBSTRING    (-7)
        !          1937: .sp
        !          1938: This error is used by the \fBpcre_copy_substring()\fP,
        !          1939: \fBpcre_get_substring()\fP, and \fBpcre_get_substring_list()\fP functions (see
        !          1940: below). It is never returned by \fBpcre_exec()\fP.
        !          1941: .sp
        !          1942:   PCRE_ERROR_MATCHLIMIT     (-8)
        !          1943: .sp
        !          1944: The backtracking limit, as specified by the \fImatch_limit\fP field in a
        !          1945: \fBpcre_extra\fP structure (or defaulted) was reached. See the description
        !          1946: above.
        !          1947: .sp
        !          1948:   PCRE_ERROR_CALLOUT        (-9)
        !          1949: .sp
        !          1950: This error is never generated by \fBpcre_exec()\fP itself. It is provided for
        !          1951: use by callout functions that want to yield a distinctive error code. See the
        !          1952: .\" HREF
        !          1953: \fBpcrecallout\fP
        !          1954: .\"
        !          1955: documentation for details.
        !          1956: .sp
        !          1957:   PCRE_ERROR_BADUTF8        (-10)
        !          1958: .sp
        !          1959: A string that contains an invalid UTF-8 byte sequence was passed as a subject,
        !          1960: and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector
        !          1961: (\fIovecsize\fP) is at least 2, the byte offset to the start of the invalid
        !          1962: UTF-8 character is placed in the first element, and a reason code is placed in
        !          1963: the second element. The reason codes are listed in the
        !          1964: .\" HTML <a href="#badutf8reasons">
        !          1965: .\" </a>
        !          1966: following section.
        !          1967: .\"
        !          1968: For backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a
        !          1969: truncated UTF-8 character at the end of the subject (reason codes 1 to 5),
        !          1970: PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8.
        !          1971: .sp
        !          1972:   PCRE_ERROR_BADUTF8_OFFSET (-11)
        !          1973: .sp
        !          1974: The UTF-8 byte sequence that was passed as a subject was checked and found to
        !          1975: be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of
        !          1976: \fIstartoffset\fP did not point to the beginning of a UTF-8 character or the
        !          1977: end of the subject.
        !          1978: .sp
        !          1979:   PCRE_ERROR_PARTIAL        (-12)
        !          1980: .sp
        !          1981: The subject string did not match, but it did match partially. See the
        !          1982: .\" HREF
        !          1983: \fBpcrepartial\fP
        !          1984: .\"
        !          1985: documentation for details of partial matching.
        !          1986: .sp
        !          1987:   PCRE_ERROR_BADPARTIAL     (-13)
        !          1988: .sp
        !          1989: This code is no longer in use. It was formerly returned when the PCRE_PARTIAL
        !          1990: option was used with a compiled pattern containing items that were not
        !          1991: supported for partial matching. From release 8.00 onwards, there are no
        !          1992: restrictions on partial matching.
        !          1993: .sp
        !          1994:   PCRE_ERROR_INTERNAL       (-14)
        !          1995: .sp
        !          1996: An unexpected internal error has occurred. This error could be caused by a bug
        !          1997: in PCRE or by overwriting of the compiled pattern.
        !          1998: .sp
        !          1999:   PCRE_ERROR_BADCOUNT       (-15)
        !          2000: .sp
        !          2001: This error is given if the value of the \fIovecsize\fP argument is negative.
        !          2002: .sp
        !          2003:   PCRE_ERROR_RECURSIONLIMIT (-21)
        !          2004: .sp
        !          2005: The internal recursion limit, as specified by the \fImatch_limit_recursion\fP
        !          2006: field in a \fBpcre_extra\fP structure (or defaulted) was reached. See the
        !          2007: description above.
        !          2008: .sp
        !          2009:   PCRE_ERROR_BADNEWLINE     (-23)
        !          2010: .sp
        !          2011: An invalid combination of PCRE_NEWLINE_\fIxxx\fP options was given.
        !          2012: .sp
        !          2013:   PCRE_ERROR_BADOFFSET      (-24)
        !          2014: .sp
        !          2015: The value of \fIstartoffset\fP was negative or greater than the length of the
        !          2016: subject, that is, the value in \fIlength\fP.
        !          2017: .sp
        !          2018:   PCRE_ERROR_SHORTUTF8      (-25)
        !          2019: .sp
        !          2020: This error is returned instead of PCRE_ERROR_BADUTF8 when the subject string
        !          2021: ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set.
        !          2022: Information about the failure is returned as for PCRE_ERROR_BADUTF8. It is in
        !          2023: fact sufficient to detect this case, but this special error code for
        !          2024: PCRE_PARTIAL_HARD precedes the implementation of returned information; it is
        !          2025: retained for backwards compatibility.
        !          2026: .sp
        !          2027:   PCRE_ERROR_RECURSELOOP    (-26)
        !          2028: .sp
        !          2029: This error is returned when \fBpcre_exec()\fP detects a recursion loop within
        !          2030: the pattern. Specifically, it means that either the whole pattern or a
        !          2031: subpattern has been called recursively for the second time at the same position
        !          2032: in the subject string. Some simple patterns that might do this are detected and
        !          2033: faulted at compile time, but more complicated cases, in particular mutual
        !          2034: recursions between two different subpatterns, cannot be detected until run
        !          2035: time.
        !          2036: .sp
        !          2037:   PCRE_ERROR_JIT_STACKLIMIT (-27)
        !          2038: .sp
        !          2039: This error is returned when a pattern that was successfully studied using the
        !          2040: PCRE_STUDY_JIT_COMPILE option is being matched, but the memory available for
        !          2041: the just-in-time processing stack is not large enough. See the
        !          2042: .\" HREF
        !          2043: \fBpcrejit\fP
        !          2044: .\"
        !          2045: documentation for more details.
        !          2046: .P
        !          2047: Error numbers -16 to -20 and -22 are not used by \fBpcre_exec()\fP.
        !          2048: .
        !          2049: .
        !          2050: .\" HTML <a name="badutf8reasons"></a>
        !          2051: .SS "Reason codes for invalid UTF-8 strings"
        !          2052: .rs
        !          2053: .sp
        !          2054: When \fBpcre_exec()\fP returns either PCRE_ERROR_BADUTF8 or
        !          2055: PCRE_ERROR_SHORTUTF8, and the size of the output vector (\fIovecsize\fP) is at
        !          2056: least 2, the offset of the start of the invalid UTF-8 character is placed in
        !          2057: the first output vector element (\fIovector[0]\fP) and a reason code is placed
        !          2058: in the second element (\fIovector[1]\fP). The reason codes are given names in
        !          2059: the \fBpcre.h\fP header file:
        !          2060: .sp
        !          2061:   PCRE_UTF8_ERR1
        !          2062:   PCRE_UTF8_ERR2
        !          2063:   PCRE_UTF8_ERR3
        !          2064:   PCRE_UTF8_ERR4
        !          2065:   PCRE_UTF8_ERR5
        !          2066: .sp
        !          2067: The string ends with a truncated UTF-8 character; the code specifies how many
        !          2068: bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be
        !          2069: no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279)
        !          2070: allows for up to 6 bytes, and this is checked first; hence the possibility of
        !          2071: 4 or 5 missing bytes.
        !          2072: .sp
        !          2073:   PCRE_UTF8_ERR6
        !          2074:   PCRE_UTF8_ERR7
        !          2075:   PCRE_UTF8_ERR8
        !          2076:   PCRE_UTF8_ERR9
        !          2077:   PCRE_UTF8_ERR10
        !          2078: .sp
        !          2079: The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the
        !          2080: character do not have the binary value 0b10 (that is, either the most
        !          2081: significant bit is 0, or the next bit is 1).
        !          2082: .sp
        !          2083:   PCRE_UTF8_ERR11
        !          2084:   PCRE_UTF8_ERR12
        !          2085: .sp
        !          2086: A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long;
        !          2087: these code points are excluded by RFC 3629.
        !          2088: .sp
        !          2089:   PCRE_UTF8_ERR13
        !          2090: .sp
        !          2091: A 4-byte character has a value greater than 0x10ffff; these code points are
        !          2092: excluded by RFC 3629.
        !          2093: .sp
        !          2094:   PCRE_UTF8_ERR14
        !          2095: .sp
        !          2096: A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of
        !          2097: code points is reserved by RFC 3629 for use with UTF-16, and so is excluded
        !          2098: from UTF-8.
        !          2099: .sp
        !          2100:   PCRE_UTF8_ERR15
        !          2101:   PCRE_UTF8_ERR16
        !          2102:   PCRE_UTF8_ERR17
        !          2103:   PCRE_UTF8_ERR18
        !          2104:   PCRE_UTF8_ERR19
        !          2105: .sp
        !          2106: A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a
        !          2107: value that can be represented by fewer bytes, which is invalid. For example,
        !          2108: the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just
        !          2109: one byte.
        !          2110: .sp
        !          2111:   PCRE_UTF8_ERR20
        !          2112: .sp
        !          2113: The two most significant bits of the first byte of a character have the binary
        !          2114: value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a
        !          2115: byte can only validly occur as the second or subsequent byte of a multi-byte
        !          2116: character.
        !          2117: .sp
        !          2118:   PCRE_UTF8_ERR21
        !          2119: .sp
        !          2120: The first byte of a character has the value 0xfe or 0xff. These values can
        !          2121: never occur in a valid UTF-8 string.
        !          2122: .
        !          2123: .
        !          2124: .SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"
        !          2125: .rs
        !          2126: .sp
        !          2127: .B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
        !          2128: .ti +5n
        !          2129: .B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
        !          2130: .ti +5n
        !          2131: .B int \fIbuffersize\fP);
        !          2132: .PP
        !          2133: .B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
        !          2134: .ti +5n
        !          2135: .B int \fIstringcount\fP, int \fIstringnumber\fP,
        !          2136: .ti +5n
        !          2137: .B const char **\fIstringptr\fP);
        !          2138: .PP
        !          2139: .B int pcre_get_substring_list(const char *\fIsubject\fP,
        !          2140: .ti +5n
        !          2141: .B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
        !          2142: .PP
        !          2143: Captured substrings can be accessed directly by using the offsets returned by
        !          2144: \fBpcre_exec()\fP in \fIovector\fP. For convenience, the functions
        !          2145: \fBpcre_copy_substring()\fP, \fBpcre_get_substring()\fP, and
        !          2146: \fBpcre_get_substring_list()\fP are provided for extracting captured substrings
        !          2147: as new, separate, zero-terminated strings. These functions identify substrings
        !          2148: by number. The next section describes functions for extracting named
        !          2149: substrings.
        !          2150: .P
        !          2151: A substring that contains a binary zero is correctly extracted and has a
        !          2152: further zero added on the end, but the result is not, of course, a C string.
        !          2153: However, you can process such a string by referring to the length that is
        !          2154: returned by \fBpcre_copy_substring()\fP and \fBpcre_get_substring()\fP.
        !          2155: Unfortunately, the interface to \fBpcre_get_substring_list()\fP is not adequate
        !          2156: for handling strings containing binary zeros, because the end of the final
        !          2157: string is not independently indicated.
        !          2158: .P
        !          2159: The first three arguments are the same for all three of these functions:
        !          2160: \fIsubject\fP is the subject string that has just been successfully matched,
        !          2161: \fIovector\fP is a pointer to the vector of integer offsets that was passed to
        !          2162: \fBpcre_exec()\fP, and \fIstringcount\fP is the number of substrings that were
        !          2163: captured by the match, including the substring that matched the entire regular
        !          2164: expression. This is the value returned by \fBpcre_exec()\fP if it is greater
        !          2165: than zero. If \fBpcre_exec()\fP returned zero, indicating that it ran out of
        !          2166: space in \fIovector\fP, the value passed as \fIstringcount\fP should be the
        !          2167: number of elements in the vector divided by three.
        !          2168: .P
        !          2169: The functions \fBpcre_copy_substring()\fP and \fBpcre_get_substring()\fP
        !          2170: extract a single substring, whose number is given as \fIstringnumber\fP. A
        !          2171: value of zero extracts the substring that matched the entire pattern, whereas
        !          2172: higher values extract the captured substrings. For \fBpcre_copy_substring()\fP,
        !          2173: the string is placed in \fIbuffer\fP, whose length is given by
        !          2174: \fIbuffersize\fP, while for \fBpcre_get_substring()\fP a new block of memory is
        !          2175: obtained via \fBpcre_malloc\fP, and its address is returned via
        !          2176: \fIstringptr\fP. The yield of the function is the length of the string, not
        !          2177: including the terminating zero, or one of these error codes:
        !          2178: .sp
        !          2179:   PCRE_ERROR_NOMEMORY       (-6)
        !          2180: .sp
        !          2181: The buffer was too small for \fBpcre_copy_substring()\fP, or the attempt to get
        !          2182: memory failed for \fBpcre_get_substring()\fP.
        !          2183: .sp
        !          2184:   PCRE_ERROR_NOSUBSTRING    (-7)
        !          2185: .sp
        !          2186: There is no substring whose number is \fIstringnumber\fP.
        !          2187: .P
        !          2188: The \fBpcre_get_substring_list()\fP function extracts all available substrings
        !          2189: and builds a list of pointers to them. All this is done in a single block of
        !          2190: memory that is obtained via \fBpcre_malloc\fP. The address of the memory block
        !          2191: is returned via \fIlistptr\fP, which is also the start of the list of string
        !          2192: pointers. The end of the list is marked by a NULL pointer. The yield of the
        !          2193: function is zero if all went well, or the error code
        !          2194: .sp
        !          2195:   PCRE_ERROR_NOMEMORY       (-6)
        !          2196: .sp
        !          2197: if the attempt to get the memory block failed.
        !          2198: .P
        !          2199: When any of these functions encounter a substring that is unset, which can
        !          2200: happen when capturing subpattern number \fIn+1\fP matches some part of the
        !          2201: subject, but subpattern \fIn\fP has not been used at all, they return an empty
        !          2202: string. This can be distinguished from a genuine zero-length substring by
        !          2203: inspecting the appropriate offset in \fIovector\fP, which is negative for unset
        !          2204: substrings.
        !          2205: .P
        !          2206: The two convenience functions \fBpcre_free_substring()\fP and
        !          2207: \fBpcre_free_substring_list()\fP can be used to free the memory returned by
        !          2208: a previous call of \fBpcre_get_substring()\fP or
        !          2209: \fBpcre_get_substring_list()\fP, respectively. They do nothing more than call
        !          2210: the function pointed to by \fBpcre_free\fP, which of course could be called
        !          2211: directly from a C program. However, PCRE is used in some situations where it is
        !          2212: linked via a special interface to another programming language that cannot use
        !          2213: \fBpcre_free\fP directly; it is for these cases that the functions are
        !          2214: provided.
        !          2215: .
        !          2216: .
        !          2217: .SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME"
        !          2218: .rs
        !          2219: .sp
        !          2220: .B int pcre_get_stringnumber(const pcre *\fIcode\fP,
        !          2221: .ti +5n
        !          2222: .B const char *\fIname\fP);
        !          2223: .PP
        !          2224: .B int pcre_copy_named_substring(const pcre *\fIcode\fP,
        !          2225: .ti +5n
        !          2226: .B const char *\fIsubject\fP, int *\fIovector\fP,
        !          2227: .ti +5n
        !          2228: .B int \fIstringcount\fP, const char *\fIstringname\fP,
        !          2229: .ti +5n
        !          2230: .B char *\fIbuffer\fP, int \fIbuffersize\fP);
        !          2231: .PP
        !          2232: .B int pcre_get_named_substring(const pcre *\fIcode\fP,
        !          2233: .ti +5n
        !          2234: .B const char *\fIsubject\fP, int *\fIovector\fP,
        !          2235: .ti +5n
        !          2236: .B int \fIstringcount\fP, const char *\fIstringname\fP,
        !          2237: .ti +5n
        !          2238: .B const char **\fIstringptr\fP);
        !          2239: .PP
        !          2240: To extract a substring by name, you first have to find the associated number.
        !          2241: For example, for this pattern
        !          2242: .sp
        !          2243:   (a+)b(?<xxx>\ed+)...
        !          2244: .sp
        !          2245: the number of the subpattern called "xxx" is 2. If the name is known to be
        !          2246: unique (PCRE_DUPNAMES was not set), you can find the number from the name by
        !          2247: calling \fBpcre_get_stringnumber()\fP. The first argument is the compiled
        !          2248: pattern, and the second is the name. The yield of the function is the
        !          2249: subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if there is no subpattern of
        !          2250: that name.
        !          2251: .P
        !          2252: Given the number, you can extract the substring directly, or use one of the
        !          2253: functions described in the previous section. For convenience, there are also
        !          2254: two functions that do the whole job.
        !          2255: .P
        !          2256: Most of the arguments of \fBpcre_copy_named_substring()\fP and
        !          2257: \fBpcre_get_named_substring()\fP are the same as those for the similarly named
        !          2258: functions that extract by number. As these are described in the previous
        !          2259: section, they are not re-described here. There are just two differences:
        !          2260: .P
        !          2261: First, instead of a substring number, a substring name is given. Second, there
        !          2262: is an extra argument, given at the start, which is a pointer to the compiled
        !          2263: pattern. This is needed in order to gain access to the name-to-number
        !          2264: translation table.
        !          2265: .P
        !          2266: These functions call \fBpcre_get_stringnumber()\fP, and if it succeeds, they
        !          2267: then call \fBpcre_copy_substring()\fP or \fBpcre_get_substring()\fP, as
        !          2268: appropriate. \fBNOTE:\fP If PCRE_DUPNAMES is set and there are duplicate names,
        !          2269: the behaviour may not be what you want (see the next section).
        !          2270: .P
        !          2271: \fBWarning:\fP If the pattern uses the (?| feature to set up multiple
        !          2272: subpatterns with the same number, as described in the
        !          2273: .\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
        !          2274: .\" </a>
        !          2275: section on duplicate subpattern numbers
        !          2276: .\"
        !          2277: in the
        !          2278: .\" HREF
        !          2279: \fBpcrepattern\fP
        !          2280: .\"
        !          2281: page, you cannot use names to distinguish the different subpatterns, because
        !          2282: names are not included in the compiled code. The matching process uses only
        !          2283: numbers. For this reason, the use of different names for subpatterns of the
        !          2284: same number causes an error at compile time.
        !          2285: .
        !          2286: .
        !          2287: .SH "DUPLICATE SUBPATTERN NAMES"
        !          2288: .rs
        !          2289: .sp
        !          2290: .B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
        !          2291: .ti +5n
        !          2292: .B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
        !          2293: .PP
        !          2294: When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
        !          2295: are not required to be unique. (Duplicate names are always allowed for
        !          2296: subpatterns with the same number, created by using the (?| feature. Indeed, if
        !          2297: such subpatterns are named, they are required to use the same names.)
        !          2298: .P
        !          2299: Normally, patterns with duplicate names are such that in any one match, only
        !          2300: one of the named subpatterns participates. An example is shown in the
        !          2301: .\" HREF
        !          2302: \fBpcrepattern\fP
        !          2303: .\"
        !          2304: documentation.
        !          2305: .P
        !          2306: When duplicates are present, \fBpcre_copy_named_substring()\fP and
        !          2307: \fBpcre_get_named_substring()\fP return the first substring corresponding to
        !          2308: the given name that is set. If none are set, PCRE_ERROR_NOSUBSTRING (-7) is
        !          2309: returned; no data is returned. The \fBpcre_get_stringnumber()\fP function
        !          2310: returns one of the numbers that are associated with the name, but it is not
        !          2311: defined which it is.
        !          2312: .P
        !          2313: If you want to get full details of all captured substrings for a given name,
        !          2314: you must use the \fBpcre_get_stringtable_entries()\fP function. The first
        !          2315: argument is the compiled pattern, and the second is the name. The third and
        !          2316: fourth are pointers to variables which are updated by the function. After it
        !          2317: has run, they point to the first and last entries in the name-to-number table
        !          2318: for the given name. The function itself returns the length of each entry, or
        !          2319: PCRE_ERROR_NOSUBSTRING (-7) if there are none. The format of the table is
        !          2320: described in the section entitled \fIInformation about a pattern\fP
        !          2321: .\" HTML <a href="#infoaboutpattern">
        !          2322: .\" </a>
        !          2323: above.
        !          2324: .\"
        !          2325: Given all the relevant entries for the name, you can extract each of their
        !          2326: numbers, and hence the captured data, if any.
        !          2327: .
        !          2328: .
        !          2329: .SH "FINDING ALL POSSIBLE MATCHES"
        !          2330: .rs
        !          2331: .sp
        !          2332: The traditional matching function uses a similar algorithm to Perl, which stops
        !          2333: when it finds the first match, starting at a given point in the subject. If you
        !          2334: want to find all possible matches, or the longest possible match, consider
        !          2335: using the alternative matching function (see below) instead. If you cannot use
        !          2336: the alternative function, but still need to find all possible matches, you
        !          2337: can kludge it up by making use of the callout facility, which is described in
        !          2338: the
        !          2339: .\" HREF
        !          2340: \fBpcrecallout\fP
        !          2341: .\"
        !          2342: documentation.
        !          2343: .P
        !          2344: What you have to do is to insert a callout right at the end of the pattern.
        !          2345: When your callout function is called, extract and save the current matched
        !          2346: substring. Then return 1, which forces \fBpcre_exec()\fP to backtrack and try
        !          2347: other alternatives. Ultimately, when it runs out of matches, \fBpcre_exec()\fP
        !          2348: will yield PCRE_ERROR_NOMATCH.
        !          2349: .
        !          2350: .
        !          2351: .\" HTML <a name="dfamatch"></a>
        !          2352: .SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION"
        !          2353: .rs
        !          2354: .sp
        !          2355: .B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
        !          2356: .ti +5n
        !          2357: .B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
        !          2358: .ti +5n
        !          2359: .B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
        !          2360: .ti +5n
        !          2361: .B int *\fIworkspace\fP, int \fIwscount\fP);
        !          2362: .P
        !          2363: The function \fBpcre_dfa_exec()\fP is called to match a subject string against
        !          2364: a compiled pattern, using a matching algorithm that scans the subject string
        !          2365: just once, and does not backtrack. This has different characteristics to the
        !          2366: normal algorithm, and is not compatible with Perl. Some of the features of PCRE
        !          2367: patterns are not supported. Nevertheless, there are times when this kind of
        !          2368: matching can be useful. For a discussion of the two matching algorithms, and a
        !          2369: list of features that \fBpcre_dfa_exec()\fP does not support, see the
        !          2370: .\" HREF
        !          2371: \fBpcrematching\fP
        !          2372: .\"
        !          2373: documentation.
        !          2374: .P
        !          2375: The arguments for the \fBpcre_dfa_exec()\fP function are the same as for
        !          2376: \fBpcre_exec()\fP, plus two extras. The \fIovector\fP argument is used in a
        !          2377: different way, and this is described below. The other common arguments are used
        !          2378: in the same way as for \fBpcre_exec()\fP, so their description is not repeated
        !          2379: here.
        !          2380: .P
        !          2381: The two additional arguments provide workspace for the function. The workspace
        !          2382: vector should contain at least 20 elements. It is used for keeping track of
        !          2383: multiple paths through the pattern tree. More workspace will be needed for
        !          2384: patterns and subjects where there are a lot of potential matches.
        !          2385: .P
        !          2386: Here is an example of a simple call to \fBpcre_dfa_exec()\fP:
        !          2387: .sp
        !          2388:   int rc;
        !          2389:   int ovector[10];
        !          2390:   int wspace[20];
        !          2391:   rc = pcre_dfa_exec(
        !          2392:     re,             /* result of pcre_compile() */
        !          2393:     NULL,           /* we didn't study the pattern */
        !          2394:     "some string",  /* the subject string */
        !          2395:     11,             /* the length of the subject string */
        !          2396:     0,              /* start at offset 0 in the subject */
        !          2397:     0,              /* default options */
        !          2398:     ovector,        /* vector of integers for substring information */
        !          2399:     10,             /* number of elements (NOT size in bytes) */
        !          2400:     wspace,         /* working space vector */
        !          2401:     20);            /* number of elements (NOT size in bytes) */
        !          2402: .
        !          2403: .SS "Option bits for \fBpcre_dfa_exec()\fP"
        !          2404: .rs
        !          2405: .sp
        !          2406: The unused bits of the \fIoptions\fP argument for \fBpcre_dfa_exec()\fP must be
        !          2407: zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP,
        !          2408: PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
        !          2409: PCRE_NO_UTF8_CHECK, PCRE_BSR_ANYCRLF, PCRE_BSR_UNICODE, PCRE_NO_START_OPTIMIZE,
        !          2410: PCRE_PARTIAL_HARD, PCRE_PARTIAL_SOFT, PCRE_DFA_SHORTEST, and PCRE_DFA_RESTART.
        !          2411: All but the last four of these are exactly the same as for \fBpcre_exec()\fP,
        !          2412: so their description is not repeated here.
        !          2413: .sp
        !          2414:   PCRE_PARTIAL_HARD
        !          2415:   PCRE_PARTIAL_SOFT
        !          2416: .sp
        !          2417: These have the same general effect as they do for \fBpcre_exec()\fP, but the
        !          2418: details are slightly different. When PCRE_PARTIAL_HARD is set for
        !          2419: \fBpcre_dfa_exec()\fP, it returns PCRE_ERROR_PARTIAL if the end of the subject
        !          2420: is reached and there is still at least one matching possibility that requires
        !          2421: additional characters. This happens even if some complete matches have also
        !          2422: been found. When PCRE_PARTIAL_SOFT is set, the return code PCRE_ERROR_NOMATCH
        !          2423: is converted into PCRE_ERROR_PARTIAL if the end of the subject is reached,
        !          2424: there have been no complete matches, but there is still at least one matching
        !          2425: possibility. The portion of the string that was inspected when the longest
        !          2426: partial match was found is set as the first matching string in both cases.
        !          2427: There is a more detailed discussion of partial and multi-segment matching, with
        !          2428: examples, in the
        !          2429: .\" HREF
        !          2430: \fBpcrepartial\fP
        !          2431: .\"
        !          2432: documentation.
        !          2433: .sp
        !          2434:   PCRE_DFA_SHORTEST
        !          2435: .sp
        !          2436: Setting the PCRE_DFA_SHORTEST option causes the matching algorithm to stop as
        !          2437: soon as it has found one match. Because of the way the alternative algorithm
        !          2438: works, this is necessarily the shortest possible match at the first possible
        !          2439: matching point in the subject string.
        !          2440: .sp
        !          2441:   PCRE_DFA_RESTART
        !          2442: .sp
        !          2443: When \fBpcre_dfa_exec()\fP returns a partial match, it is possible to call it
        !          2444: again, with additional subject characters, and have it continue with the same
        !          2445: match. The PCRE_DFA_RESTART option requests this action; when it is set, the
        !          2446: \fIworkspace\fP and \fIwscount\fP arguments must reference the same vector as
        !          2447: before because data about the match so far is left in them after a partial
        !          2448: match. There is more discussion of this facility in the
        !          2449: .\" HREF
        !          2450: \fBpcrepartial\fP
        !          2451: .\"
        !          2452: documentation.
        !          2453: .
        !          2454: .
        !          2455: .SS "Successful returns from \fBpcre_dfa_exec()\fP"
        !          2456: .rs
        !          2457: .sp
        !          2458: When \fBpcre_dfa_exec()\fP succeeds, it may have matched more than one
        !          2459: substring in the subject. Note, however, that all the matches from one run of
        !          2460: the function start at the same point in the subject. The shorter matches are
        !          2461: all initial substrings of the longer matches. For example, if the pattern
        !          2462: .sp
        !          2463:   <.*>
        !          2464: .sp
        !          2465: is matched against the string
        !          2466: .sp
        !          2467:   This is <something> <something else> <something further> no more
        !          2468: .sp
        !          2469: the three matched strings are
        !          2470: .sp
        !          2471:   <something>
        !          2472:   <something> <something else>
        !          2473:   <something> <something else> <something further>
        !          2474: .sp
        !          2475: On success, the yield of the function is a number greater than zero, which is
        !          2476: the number of matched substrings. The substrings themselves are returned in
        !          2477: \fIovector\fP. Each string uses two elements; the first is the offset to the
        !          2478: start, and the second is the offset to the end. In fact, all the strings have
        !          2479: the same start offset. (Space could have been saved by giving this only once,
        !          2480: but it was decided to retain some compatibility with the way \fBpcre_exec()\fP
        !          2481: returns data, even though the meaning of the strings is different.)
        !          2482: .P
        !          2483: The strings are returned in reverse order of length; that is, the longest
        !          2484: matching string is given first. If there were too many matches to fit into
        !          2485: \fIovector\fP, the yield of the function is zero, and the vector is filled with
        !          2486: the longest matches. Unlike \fBpcre_exec()\fP, \fBpcre_dfa_exec()\fP can use
        !          2487: the entire \fIovector\fP for returning matched strings.
        !          2488: .
        !          2489: .
        !          2490: .SS "Error returns from \fBpcre_dfa_exec()\fP"
        !          2491: .rs
        !          2492: .sp
        !          2493: The \fBpcre_dfa_exec()\fP function returns a negative number when it fails.
        !          2494: Many of the errors are the same as for \fBpcre_exec()\fP, and these are
        !          2495: described
        !          2496: .\" HTML <a href="#errorlist">
        !          2497: .\" </a>
        !          2498: above.
        !          2499: .\"
        !          2500: There are in addition the following errors that are specific to
        !          2501: \fBpcre_dfa_exec()\fP:
        !          2502: .sp
        !          2503:   PCRE_ERROR_DFA_UITEM      (-16)
        !          2504: .sp
        !          2505: This return is given if \fBpcre_dfa_exec()\fP encounters an item in the pattern
        !          2506: that it does not support, for instance, the use of \eC or a back reference.
        !          2507: .sp
        !          2508:   PCRE_ERROR_DFA_UCOND      (-17)
        !          2509: .sp
        !          2510: This return is given if \fBpcre_dfa_exec()\fP encounters a condition item that
        !          2511: uses a back reference for the condition, or a test for recursion in a specific
        !          2512: group. These are not supported.
        !          2513: .sp
        !          2514:   PCRE_ERROR_DFA_UMLIMIT    (-18)
        !          2515: .sp
        !          2516: This return is given if \fBpcre_dfa_exec()\fP is called with an \fIextra\fP
        !          2517: block that contains a setting of the \fImatch_limit\fP or
        !          2518: \fImatch_limit_recursion\fP fields. This is not supported (these fields are
        !          2519: meaningless for DFA matching).
        !          2520: .sp
        !          2521:   PCRE_ERROR_DFA_WSSIZE     (-19)
        !          2522: .sp
        !          2523: This return is given if \fBpcre_dfa_exec()\fP runs out of space in the
        !          2524: \fIworkspace\fP vector.
        !          2525: .sp
        !          2526:   PCRE_ERROR_DFA_RECURSE    (-20)
        !          2527: .sp
        !          2528: When a recursive subpattern is processed, the matching function calls itself
        !          2529: recursively, using private vectors for \fIovector\fP and \fIworkspace\fP. This
        !          2530: error is given if the output vector is not large enough. This should be
        !          2531: extremely rare, as a vector of size 1000 is used.
        !          2532: .
        !          2533: .
        !          2534: .SH "SEE ALSO"
        !          2535: .rs
        !          2536: .sp
        !          2537: \fBpcrebuild\fP(3), \fBpcrecallout\fP(3), \fBpcrecpp\fP(3),
        !          2538: \fBpcrematching\fP(3), \fBpcrepartial\fP(3), \fBpcreposix\fP(3),
        !          2539: \fBpcreprecompile\fP(3), \fBpcresample\fP(3), \fBpcrestack\fP(3).
        !          2540: .
        !          2541: .
        !          2542: .SH AUTHOR
        !          2543: .rs
        !          2544: .sp
        !          2545: .nf
        !          2546: Philip Hazel
        !          2547: University Computing Service
        !          2548: Cambridge CB2 3QH, England.
        !          2549: .fi
        !          2550: .
        !          2551: .
        !          2552: .SH REVISION
        !          2553: .rs
        !          2554: .sp
        !          2555: .nf
        !          2556: Last updated: 02 December 2011
        !          2557: Copyright (c) 1997-2011 University of Cambridge.
        !          2558: .fi

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>