Annotation of embedaddon/pcre/doc/html/pcresyntax.html, revision 1.1

1.1     ! misho       1: <html>
        !             2: <head>
        !             3: <title>pcresyntax specification</title>
        !             4: </head>
        !             5: <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
        !             6: <h1>pcresyntax man page</h1>
        !             7: <p>
        !             8: Return to the <a href="index.html">PCRE index page</a>.
        !             9: </p>
        !            10: <p>
        !            11: This page is part of the PCRE HTML documentation. It was generated automatically
        !            12: from the original man page. If there is any nonsense in it, please consult the
        !            13: man page, in case the conversion went wrong.
        !            14: <br>
        !            15: <ul>
        !            16: <li><a name="TOC1" href="#SEC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a>
        !            17: <li><a name="TOC2" href="#SEC2">QUOTING</a>
        !            18: <li><a name="TOC3" href="#SEC3">CHARACTERS</a>
        !            19: <li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
        !            20: <li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTIES FOR \p and \P</a>
        !            21: <li><a name="TOC6" href="#SEC6">PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P</a>
        !            22: <li><a name="TOC7" href="#SEC7">SCRIPT NAMES FOR \p AND \P</a>
        !            23: <li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a>
        !            24: <li><a name="TOC9" href="#SEC9">QUANTIFIERS</a>
        !            25: <li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a>
        !            26: <li><a name="TOC11" href="#SEC11">MATCH POINT RESET</a>
        !            27: <li><a name="TOC12" href="#SEC12">ALTERNATION</a>
        !            28: <li><a name="TOC13" href="#SEC13">CAPTURING</a>
        !            29: <li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
        !            30: <li><a name="TOC15" href="#SEC15">COMMENT</a>
        !            31: <li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
        !            32: <li><a name="TOC17" href="#SEC17">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
        !            33: <li><a name="TOC18" href="#SEC18">BACKREFERENCES</a>
        !            34: <li><a name="TOC19" href="#SEC19">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
        !            35: <li><a name="TOC20" href="#SEC20">CONDITIONAL PATTERNS</a>
        !            36: <li><a name="TOC21" href="#SEC21">BACKTRACKING CONTROL</a>
        !            37: <li><a name="TOC22" href="#SEC22">NEWLINE CONVENTIONS</a>
        !            38: <li><a name="TOC23" href="#SEC23">WHAT \R MATCHES</a>
        !            39: <li><a name="TOC24" href="#SEC24">CALLOUTS</a>
        !            40: <li><a name="TOC25" href="#SEC25">SEE ALSO</a>
        !            41: <li><a name="TOC26" href="#SEC26">AUTHOR</a>
        !            42: <li><a name="TOC27" href="#SEC27">REVISION</a>
        !            43: </ul>
        !            44: <br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
        !            45: <P>
        !            46: The full syntax and semantics of the regular expressions that are supported by
        !            47: PCRE are described in the
        !            48: <a href="pcrepattern.html"><b>pcrepattern</b></a>
        !            49: documentation. This document contains just a quick-reference summary of the
        !            50: syntax.
        !            51: </P>
        !            52: <br><a name="SEC2" href="#TOC1">QUOTING</a><br>
        !            53: <P>
        !            54: <pre>
        !            55:   \x         where x is non-alphanumeric is a literal x
        !            56:   \Q...\E    treat enclosed characters as literal
        !            57: </PRE>
        !            58: </P>
        !            59: <br><a name="SEC3" href="#TOC1">CHARACTERS</a><br>
        !            60: <P>
        !            61: <pre>
        !            62:   \a         alarm, that is, the BEL character (hex 07)
        !            63:   \cx        "control-x", where x is any ASCII character
        !            64:   \e         escape (hex 1B)
        !            65:   \f         formfeed (hex 0C)
        !            66:   \n         newline (hex 0A)
        !            67:   \r         carriage return (hex 0D)
        !            68:   \t         tab (hex 09)
        !            69:   \ddd       character with octal code ddd, or backreference
        !            70:   \xhh       character with hex code hh
        !            71:   \x{hhh..}  character with hex code hhh..
        !            72: </PRE>
        !            73: </P>
        !            74: <br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
        !            75: <P>
        !            76: <pre>
        !            77:   .          any character except newline;
        !            78:                in dotall mode, any character whatsoever
        !            79:   \C         one byte, even in UTF-8 mode (best avoided)
        !            80:   \d         a decimal digit
        !            81:   \D         a character that is not a decimal digit
        !            82:   \h         a horizontal whitespace character
        !            83:   \H         a character that is not a horizontal whitespace character
        !            84:   \N         a character that is not a newline
        !            85:   \p{<i>xx</i>}     a character with the <i>xx</i> property
        !            86:   \P{<i>xx</i>}     a character without the <i>xx</i> property
        !            87:   \R         a newline sequence
        !            88:   \s         a whitespace character
        !            89:   \S         a character that is not a whitespace character
        !            90:   \v         a vertical whitespace character
        !            91:   \V         a character that is not a vertical whitespace character
        !            92:   \w         a "word" character
        !            93:   \W         a "non-word" character
        !            94:   \X         an extended Unicode sequence
        !            95: </pre>
        !            96: In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
        !            97: characters, even in UTF-8 mode. However, this can be changed by setting the
        !            98: PCRE_UCP option.
        !            99: </P>
        !           100: <br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
        !           101: <P>
        !           102: <pre>
        !           103:   C          Other
        !           104:   Cc         Control
        !           105:   Cf         Format
        !           106:   Cn         Unassigned
        !           107:   Co         Private use
        !           108:   Cs         Surrogate
        !           109: 
        !           110:   L          Letter
        !           111:   Ll         Lower case letter
        !           112:   Lm         Modifier letter
        !           113:   Lo         Other letter
        !           114:   Lt         Title case letter
        !           115:   Lu         Upper case letter
        !           116:   L&amp;         Ll, Lu, or Lt
        !           117: 
        !           118:   M          Mark
        !           119:   Mc         Spacing mark
        !           120:   Me         Enclosing mark
        !           121:   Mn         Non-spacing mark
        !           122: 
        !           123:   N          Number
        !           124:   Nd         Decimal number
        !           125:   Nl         Letter number
        !           126:   No         Other number
        !           127: 
        !           128:   P          Punctuation
        !           129:   Pc         Connector punctuation
        !           130:   Pd         Dash punctuation
        !           131:   Pe         Close punctuation
        !           132:   Pf         Final punctuation
        !           133:   Pi         Initial punctuation
        !           134:   Po         Other punctuation
        !           135:   Ps         Open punctuation
        !           136: 
        !           137:   S          Symbol
        !           138:   Sc         Currency symbol
        !           139:   Sk         Modifier symbol
        !           140:   Sm         Mathematical symbol
        !           141:   So         Other symbol
        !           142: 
        !           143:   Z          Separator
        !           144:   Zl         Line separator
        !           145:   Zp         Paragraph separator
        !           146:   Zs         Space separator
        !           147: </PRE>
        !           148: </P>
        !           149: <br><a name="SEC6" href="#TOC1">PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P</a><br>
        !           150: <P>
        !           151: <pre>
        !           152:   Xan        Alphanumeric: union of properties L and N
        !           153:   Xps        POSIX space: property Z or tab, NL, VT, FF, CR
        !           154:   Xsp        Perl space: property Z or tab, NL, FF, CR
        !           155:   Xwd        Perl word: property Xan or underscore
        !           156: </PRE>
        !           157: </P>
        !           158: <br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
        !           159: <P>
        !           160: Arabic,
        !           161: Armenian,
        !           162: Avestan,
        !           163: Balinese,
        !           164: Bamum,
        !           165: Bengali,
        !           166: Bopomofo,
        !           167: Braille,
        !           168: Buginese,
        !           169: Buhid,
        !           170: Canadian_Aboriginal,
        !           171: Carian,
        !           172: Cham,
        !           173: Cherokee,
        !           174: Common,
        !           175: Coptic,
        !           176: Cuneiform,
        !           177: Cypriot,
        !           178: Cyrillic,
        !           179: Deseret,
        !           180: Devanagari,
        !           181: Egyptian_Hieroglyphs,
        !           182: Ethiopic,
        !           183: Georgian,
        !           184: Glagolitic,
        !           185: Gothic,
        !           186: Greek,
        !           187: Gujarati,
        !           188: Gurmukhi,
        !           189: Han,
        !           190: Hangul,
        !           191: Hanunoo,
        !           192: Hebrew,
        !           193: Hiragana,
        !           194: Imperial_Aramaic,
        !           195: Inherited,
        !           196: Inscriptional_Pahlavi,
        !           197: Inscriptional_Parthian,
        !           198: Javanese,
        !           199: Kaithi,
        !           200: Kannada,
        !           201: Katakana,
        !           202: Kayah_Li,
        !           203: Kharoshthi,
        !           204: Khmer,
        !           205: Lao,
        !           206: Latin,
        !           207: Lepcha,
        !           208: Limbu,
        !           209: Linear_B,
        !           210: Lisu,
        !           211: Lycian,
        !           212: Lydian,
        !           213: Malayalam,
        !           214: Meetei_Mayek,
        !           215: Mongolian,
        !           216: Myanmar,
        !           217: New_Tai_Lue,
        !           218: Nko,
        !           219: Ogham,
        !           220: Old_Italic,
        !           221: Old_Persian,
        !           222: Old_South_Arabian,
        !           223: Old_Turkic,
        !           224: Ol_Chiki,
        !           225: Oriya,
        !           226: Osmanya,
        !           227: Phags_Pa,
        !           228: Phoenician,
        !           229: Rejang,
        !           230: Runic,
        !           231: Samaritan,
        !           232: Saurashtra,
        !           233: Shavian,
        !           234: Sinhala,
        !           235: Sundanese,
        !           236: Syloti_Nagri,
        !           237: Syriac,
        !           238: Tagalog,
        !           239: Tagbanwa,
        !           240: Tai_Le,
        !           241: Tai_Tham,
        !           242: Tai_Viet,
        !           243: Tamil,
        !           244: Telugu,
        !           245: Thaana,
        !           246: Thai,
        !           247: Tibetan,
        !           248: Tifinagh,
        !           249: Ugaritic,
        !           250: Vai,
        !           251: Yi.
        !           252: </P>
        !           253: <br><a name="SEC8" href="#TOC1">CHARACTER CLASSES</a><br>
        !           254: <P>
        !           255: <pre>
        !           256:   [...]       positive character class
        !           257:   [^...]      negative character class
        !           258:   [x-y]       range (can be used for hex characters)
        !           259:   [[:xxx:]]   positive POSIX named set
        !           260:   [[:^xxx:]]  negative POSIX named set
        !           261: 
        !           262:   alnum       alphanumeric
        !           263:   alpha       alphabetic
        !           264:   ascii       0-127
        !           265:   blank       space or tab
        !           266:   cntrl       control character
        !           267:   digit       decimal digit
        !           268:   graph       printing, excluding space
        !           269:   lower       lower case letter
        !           270:   print       printing, including space
        !           271:   punct       printing, excluding alphanumeric
        !           272:   space       whitespace
        !           273:   upper       upper case letter
        !           274:   word        same as \w
        !           275:   xdigit      hexadecimal digit
        !           276: </pre>
        !           277: In PCRE, POSIX character set names recognize only ASCII characters by default,
        !           278: but some of them use Unicode properties if PCRE_UCP is set. You can use
        !           279: \Q...\E inside a character class.
        !           280: </P>
        !           281: <br><a name="SEC9" href="#TOC1">QUANTIFIERS</a><br>
        !           282: <P>
        !           283: <pre>
        !           284:   ?           0 or 1, greedy
        !           285:   ?+          0 or 1, possessive
        !           286:   ??          0 or 1, lazy
        !           287:   *           0 or more, greedy
        !           288:   *+          0 or more, possessive
        !           289:   *?          0 or more, lazy
        !           290:   +           1 or more, greedy
        !           291:   ++          1 or more, possessive
        !           292:   +?          1 or more, lazy
        !           293:   {n}         exactly n
        !           294:   {n,m}       at least n, no more than m, greedy
        !           295:   {n,m}+      at least n, no more than m, possessive
        !           296:   {n,m}?      at least n, no more than m, lazy
        !           297:   {n,}        n or more, greedy
        !           298:   {n,}+       n or more, possessive
        !           299:   {n,}?       n or more, lazy
        !           300: </PRE>
        !           301: </P>
        !           302: <br><a name="SEC10" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
        !           303: <P>
        !           304: <pre>
        !           305:   \b          word boundary
        !           306:   \B          not a word boundary
        !           307:   ^           start of subject
        !           308:                also after internal newline in multiline mode
        !           309:   \A          start of subject
        !           310:   $           end of subject
        !           311:                also before newline at end of subject
        !           312:                also before internal newline in multiline mode
        !           313:   \Z          end of subject
        !           314:                also before newline at end of subject
        !           315:   \z          end of subject
        !           316:   \G          first matching position in subject
        !           317: </PRE>
        !           318: </P>
        !           319: <br><a name="SEC11" href="#TOC1">MATCH POINT RESET</a><br>
        !           320: <P>
        !           321: <pre>
        !           322:   \K          reset start of match
        !           323: </PRE>
        !           324: </P>
        !           325: <br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
        !           326: <P>
        !           327: <pre>
        !           328:   expr|expr|expr...
        !           329: </PRE>
        !           330: </P>
        !           331: <br><a name="SEC13" href="#TOC1">CAPTURING</a><br>
        !           332: <P>
        !           333: <pre>
        !           334:   (...)           capturing group
        !           335:   (?&#60;name&#62;...)    named capturing group (Perl)
        !           336:   (?'name'...)    named capturing group (Perl)
        !           337:   (?P&#60;name&#62;...)   named capturing group (Python)
        !           338:   (?:...)         non-capturing group
        !           339:   (?|...)         non-capturing group; reset group numbers for
        !           340:                    capturing groups in each alternative
        !           341: </PRE>
        !           342: </P>
        !           343: <br><a name="SEC14" href="#TOC1">ATOMIC GROUPS</a><br>
        !           344: <P>
        !           345: <pre>
        !           346:   (?&#62;...)         atomic, non-capturing group
        !           347: </PRE>
        !           348: </P>
        !           349: <br><a name="SEC15" href="#TOC1">COMMENT</a><br>
        !           350: <P>
        !           351: <pre>
        !           352:   (?#....)        comment (not nestable)
        !           353: </PRE>
        !           354: </P>
        !           355: <br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
        !           356: <P>
        !           357: <pre>
        !           358:   (?i)            caseless
        !           359:   (?J)            allow duplicate names
        !           360:   (?m)            multiline
        !           361:   (?s)            single line (dotall)
        !           362:   (?U)            default ungreedy (lazy)
        !           363:   (?x)            extended (ignore white space)
        !           364:   (?-...)         unset option(s)
        !           365: </pre>
        !           366: The following are recognized only at the start of a pattern or after one of the
        !           367: newline-setting options with similar syntax:
        !           368: <pre>
        !           369:   (*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
        !           370:   (*UTF8)         set UTF-8 mode (PCRE_UTF8)
        !           371:   (*UCP)          set PCRE_UCP (use Unicode properties for \d etc)
        !           372: </PRE>
        !           373: </P>
        !           374: <br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
        !           375: <P>
        !           376: <pre>
        !           377:   (?=...)         positive look ahead
        !           378:   (?!...)         negative look ahead
        !           379:   (?&#60;=...)        positive look behind
        !           380:   (?&#60;!...)        negative look behind
        !           381: </pre>
        !           382: Each top-level branch of a look behind must be of a fixed length.
        !           383: </P>
        !           384: <br><a name="SEC18" href="#TOC1">BACKREFERENCES</a><br>
        !           385: <P>
        !           386: <pre>
        !           387:   \n              reference by number (can be ambiguous)
        !           388:   \gn             reference by number
        !           389:   \g{n}           reference by number
        !           390:   \g{-n}          relative reference by number
        !           391:   \k&#60;name&#62;        reference by name (Perl)
        !           392:   \k'name'        reference by name (Perl)
        !           393:   \g{name}        reference by name (Perl)
        !           394:   \k{name}        reference by name (.NET)
        !           395:   (?P=name)       reference by name (Python)
        !           396: </PRE>
        !           397: </P>
        !           398: <br><a name="SEC19" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
        !           399: <P>
        !           400: <pre>
        !           401:   (?R)            recurse whole pattern
        !           402:   (?n)            call subpattern by absolute number
        !           403:   (?+n)           call subpattern by relative number
        !           404:   (?-n)           call subpattern by relative number
        !           405:   (?&amp;name)        call subpattern by name (Perl)
        !           406:   (?P&#62;name)       call subpattern by name (Python)
        !           407:   \g&#60;name&#62;        call subpattern by name (Oniguruma)
        !           408:   \g'name'        call subpattern by name (Oniguruma)
        !           409:   \g&#60;n&#62;           call subpattern by absolute number (Oniguruma)
        !           410:   \g'n'           call subpattern by absolute number (Oniguruma)
        !           411:   \g&#60;+n&#62;          call subpattern by relative number (PCRE extension)
        !           412:   \g'+n'          call subpattern by relative number (PCRE extension)
        !           413:   \g&#60;-n&#62;          call subpattern by relative number (PCRE extension)
        !           414:   \g'-n'          call subpattern by relative number (PCRE extension)
        !           415: </PRE>
        !           416: </P>
        !           417: <br><a name="SEC20" href="#TOC1">CONDITIONAL PATTERNS</a><br>
        !           418: <P>
        !           419: <pre>
        !           420:   (?(condition)yes-pattern)
        !           421:   (?(condition)yes-pattern|no-pattern)
        !           422: 
        !           423:   (?(n)...        absolute reference condition
        !           424:   (?(+n)...       relative reference condition
        !           425:   (?(-n)...       relative reference condition
        !           426:   (?(&#60;name&#62;)...   named reference condition (Perl)
        !           427:   (?('name')...   named reference condition (Perl)
        !           428:   (?(name)...     named reference condition (PCRE)
        !           429:   (?(R)...        overall recursion condition
        !           430:   (?(Rn)...       specific group recursion condition
        !           431:   (?(R&amp;name)...   specific recursion condition
        !           432:   (?(DEFINE)...   define subpattern for reference
        !           433:   (?(assert)...   assertion condition
        !           434: </PRE>
        !           435: </P>
        !           436: <br><a name="SEC21" href="#TOC1">BACKTRACKING CONTROL</a><br>
        !           437: <P>
        !           438: The following act immediately when they are reached:
        !           439: <pre>
        !           440:   (*ACCEPT)       force successful match
        !           441:   (*FAIL)         force backtrack; synonym (*F)
        !           442: </pre>
        !           443: The following act only when a subsequent match failure causes a backtrack to
        !           444: reach them. They all force a match failure, but they differ in what happens
        !           445: afterwards. Those that advance the start-of-match point do so only if the
        !           446: pattern is not anchored.
        !           447: <pre>
        !           448:   (*COMMIT)       overall failure, no advance of starting point
        !           449:   (*PRUNE)        advance to next starting character
        !           450:   (*SKIP)         advance start to current matching position
        !           451:   (*THEN)         local failure, backtrack to next alternation
        !           452: </PRE>
        !           453: </P>
        !           454: <br><a name="SEC22" href="#TOC1">NEWLINE CONVENTIONS</a><br>
        !           455: <P>
        !           456: These are recognized only at the very start of the pattern or after a
        !           457: (*BSR_...) or (*UTF8) or (*UCP) option.
        !           458: <pre>
        !           459:   (*CR)           carriage return only
        !           460:   (*LF)           linefeed only
        !           461:   (*CRLF)         carriage return followed by linefeed
        !           462:   (*ANYCRLF)      all three of the above
        !           463:   (*ANY)          any Unicode newline sequence
        !           464: </PRE>
        !           465: </P>
        !           466: <br><a name="SEC23" href="#TOC1">WHAT \R MATCHES</a><br>
        !           467: <P>
        !           468: These are recognized only at the very start of the pattern or after a
        !           469: (*...) option that sets the newline convention or UTF-8 or UCP mode.
        !           470: <pre>
        !           471:   (*BSR_ANYCRLF)  CR, LF, or CRLF
        !           472:   (*BSR_UNICODE)  any Unicode newline sequence
        !           473: </PRE>
        !           474: </P>
        !           475: <br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
        !           476: <P>
        !           477: <pre>
        !           478:   (?C)      callout
        !           479:   (?Cn)     callout with data n
        !           480: </PRE>
        !           481: </P>
        !           482: <br><a name="SEC25" href="#TOC1">SEE ALSO</a><br>
        !           483: <P>
        !           484: <b>pcrepattern</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3),
        !           485: <b>pcrematching</b>(3), <b>pcre</b>(3).
        !           486: </P>
        !           487: <br><a name="SEC26" href="#TOC1">AUTHOR</a><br>
        !           488: <P>
        !           489: Philip Hazel
        !           490: <br>
        !           491: University Computing Service
        !           492: <br>
        !           493: Cambridge CB2 3QH, England.
        !           494: <br>
        !           495: </P>
        !           496: <br><a name="SEC27" href="#TOC1">REVISION</a><br>
        !           497: <P>
        !           498: Last updated: 21 November 2010
        !           499: <br>
        !           500: Copyright &copy; 1997-2010 University of Cambridge.
        !           501: <br>
        !           502: <p>
        !           503: Return to the <a href="index.html">PCRE index page</a>.
        !           504: </p>

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>