Annotation of embedaddon/pcre/pcrecpp_unittest.cc, revision 1.1.1.1

1.1       misho       1: // -*- coding: utf-8 -*-
                      2: //
                      3: // Copyright (c) 2005 - 2010, Google Inc.
                      4: // All rights reserved.
                      5: //
                      6: // Redistribution and use in source and binary forms, with or without
                      7: // modification, are permitted provided that the following conditions are
                      8: // met:
                      9: //
                     10: //     * Redistributions of source code must retain the above copyright
                     11: // notice, this list of conditions and the following disclaimer.
                     12: //     * Redistributions in binary form must reproduce the above
                     13: // copyright notice, this list of conditions and the following disclaimer
                     14: // in the documentation and/or other materials provided with the
                     15: // distribution.
                     16: //     * Neither the name of Google Inc. nor the names of its
                     17: // contributors may be used to endorse or promote products derived from
                     18: // this software without specific prior written permission.
                     19: //
                     20: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
                     21: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
                     22: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
                     23: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
                     24: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
                     25: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
                     26: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     27: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     28: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     29: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
                     30: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     31: //
                     32: // Author: Sanjay Ghemawat
                     33: //
                     34: // TODO: Test extractions for PartialMatch/Consume
                     35: 
                     36: #ifdef HAVE_CONFIG_H
                     37: #include "config.h"
                     38: #endif
                     39: 
                     40: #include <stdio.h>
                     41: #include <string.h>      /* for memset and strcmp */
                     42: #include <cassert>
                     43: #include <vector>
                     44: #include "pcrecpp.h"
                     45: 
                     46: using pcrecpp::StringPiece;
                     47: using pcrecpp::RE;
                     48: using pcrecpp::RE_Options;
                     49: using pcrecpp::Hex;
                     50: using pcrecpp::Octal;
                     51: using pcrecpp::CRadix;
                     52: 
                     53: static bool VERBOSE_TEST  = false;
                     54: 
                     // CHECK terminates the process (exit status 1) after printing the file,
                     // line and failed expression to stderr when `condition` is false.  It is
                     // *not* controlled by NDEBUG, so the condition is evaluated regardless of
                     // compilation mode.  Therefore, it is safe to do things like:
                     //    CHECK_EQ(fp->Write(x), 4)
                     #define CHECK(condition) do {                           \
                       if (!(condition)) {                                   \
                         fprintf(stderr, "%s:%d: Check failed: %s\n",        \
                                 __FILE__, __LINE__, #condition);            \
                         exit(1);                                            \
                       }                                                     \
                     } while (0)

                     // CHECK_EQ dies unless a == b.  Both operands are parenthesized so that
                     // arguments built from operators binding more loosely than == (such as
                     // `x & mask` or `c ? p : q`) are compared as whole expressions.
                     #define CHECK_EQ(a, b)   CHECK((a) == (b))
                     68: 
                     69: static void Timing1(int num_iters) {
                     70:   // Same pattern lots of times
                     71:   RE pattern("ruby:\\d+");
                     72:   StringPiece p("ruby:1234");
                     73:   for (int j = num_iters; j > 0; j--) {
                     74:     CHECK(pattern.FullMatch(p));
                     75:   }
                     76: }
                     77: 
                     78: static void Timing2(int num_iters) {
                     79:   // Same pattern lots of times
                     80:   RE pattern("ruby:(\\d+)");
                     81:   int i;
                     82:   for (int j = num_iters; j > 0; j--) {
                     83:     CHECK(pattern.FullMatch("ruby:1234", &i));
                     84:     CHECK_EQ(i, 1234);
                     85:   }
                     86: }
                     87: 
                     88: static void Timing3(int num_iters) {
                     89:   string text_string;
                     90:   for (int j = num_iters; j > 0; j--) {
                     91:     text_string += "this is another line\n";
                     92:   }
                     93: 
                     94:   RE line_matcher(".*\n");
                     95:   string line;
                     96:   StringPiece text(text_string);
                     97:   int counter = 0;
                     98:   while (line_matcher.Consume(&text)) {
                     99:     counter++;
                    100:   }
                    101:   printf("Matched %d lines\n", counter);
                    102: }
                    103: 
                    #if 0  // enable this if you have a way of defining VirtualProcessSize()

                    // Constructs and destroys 100000 distinct RE objects and checks that the
                    // process size grows by less than 2% over the second half of the run.
                    // VirtualProcessSize() is not defined in this file, which is why the
                    // test is compiled out.
                    static void LeakTest() {
                      // Check for memory leaks
                      unsigned long long initial_size = 0;
                      for (int i = 0; i < 100000; i++) {
                        if (i == 50000) {
                          // Take the baseline only after a warm-up period.
                          initial_size = VirtualProcessSize();
                          printf("Size after 50000: %llu\n", initial_size);
                        }
                        char buf[100];  // definitely big enough
                        sprintf(buf, "pat%09d", i);
                        RE newre(buf);
                      }
                      // Same type as initial_size so that it matches the %llu conversion.
                      const unsigned long long final_size = VirtualProcessSize();
                      printf("Size after 100000: %llu\n", final_size);
                      // Subtract in floating point: an unsigned subtraction would wrap to a
                      // huge positive value if the process shrank between the two samples.
                      const double growth =
                          (double(final_size) - double(initial_size)) / double(final_size);
                      printf("Growth: %0.2f%%\n", growth * 100);
                      CHECK(growth < 0.02);       // Allow < 2% growth
                    }

                    #endif
                    126: 
                    127: static void RadixTests() {
                    128:   printf("Testing hex\n");
                    129: 
                    130: #define CHECK_HEX(type, value) \
                    131:   do { \
                    132:     type v; \
                    133:     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
                    134:     CHECK_EQ(v, 0x ## value); \
                    135:     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
                    136:     CHECK_EQ(v, 0x ## value); \
                    137:   } while(0)
                    138: 
                    139:   CHECK_HEX(short,              2bad);
                    140:   CHECK_HEX(unsigned short,     2badU);
                    141:   CHECK_HEX(int,                dead);
                    142:   CHECK_HEX(unsigned int,       deadU);
                    143:   CHECK_HEX(long,               7eadbeefL);
                    144:   CHECK_HEX(unsigned long,      deadbeefUL);
                    145: #ifdef HAVE_LONG_LONG
                    146:   CHECK_HEX(long long,          12345678deadbeefLL);
                    147: #endif
                    148: #ifdef HAVE_UNSIGNED_LONG_LONG
                    149:   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
                    150: #endif
                    151: 
                    152: #undef CHECK_HEX
                    153: 
                    154:   printf("Testing octal\n");
                    155: 
                    156: #define CHECK_OCTAL(type, value) \
                    157:   do { \
                    158:     type v; \
                    159:     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
                    160:     CHECK_EQ(v, 0 ## value); \
                    161:     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
                    162:     CHECK_EQ(v, 0 ## value); \
                    163:   } while(0)
                    164: 
                    165:   CHECK_OCTAL(short,              77777);
                    166:   CHECK_OCTAL(unsigned short,     177777U);
                    167:   CHECK_OCTAL(int,                17777777777);
                    168:   CHECK_OCTAL(unsigned int,       37777777777U);
                    169:   CHECK_OCTAL(long,               17777777777L);
                    170:   CHECK_OCTAL(unsigned long,      37777777777UL);
                    171: #ifdef HAVE_LONG_LONG
                    172:   CHECK_OCTAL(long long,          777777777777777777777LL);
                    173: #endif
                    174: #ifdef HAVE_UNSIGNED_LONG_LONG
                    175:   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
                    176: #endif
                    177: 
                    178: #undef CHECK_OCTAL
                    179: 
                    180:   printf("Testing decimal\n");
                    181: 
                    182: #define CHECK_DECIMAL(type, value) \
                    183:   do { \
                    184:     type v; \
                    185:     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
                    186:     CHECK_EQ(v, value); \
                    187:     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
                    188:     CHECK_EQ(v, value); \
                    189:   } while(0)
                    190: 
                    191:   CHECK_DECIMAL(short,              -1);
                    192:   CHECK_DECIMAL(unsigned short,     9999);
                    193:   CHECK_DECIMAL(int,                -1000);
                    194:   CHECK_DECIMAL(unsigned int,       12345U);
                    195:   CHECK_DECIMAL(long,               -10000000L);
                    196:   CHECK_DECIMAL(unsigned long,      3083324652U);
                    197: #ifdef HAVE_LONG_LONG
                    198:   CHECK_DECIMAL(long long,          -100000000000000LL);
                    199: #endif
                    200: #ifdef HAVE_UNSIGNED_LONG_LONG
                    201:   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
                    202: #endif
                    203: 
                    204: #undef CHECK_DECIMAL
                    205: 
                    206: }
                    207: 
                    208: static void TestReplace() {
                    209:   printf("Testing Replace\n");
                    210: 
                    211:   struct ReplaceTest {
                    212:     const char *regexp;
                    213:     const char *rewrite;
                    214:     const char *original;
                    215:     const char *single;
                    216:     const char *global;
                    217:     int global_count;         // the expected return value from ReplaceAll
                    218:   };
                    219:   static const ReplaceTest tests[] = {
                    220:     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
                    221:       "\\2\\1ay",
                    222:       "the quick brown fox jumps over the lazy dogs.",
                    223:       "ethay quick brown fox jumps over the lazy dogs.",
                    224:       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
                    225:       9 },
                    226:     { "\\w+",
                    227:       "\\0-NOSPAM",
                    228:       "paul.haahr@google.com",
                    229:       "paul-NOSPAM.haahr@google.com",
                    230:       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
                    231:       4 },
                    232:     { "^",
                    233:       "(START)",
                    234:       "foo",
                    235:       "(START)foo",
                    236:       "(START)foo",
                    237:       1 },
                    238:     { "^",
                    239:       "(START)",
                    240:       "",
                    241:       "(START)",
                    242:       "(START)",
                    243:       1 },
                    244:     { "$",
                    245:       "(END)",
                    246:       "",
                    247:       "(END)",
                    248:       "(END)",
                    249:       1 },
                    250:     { "b",
                    251:       "bb",
                    252:       "ababababab",
                    253:       "abbabababab",
                    254:       "abbabbabbabbabb",
                    255:        5 },
                    256:     { "b",
                    257:       "bb",
                    258:       "bbbbbb",
                    259:       "bbbbbbb",
                    260:       "bbbbbbbbbbbb",
                    261:       6 },
                    262:     { "b+",
                    263:       "bb",
                    264:       "bbbbbb",
                    265:       "bb",
                    266:       "bb",
                    267:       1 },
                    268:     { "b*",
                    269:       "bb",
                    270:       "bbbbbb",
                    271:       "bb",
                    272:       "bbbb",
                    273:       2 },
                    274:     { "b*",
                    275:       "bb",
                    276:       "aaaaa",
                    277:       "bbaaaaa",
                    278:       "bbabbabbabbabbabb",
                    279:       6 },
                    280:     { "b*",
                    281:       "bb",
                    282:       "aa\naa\n",
                    283:       "bbaa\naa\n",
                    284:       "bbabbabb\nbbabbabb\nbb",
                    285:       7 },
                    286:     { "b*",
                    287:       "bb",
                    288:       "aa\raa\r",
                    289:       "bbaa\raa\r",
                    290:       "bbabbabb\rbbabbabb\rbb",
                    291:       7 },
                    292:     { "b*",
                    293:       "bb",
                    294:       "aa\r\naa\r\n",
                    295:       "bbaa\r\naa\r\n",
                    296:       "bbabbabb\r\nbbabbabb\r\nbb",
                    297:       7 },
                    298:     // Check empty-string matching (it's tricky!)
                    299:     { "aa|b*",
                    300:       "@",
                    301:       "aa",
                    302:       "@",
                    303:       "@@",
                    304:       2 },
                    305:     { "b*|aa",
                    306:       "@",
                    307:       "aa",
                    308:       "@aa",
                    309:       "@@@",
                    310:       3 },
                    311: #ifdef SUPPORT_UTF8
                    312:     { "b*",
                    313:       "bb",
                    314:       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
                    315:       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
                    316:       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
                    317:       5 },
                    318:     { "b*",
                    319:       "bb",
                    320:       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
                    321:       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
                    322:       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
                    323:        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
                    324:       9 },
                    325: #endif
                    326:     { "", NULL, NULL, NULL, NULL, 0 }
                    327:   };
                    328: 
                    329: #ifdef SUPPORT_UTF8
                    330:   const bool support_utf8 = true;
                    331: #else
                    332:   const bool support_utf8 = false;
                    333: #endif
                    334: 
                    335:   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
                    336:     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
                    337:     assert(re.error().empty());
                    338:     string one(t->original);
                    339:     CHECK(re.Replace(t->rewrite, &one));
                    340:     CHECK_EQ(one, t->single);
                    341:     string all(t->original);
                    342:     const int replace_count = re.GlobalReplace(t->rewrite, &all);
                    343:     CHECK_EQ(all, t->global);
                    344:     CHECK_EQ(replace_count, t->global_count);
                    345:   }
                    346: 
                    347:   // One final test: test \r\n replacement when we're not in CRLF mode
                    348:   {
                    349:     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
                    350:     assert(re.error().empty());
                    351:     string all("aa\r\naa\r\n");
                    352:     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
                    353:     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
                    354:   }
                    355:   {
                    356:     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
                    357:     assert(re.error().empty());
                    358:     string all("aa\r\naa\r\n");
                    359:     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
                    360:     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
                    361:   }
                    362:   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
                    363:   //       Alas, the answer depends on how pcre was compiled.
                    364: }
                    365: 
                    366: static void TestExtract() {
                    367:   printf("Testing Extract\n");
                    368: 
                    369:   string s;
                    370: 
                    371:   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
                    372:   CHECK_EQ(s, "kremvax!boris");
                    373: 
                    374:   // check the RE interface as well
                    375:   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
                    376:   CHECK_EQ(s, "'foo'");
                    377:   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
                    378:   CHECK_EQ(s, "'foo'");
                    379: }
                    380: 
                    381: static void TestConsume() {
                    382:   printf("Testing Consume\n");
                    383: 
                    384:   string word;
                    385: 
                    386:   string s("   aaa b!@#$@#$cccc");
                    387:   StringPiece input(s);
                    388: 
                    389:   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
                    390:   CHECK(r.Consume(&input, &word));
                    391:   CHECK_EQ(word, "aaa");
                    392:   CHECK(r.Consume(&input, &word));
                    393:   CHECK_EQ(word, "b");
                    394:   CHECK(! r.Consume(&input, &word));
                    395: }
                    396: 
                    397: static void TestFindAndConsume() {
                    398:   printf("Testing FindAndConsume\n");
                    399: 
                    400:   string word;
                    401: 
                    402:   string s("   aaa b!@#$@#$cccc");
                    403:   StringPiece input(s);
                    404: 
                    405:   RE r("(\\w+)");      // matches a word
                    406:   CHECK(r.FindAndConsume(&input, &word));
                    407:   CHECK_EQ(word, "aaa");
                    408:   CHECK(r.FindAndConsume(&input, &word));
                    409:   CHECK_EQ(word, "b");
                    410:   CHECK(r.FindAndConsume(&input, &word));
                    411:   CHECK_EQ(word, "cccc");
                    412:   CHECK(! r.FindAndConsume(&input, &word));
                    413: }
                    414: 
                    415: static void TestMatchNumberPeculiarity() {
                    416:   printf("Testing match-number peculiarity\n");
                    417: 
                    418:   string word1;
                    419:   string word2;
                    420:   string word3;
                    421: 
                    422:   RE r("(foo)|(bar)|(baz)");
                    423:   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
                    424:   CHECK_EQ(word1, "foo");
                    425:   CHECK_EQ(word2, "");
                    426:   CHECK_EQ(word3, "");
                    427:   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
                    428:   CHECK_EQ(word1, "");
                    429:   CHECK_EQ(word2, "bar");
                    430:   CHECK_EQ(word3, "");
                    431:   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
                    432:   CHECK_EQ(word1, "");
                    433:   CHECK_EQ(word2, "");
                    434:   CHECK_EQ(word3, "baz");
                    435:   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
                    436: 
                    437:   string a;
                    438:   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
                    439:   CHECK_EQ(a, "");
                    440: }
                    441: 
                    442: static void TestRecursion() {
                    443:   printf("Testing recursion\n");
                    444: 
                    445:   // Get one string that passes (sometimes), one that never does.
                    446:   string text_good("abcdefghijk");
                    447:   string text_bad("acdefghijkl");
                    448: 
                    449:   // According to pcretest, matching text_good against (\w+)*b
                    450:   // requires match_limit of at least 8192, and match_recursion_limit
                    451:   // of at least 37.
                    452: 
                    453:   RE_Options options_ml;
                    454:   options_ml.set_match_limit(8192);
                    455:   RE re("(\\w+)*b", options_ml);
                    456:   CHECK(re.PartialMatch(text_good) == true);
                    457:   CHECK(re.PartialMatch(text_bad) == false);
                    458:   CHECK(re.FullMatch(text_good) == false);
                    459:   CHECK(re.FullMatch(text_bad) == false);
                    460: 
                    461:   options_ml.set_match_limit(1024);
                    462:   RE re2("(\\w+)*b", options_ml);
                    463:   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
                    464:   CHECK(re2.PartialMatch(text_bad) == false);
                    465:   CHECK(re2.FullMatch(text_good) == false);
                    466:   CHECK(re2.FullMatch(text_bad) == false);
                    467: 
                    468:   RE_Options options_mlr;
                    469:   options_mlr.set_match_limit_recursion(50);
                    470:   RE re3("(\\w+)*b", options_mlr);
                    471:   CHECK(re3.PartialMatch(text_good) == true);
                    472:   CHECK(re3.PartialMatch(text_bad) == false);
                    473:   CHECK(re3.FullMatch(text_good) == false);
                    474:   CHECK(re3.FullMatch(text_bad) == false);
                    475: 
                    476:   options_mlr.set_match_limit_recursion(10);
                    477:   RE re4("(\\w+)*b", options_mlr);
                    478:   CHECK(re4.PartialMatch(text_good) == false);
                    479:   CHECK(re4.PartialMatch(text_bad) == false);
                    480:   CHECK(re4.FullMatch(text_good) == false);
                    481:   CHECK(re4.FullMatch(text_bad) == false);
                    482: }
                    483: 
                    484: // A meta-quoted string, interpreted as a pattern, should always match
                    485: // the original unquoted string.
                    486: static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
                    487:   string quoted = RE::QuoteMeta(unquoted);
                    488:   RE re(quoted, options);
                    489:   CHECK(re.FullMatch(unquoted));
                    490: }
                    491: 
                    492: // A string containing meaningful regexp characters, which is then meta-
                    493: // quoted, should not generally match a string the unquoted string does.
                    494: static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
                    495:                                   RE_Options options = RE_Options()) {
                    496:   string quoted = RE::QuoteMeta(unquoted);
                    497:   RE re(quoted, options);
                    498:   CHECK(!re.FullMatch(should_not_match));
                    499: }
                    500: 
                    501: // Tests that quoted meta characters match their original strings,
                    502: // and that a few things that shouldn't match indeed do not.
                    503: static void TestQuotaMetaSimple() {
                    504:   TestQuoteMeta("foo");
                    505:   TestQuoteMeta("foo.bar");
                    506:   TestQuoteMeta("foo\\.bar");
                    507:   TestQuoteMeta("[1-9]");
                    508:   TestQuoteMeta("1.5-2.0?");
                    509:   TestQuoteMeta("\\d");
                    510:   TestQuoteMeta("Who doesn't like ice cream?");
                    511:   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
                    512:   TestQuoteMeta("((?!)xxx).*yyy");
                    513:   TestQuoteMeta("([");
                    514:   TestQuoteMeta(string("foo\0bar", 7));
                    515: }
                    516: 
                    517: static void TestQuoteMetaSimpleNegative() {
                    518:   NegativeTestQuoteMeta("foo", "bar");
                    519:   NegativeTestQuoteMeta("...", "bar");
                    520:   NegativeTestQuoteMeta("\\.", ".");
                    521:   NegativeTestQuoteMeta("\\.", "..");
                    522:   NegativeTestQuoteMeta("(a)", "a");
                    523:   NegativeTestQuoteMeta("(a|b)", "a");
                    524:   NegativeTestQuoteMeta("(a|b)", "(a)");
                    525:   NegativeTestQuoteMeta("(a|b)", "a|b");
                    526:   NegativeTestQuoteMeta("[0-9]", "0");
                    527:   NegativeTestQuoteMeta("[0-9]", "0-9");
                    528:   NegativeTestQuoteMeta("[0-9]", "[9]");
                    529:   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
                    530: }
                    531: 
                    532: static void TestQuoteMetaLatin1() {
                    533:   TestQuoteMeta("3\xb2 = 9");
                    534: }
                    535: 
                    // QuoteMeta checks on multi-byte UTF-8 input.  The body is compiled only
                    // when SUPPORT_UTF8 is defined; otherwise this function does nothing.
                    static void TestQuoteMetaUtf8() {
                    #ifdef SUPPORT_UTF8
                      const RE_Options utf8 = pcrecpp::UTF8();

                      TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
                      TestQuoteMeta("xyz", utf8);                 // No fancy utf8
                      TestQuoteMeta("\xc2\xb0", utf8);            // 2-byte utf8 (degree symbol)
                      TestQuoteMeta("27\xc2\xb0 degrees", utf8);  // As a middle character
                      TestQuoteMeta("\xe2\x80\xb3", utf8);        // 3-byte utf8 (double prime)
                      TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);    // 4-byte utf8 (music note)

                      // Interpreted as Latin-1, but should still work
                      TestQuoteMeta("27\xc2\xb0");

                      // 2-byte utf (degree symbol): the quoted pattern must not match a text
                      // that has a backslash in front of each byte.
                      NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0", utf8);
                    #endif
                    }
                    550: 
                    551: static void TestQuoteMetaAll() {
                    552:   printf("Testing QuoteMeta\n");
                    553:   TestQuotaMetaSimple();
                    554:   TestQuoteMetaSimpleNegative();
                    555:   TestQuoteMetaLatin1();
                    556:   TestQuoteMetaUtf8();
                    557: }
                    558: 
                    559: //
                    560: // Options tests contributed by
                    561: // Giuseppe Maxia, CTO, Stardata s.r.l.
                    562: // July 2005
                    563: //
                    564: static void GetOneOptionResult(
                    565:                 const char *option_name,
                    566:                 const char *regex,
                    567:                 const char *str,
                    568:                 RE_Options options,
                    569:                 bool full,
                    570:                 string expected) {
                    571: 
                    572:   printf("Testing Option <%s>\n", option_name);
                    573:   if(VERBOSE_TEST)
                    574:     printf("/%s/ finds \"%s\" within \"%s\" \n",
                    575:                     regex,
                    576:                     expected.c_str(),
                    577:                     str);
                    578:   string captured("");
                    579:   if (full)
                    580:     RE(regex,options).FullMatch(str, &captured);
                    581:   else
                    582:     RE(regex,options).PartialMatch(str, &captured);
                    583:   CHECK_EQ(captured, expected);
                    584: }
                    585: 
                    586: static void TestOneOption(
                    587:                 const char *option_name,
                    588:                 const char *regex,
                    589:                 const char *str,
                    590:                 RE_Options options,
                    591:                 bool full,
                    592:                 bool assertive = true) {
                    593: 
                           // Asserts that `str` does (or does not) match `regex` compiled with
                           // `options`.
                           //   option_name: label printed in the "Testing Option <...>" banner.
                           //   regex:       pattern handed to the RE constructor.
                           //   str:         subject text to match against.
                           //   options:     RE_Options handed to the RE constructor.
                           //   full:        true selects FullMatch, false selects PartialMatch.
                           //   assertive:   true (default) requires a match; false requires that
                           //                the match call fails.
                    594:   printf("Testing Option <%s>\n", option_name);
                    595:   if (VERBOSE_TEST)
                    596:     printf("'%s' %s /%s/ \n",
                    597:                   str,
                    598:                   (assertive? "matches" : "doesn't match"),
                    599:                   regex);
                           // Four cases: {match expected, no match expected} x {full, partial}.
                    600:   if (assertive) {
                    601:     if (full)
                    602:       CHECK(RE(regex,options).FullMatch(str));
                    603:     else
                    604:       CHECK(RE(regex,options).PartialMatch(str));
                    605:   } else {
                    606:     if (full)
                    607:       CHECK(!RE(regex,options).FullMatch(str));
                    608:     else
                    609:       CHECK(!RE(regex,options).PartialMatch(str));
                    610:   }
                    611: }
                    612: 
                    613: static void Test_CASELESS() {
                           // Exercises the caseless option three ways: via set_caseless() on an
                           // RE_Options object, via the value returned by set_caseless() passed
                           // straight through as the options argument, and via pcrecpp::CASELESS().
                           // All of these use partial matching (full == false).
                    614:   RE_Options options;
                    615:   RE_Options options2;
                    616: 
                    617:   options.set_caseless(true);
                    618:   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
                    619:   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
                    620:   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
                    621: 
                    622:   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
                    623:   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
                           // With the option switched back off, "HELLO" is expected NOT to match
                           // "hello" (assertive == false).
                    624:   options.set_caseless(false);
                    625:   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
                    626: }
                    627: 
                    628: static void Test_MULTILINE() {
                           // Exercises the multiline option against a three-line subject. With the
                           // option on, "^cruel$" is expected to partial-match the middle line; with
                           // it off, the same pattern is expected not to match.
                    629:   RE_Options options;
                    630:   RE_Options options2;
                    631:   const char *str = "HELLO\n" "cruel\n" "world\n";
                    632: 
                    633:   options.set_multiline(true);
                    634:   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
                    635:   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
                    636:   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
                    637:   options.set_multiline(false);
                    638:   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
                    639: }
                    640: 
                    641: static void Test_DOTALL() {
                           // Exercises the dotall option with full matching. The subject contains
                           // embedded newlines, so "HELLO.*world" is expected to full-match only
                           // while the option is on.
                    642:   RE_Options options;
                    643:   RE_Options options2;
                    644:   const char *str = "HELLO\n" "cruel\n" "world";
                    645: 
                    646:   options.set_dotall(true);
                    647:   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
                    648:   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
                    649:   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
                    650:   options.set_dotall(false);
                    651:   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
                    652: }
                    653: 
                    654: static void Test_DOLLAR_ENDONLY() {
                           // Exercises the dollar_endonly option. The subject ends with a newline:
                           // with default options "world$" is expected to partial-match it, and with
                           // the option on the same pattern is expected not to match.
                    655:   RE_Options options;
                    656:   RE_Options options2;
                    657:   const char *str = "HELLO world\n";
                    658: 
                    659:   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
                    660:   options.set_dollar_endonly(true);
                    661:   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
                    662:   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
                    663: }
                    664: 
                    665: static void Test_EXTRA() {
                           // Exercises the extra option with full matching. With the option on, the
                           // pattern with backslash-escaped letters is expected NOT to match "HELLO";
                           // with the option off, it is expected to match.
                    666:   RE_Options options;
                    667:   const char *str = "HELLO";
                    668: 
                    669:   options.set_extra(true);
                    670:   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
                    671:   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
                    672:   options.set_extra(false);
                    673:   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
                    674: }
                    675: 
                    676: static void Test_EXTENDED() {
                           // Exercises the extended option with partial matching. With the option
                           // on, the literal pattern "HELLO world" is expected NOT to match the
                           // subject "HELLO world", while a pattern laid out with extra spaces is
                           // expected to match. With the option off, the literal pattern matches.
                    677:   RE_Options options;
                    678:   RE_Options options2;
                    679:   const char *str = "HELLO world";
                    680: 
                    681:   options.set_extended(true);
                    682:   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
                    683:   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
                    684:   TestOneOption("EXTENDED (class)",
                    685:                     "^ HE L{2} O "
                    686:                     "\\s+        "
                    687:                     "\\w+ $      ",
                    688:                     str,
                    689:                     options,
                    690:                     false);
                    691: 
                           // Same two checks again, using pcrecpp::EXTENDED() instead of a setter.
                    692:   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
                    693:   TestOneOption("EXTENDED (function)",
                    694:                     "^ HE L{2} O "
                    695:                     "\\s+        "
                    696:                     "\\w+ $      ",
                    697:                     str,
                    698:                     pcrecpp::EXTENDED(),
                    699:                     false);
                    700: 
                    701:   options.set_extended(false);
                    702:   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
                    703: }
                    704: 
                    705: static void Test_NO_AUTO_CAPTURE() {
                           // Runs Extract with the rewrite string "\\1" on "(world|universe)$",
                           // once before and once after set_no_auto_capture(true) is called on
                           // `options`, and checks `captured` after each call.
                    706:   RE_Options options;
                    707:   const char *str = "HELLO world";
                    708:   string captured;
                    709: 
                    710:   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
                    711:   if (VERBOSE_TEST)
                    712:     printf("parentheses capture text\n");
                    713:   RE re("(world|universe)$", options);
                    714:   CHECK(re.Extract("\\1", str , &captured));
                    715:   CHECK_EQ(captured, "world");
                           // NOTE(review): `re` was built above, before this setter call, and is not
                           // rebuilt afterwards. Unless RE keeps a live reference to `options`, the
                           // Extract below still runs without NO_AUTO_CAPTURE -- confirm intent.
                    716:   options.set_no_auto_capture(true);
                    717:   printf("testing Option <NO_AUTO_CAPTURE>\n");
                    718:   if (VERBOSE_TEST)
                    719:     printf("parentheses do not capture text\n");
                           // The return value of this Extract is not checked, and `captured` already
                           // holds "world" from the first Extract, so the final CHECK_EQ can pass
                           // even if this call leaves `captured` untouched.
                    720:   re.Extract("\\1",str, &captured );
                    721:   CHECK_EQ(captured, "world");
                    722: }
                    723: 
                    724: static void Test_UNGREEDY() {
                           // Exercises the ungreedy option through GetOneOptionResult, comparing the
                           // text captured by "('.*')" and "('.*?')" from a subject holding two
                           // quoted words. The expected captures swap between the two patterns when
                           // the option is toggled: with it on, ".*" captures the short "'this'" and
                           // ".*?" captures the long span; with it off, the reverse.
                    725:   RE_Options options;
                    726:   const char *str = "HELLO, 'this' is the 'world'";
                    727: 
                    728:   options.set_ungreedy(true);
                    729:   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
                    730:   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
                    731:   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
                    732: 
                    733:   options.set_ungreedy(false);
                    734:   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
                    735:   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
                    736: }
                    737: 
                    738: static void Test_all_options() {
                           // Exercises set_all_options() with PCRE_* flag combinations, plus two
                           // other ways of supplying the same flags: the RE_Options(int) constructor
                           // and chained setter calls. All checks use partial matching.
                    739:   const char *str = "HELLO\n" "cruel\n" "world";
                    740:   RE_Options options;
                    741:   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
                    742: 
                    743:   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
                           // Passing 0 is expected to clear the flags: the same pattern must no
                           // longer match.
                    744:   options.set_all_options(0);
                    745:   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
                    746:   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
                    747: 
                    748:   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
                    749:   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
                    750:                   " ^ c r u e l $ ",
                    751:                   str,
                    752:                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
                    753:                   false);
                    754: 
                    755:   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
                    756:                   " ^ c r u e l $ ",
                    757:                   str,
                    758:                   RE_Options()
                    759:                        .set_multiline(true)
                    760:                        .set_extended(true),
                    761:                   false);
                    762: 
                    763:   options.set_all_options(0);
                    764:   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
                    765: 
                    766: }
                    767: 
                    768: static void TestOptions() {
                           // Driver for the option tests: prints a banner, then runs each
                           // Test_<OPTION> function defined above once, in the order listed here.
                    769:   printf("Testing Options\n");
                    770:   Test_CASELESS();
                    771:   Test_MULTILINE();
                    772:   Test_DOTALL();
                    773:   Test_DOLLAR_ENDONLY();
                    774:   Test_EXTENDED();
                    775:   Test_NO_AUTO_CAPTURE();
                    776:   Test_UNGREEDY();
                    777:   Test_EXTRA();
                    778:   Test_all_options();
                    779: }
                    780: 
                    781: static void TestConstructors() {
                           // Checks that RE objects produced by copy construction and by assignment
                           // match the same subject as the original, and that self-assignment leaves
                           // each object still able to match.
                    782:   printf("Testing constructors\n");
                    783: 
                    784:   RE_Options options;
                    785:   options.set_dotall(true);
                    786:   const char *str = "HELLO\n" "cruel\n" "world";
                    787: 
                    788:   RE orig("HELLO.*world", options);
                    789:   CHECK(orig.FullMatch(str));
                    790: 
                           // Copy construction.
                    791:   RE copy1(orig);
                    792:   CHECK(copy1.FullMatch(str));
                    793: 
                           // Assignment over an RE that initially holds a non-matching pattern,
                           // first from a copy and then from the original.
                    794:   RE copy2("not a match");
                    795:   CHECK(!copy2.FullMatch(str));
                    796:   copy2 = copy1;
                    797:   CHECK(copy2.FullMatch(str));
                    798:   copy2 = orig;
                    799:   CHECK(copy2.FullMatch(str));
                    800: 
                    801:   // Make sure when we assign to ourselves, nothing bad happens
                    802:   orig = orig;
                    803:   copy1 = copy1;
                    804:   copy2 = copy2;
                    805:   CHECK(orig.FullMatch(str));
                    806:   CHECK(copy1.FullMatch(str));
                    807:   CHECK(copy2.FullMatch(str));
                    808: }
                    809: 
                    810: int main(int argc, char** argv) {
                    811:   // Treat any flag as --help
                    812:   if (argc > 1 && argv[1][0] == '-') {
                    813:     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
                    814:            "       If 'timingX ###' is specified, run the given timing test\n"
                    815:            "       with the given number of iterations, rather than running\n"
                    816:            "       the default corectness test.\n", argv[0]);
                    817:     return 0;
                    818:   }
                    819: 
                    820:   if (argc > 1) {
                    821:     if ( argc == 2 || atoi(argv[2]) == 0) {
                    822:       printf("timing mode needs a num-iters argument\n");
                    823:       return 1;
                    824:     }
                    825:     if (!strcmp(argv[1], "timing1"))
                    826:       Timing1(atoi(argv[2]));
                    827:     else if (!strcmp(argv[1], "timing2"))
                    828:       Timing2(atoi(argv[2]));
                    829:     else if (!strcmp(argv[1], "timing3"))
                    830:       Timing3(atoi(argv[2]));
                    831:     else
                    832:       printf("Unknown argument '%s'\n", argv[1]);
                    833:     return 0;
                    834:   }
                    835: 
                    836:   printf("PCRE C++ wrapper tests\n");
                    837:   printf("Testing FullMatch\n");
                    838: 
                    839:   int i;
                    840:   string s;
                    841: 
                    842:   /***** FullMatch with no args *****/
                    843: 
                    844:   CHECK(RE("h.*o").FullMatch("hello"));
                    845:   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
                    846:   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
                    847:   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
                    848:   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
                    849:   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
                    850: 
                    851:   /***** FullMatch with args *****/
                    852: 
                    853:   // Zero-arg
                    854:   CHECK(RE("\\d+").FullMatch("1001"));
                    855: 
                    856:   // Single-arg
                    857:   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
                    858:   CHECK_EQ(i, 1001);
                    859:   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
                    860:   CHECK_EQ(i, -123);
                    861:   CHECK(!RE("()\\d+").FullMatch("10", &i));
                    862:   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
                    863:                                 &i));
                    864: 
                    865:   // Digits surrounding integer-arg
                    866:   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
                    867:   CHECK_EQ(i, 23);
                    868:   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
                    869:   CHECK_EQ(i, 1);
                    870:   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
                    871:   CHECK_EQ(i, -1);
                    872:   CHECK(RE("(\\d)").PartialMatch("1234", &i));
                    873:   CHECK_EQ(i, 1);
                    874:   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
                    875:   CHECK_EQ(i, -1);
                    876: 
                    877:   // String-arg
                    878:   CHECK(RE("h(.*)o").FullMatch("hello", &s));
                    879:   CHECK_EQ(s, string("ell"));
                    880: 
                    881:   // StringPiece-arg
                    882:   StringPiece sp;
                    883:   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
                    884:   CHECK_EQ(sp.size(), 4);
                    885:   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
                    886:   CHECK_EQ(i, 1234);
                    887: 
                    888:   // Multi-arg
                    889:   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
                    890:   CHECK_EQ(s, string("ruby"));
                    891:   CHECK_EQ(i, 1234);
                    892: 
                    893:   // Ignore non-void* NULL arg
                    894:   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
                    895:   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
                    896:   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
                    897:   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
                    898: #ifdef HAVE_LONG_LONG
                    899:   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
                    900: #endif
                    901:   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
                    902:   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
                    903: 
                    904:   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
                    905:   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
                    906:   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
                    907:   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
                    908:   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
                    909:   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
                    910: 
                    911:   // Ignored arg
                    912:   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
                    913:   CHECK_EQ(s, string("ruby"));
                    914:   CHECK_EQ(i, 1234);
                    915: 
                    916:   // Type tests
                    917:   {
                    918:     char c;
                    919:     CHECK(RE("(H)ello").FullMatch("Hello", &c));
                    920:     CHECK_EQ(c, 'H');
                    921:   }
                    922:   {
                    923:     unsigned char c;
                    924:     CHECK(RE("(H)ello").FullMatch("Hello", &c));
                    925:     CHECK_EQ(c, static_cast<unsigned char>('H'));
                    926:   }
                    927:   {
                    928:     short v;
                    929:     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
                    930:     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
                    931:     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
                    932:     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
                    933:     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
                    934:     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
                    935:   }
                    936:   {
                    937:     unsigned short v;
                    938:     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
                    939:     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
                    940:     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
                    941:     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
                    942:   }
                    943:   {
                    944:     int v;
                    945:     static const int max_value = 0x7fffffff;
                    946:     static const int min_value = -max_value - 1;
                    947:     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
                    948:     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
                    949:     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
                    950:     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
                    951:     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
                    952:     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
                    953:   }
                    954:   {
                    955:     unsigned int v;
                    956:     static const unsigned int max_value = 0xfffffffful;
                    957:     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
                    958:     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
                    959:     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
                    960:   }
                    961: #ifdef HAVE_LONG_LONG
                    962: # if defined(__MINGW__) || defined(__MINGW32__)
                    963: #   define LLD "%I64d"
                    964: #   define LLU "%I64u"
                    965: # else
                    966: #   define LLD "%lld"
                    967: #   define LLU "%llu"
                    968: # endif
                    969:   {
                    970:     long long v;
                    971:     static const long long max_value = 0x7fffffffffffffffLL;
                    972:     static const long long min_value = -max_value - 1;
                    973:     char buf[32];  // definitely big enough for a long long
                    974: 
                    975:     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
                    976:     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
                    977: 
                    978:     sprintf(buf, LLD, max_value);
                    979:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
                    980: 
                    981:     sprintf(buf, LLD, min_value);
                    982:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
                    983: 
                    984:     sprintf(buf, LLD, max_value);
                    985:     assert(buf[strlen(buf)-1] != '9');
                    986:     buf[strlen(buf)-1]++;
                    987:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
                    988: 
                    989:     sprintf(buf, LLD, min_value);
                    990:     assert(buf[strlen(buf)-1] != '9');
                    991:     buf[strlen(buf)-1]++;
                    992:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
                    993:   }
                    994: #endif
                    995: #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
                    996:   {
                    997:     unsigned long long v;
                    998:     long long v2;
                    999:     static const unsigned long long max_value = 0xffffffffffffffffULL;
                   1000:     char buf[32];  // definitely big enough for a unsigned long long
                   1001: 
                   1002:     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
                   1003:     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
                   1004: 
                   1005:     sprintf(buf, LLU, max_value);
                   1006:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
                   1007: 
                   1008:     assert(buf[strlen(buf)-1] != '9');
                   1009:     buf[strlen(buf)-1]++;
                   1010:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
                   1011:   }
                   1012: #endif
                   1013:   {
                   1014:     float v;
                   1015:     CHECK(RE("(.*)").FullMatch("100", &v));
                   1016:     CHECK(RE("(.*)").FullMatch("-100.", &v));
                   1017:     CHECK(RE("(.*)").FullMatch("1e23", &v));
                   1018:   }
                   1019:   {
                   1020:     double v;
                   1021:     CHECK(RE("(.*)").FullMatch("100", &v));
                   1022:     CHECK(RE("(.*)").FullMatch("-100.", &v));
                   1023:     CHECK(RE("(.*)").FullMatch("1e23", &v));
                   1024:   }
                   1025: 
                   1026:   // Check that matching is fully anchored
                   1027:   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
                   1028:   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
                   1029:   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
                   1030:   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
                   1031: 
                   1032:   // Braces
                   1033:   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
                   1034:   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
                   1035:   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
                   1036: 
                   1037:   // Complicated RE
                   1038:   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
                   1039:   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
                   1040:   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
                   1041:   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
                   1042: 
                   1043:   // Check full-match handling (needs '$' tacked on internally)
                   1044:   CHECK(RE("fo|foo").FullMatch("fo"));
                   1045:   CHECK(RE("fo|foo").FullMatch("foo"));
                   1046:   CHECK(RE("fo|foo$").FullMatch("fo"));
                   1047:   CHECK(RE("fo|foo$").FullMatch("foo"));
                   1048:   CHECK(RE("foo$").FullMatch("foo"));
                   1049:   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
                   1050:   CHECK(!RE("fo|bar").FullMatch("fox"));
                   1051: 
                   1052:   // Uncomment the following if we change the handling of '$' to
                   1053:   // prevent it from matching a trailing newline
                   1054:   if (false) {
                   1055:     // Check that we don't get bitten by pcre's special handling of a
                   1056:     // '\n' at the end of the string matching '$'
                   1057:     CHECK(!RE("foo$").PartialMatch("foo\n"));
                   1058:   }
                   1059: 
                   1060:   // Number of args
                   1061:   int a[16];
                   1062:   CHECK(RE("").FullMatch(""));
                   1063: 
                   1064:   memset(a, 0, sizeof(0));
                   1065:   CHECK(RE("(\\d){1}").FullMatch("1",
                   1066:                                  &a[0]));
                   1067:   CHECK_EQ(a[0], 1);
                   1068: 
                   1069:   memset(a, 0, sizeof(0));
                   1070:   CHECK(RE("(\\d)(\\d)").FullMatch("12",
                   1071:                                    &a[0],  &a[1]));
                   1072:   CHECK_EQ(a[0], 1);
                   1073:   CHECK_EQ(a[1], 2);
                   1074: 
                   1075:   memset(a, 0, sizeof(0));
                   1076:   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
                   1077:                                         &a[0],  &a[1],  &a[2]));
                   1078:   CHECK_EQ(a[0], 1);
                   1079:   CHECK_EQ(a[1], 2);
                   1080:   CHECK_EQ(a[2], 3);
                   1081: 
                   1082:   memset(a, 0, sizeof(0));
                   1083:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
                   1084:                                              &a[0],  &a[1],  &a[2],  &a[3]));
                   1085:   CHECK_EQ(a[0], 1);
                   1086:   CHECK_EQ(a[1], 2);
                   1087:   CHECK_EQ(a[2], 3);
                   1088:   CHECK_EQ(a[3], 4);
                   1089: 
                   1090:   memset(a, 0, sizeof(0));
                   1091:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
                   1092:                                                   &a[0],  &a[1],  &a[2],
                   1093:                                                   &a[3],  &a[4]));
                   1094:   CHECK_EQ(a[0], 1);
                   1095:   CHECK_EQ(a[1], 2);
                   1096:   CHECK_EQ(a[2], 3);
                   1097:   CHECK_EQ(a[3], 4);
                   1098:   CHECK_EQ(a[4], 5);
                   1099: 
                   1100:   memset(a, 0, sizeof(0));
                   1101:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
                   1102:                                                        &a[0],  &a[1],  &a[2],
                   1103:                                                        &a[3],  &a[4],  &a[5]));
                   1104:   CHECK_EQ(a[0], 1);
                   1105:   CHECK_EQ(a[1], 2);
                   1106:   CHECK_EQ(a[2], 3);
                   1107:   CHECK_EQ(a[3], 4);
                   1108:   CHECK_EQ(a[4], 5);
                   1109:   CHECK_EQ(a[5], 6);
                   1110: 
                   1111:   memset(a, 0, sizeof(0));
                   1112:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
                   1113:                                                             &a[0],  &a[1],  &a[2],  &a[3],
                   1114:                                                             &a[4],  &a[5],  &a[6]));
                   1115:   CHECK_EQ(a[0], 1);
                   1116:   CHECK_EQ(a[1], 2);
                   1117:   CHECK_EQ(a[2], 3);
                   1118:   CHECK_EQ(a[3], 4);
                   1119:   CHECK_EQ(a[4], 5);
                   1120:   CHECK_EQ(a[5], 6);
                   1121:   CHECK_EQ(a[6], 7);
                   1122: 
                   1123:   memset(a, 0, sizeof(0));
                   1124:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
                   1125:            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
                   1126:                "1234567890123456",
                   1127:                &a[0],  &a[1],  &a[2],  &a[3],
                   1128:                &a[4],  &a[5],  &a[6],  &a[7],
                   1129:                &a[8],  &a[9],  &a[10], &a[11],
                   1130:                &a[12], &a[13], &a[14], &a[15]));
                   1131:   CHECK_EQ(a[0], 1);
                   1132:   CHECK_EQ(a[1], 2);
                   1133:   CHECK_EQ(a[2], 3);
                   1134:   CHECK_EQ(a[3], 4);
                   1135:   CHECK_EQ(a[4], 5);
                   1136:   CHECK_EQ(a[5], 6);
                   1137:   CHECK_EQ(a[6], 7);
                   1138:   CHECK_EQ(a[7], 8);
                   1139:   CHECK_EQ(a[8], 9);
                   1140:   CHECK_EQ(a[9], 0);
                   1141:   CHECK_EQ(a[10], 1);
                   1142:   CHECK_EQ(a[11], 2);
                   1143:   CHECK_EQ(a[12], 3);
                   1144:   CHECK_EQ(a[13], 4);
                   1145:   CHECK_EQ(a[14], 5);
                   1146:   CHECK_EQ(a[15], 6);
                   1147: 
                   1148:   /***** PartialMatch *****/
                   1149: 
                   1150:   printf("Testing PartialMatch\n");
                   1151: 
                   1152:   CHECK(RE("h.*o").PartialMatch("hello"));
                   1153:   CHECK(RE("h.*o").PartialMatch("othello"));
                   1154:   CHECK(RE("h.*o").PartialMatch("hello!"));
                   1155:   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
                   1156: 
                   1157:   /***** other tests *****/
                   1158: 
                   1159:   RadixTests();
                   1160:   TestReplace();
                   1161:   TestExtract();
                   1162:   TestConsume();
                   1163:   TestFindAndConsume();
                   1164:   TestQuoteMetaAll();
                   1165:   TestMatchNumberPeculiarity();
                   1166: 
                   1167:   // Check the pattern() accessor
                   1168:   {
                   1169:     const string kPattern = "http://([^/]+)/.*";
                   1170:     const RE re(kPattern);
                   1171:     CHECK_EQ(kPattern, re.pattern());
                   1172:   }
                   1173: 
                   1174:   // Check RE error field.
                   1175:   {
                   1176:     RE re("foo");
                   1177:     CHECK(re.error().empty());  // Must have no error
                   1178:   }
                   1179: 
                   1180: #ifdef SUPPORT_UTF8
                   1181:   // Check UTF-8 handling
                   1182:   {
                   1183:     printf("Testing UTF-8 handling\n");
                   1184: 
                   1185:     // Three Japanese characters (nihongo)
                   1186:     const unsigned char utf8_string[] = {
                   1187:          0xe6, 0x97, 0xa5, // 65e5
                   1188:          0xe6, 0x9c, 0xac, // 672c
                   1189:          0xe8, 0xaa, 0x9e, // 8a9e
                   1190:          0
                   1191:     };
                   1192:     const unsigned char utf8_pattern[] = {
                   1193:          '.',
                   1194:          0xe6, 0x9c, 0xac, // 672c
                   1195:          '.',
                   1196:          0
                   1197:     };
                   1198: 
                   1199:     // Both should match in either mode, bytes or UTF-8
                   1200:     RE re_test1(".........");
                   1201:     CHECK(re_test1.FullMatch(utf8_string));
                   1202:     RE re_test2("...", pcrecpp::UTF8());
                   1203:     CHECK(re_test2.FullMatch(utf8_string));
                   1204: 
                   1205:     // Check that '.' matches one byte or UTF-8 character
                   1206:     // according to the mode.
                   1207:     string ss;
                   1208:     RE re_test3("(.)");
                   1209:     CHECK(re_test3.PartialMatch(utf8_string, &ss));
                   1210:     CHECK_EQ(ss, string("\xe6"));
                   1211:     RE re_test4("(.)", pcrecpp::UTF8());
                   1212:     CHECK(re_test4.PartialMatch(utf8_string, &ss));
                   1213:     CHECK_EQ(ss, string("\xe6\x97\xa5"));
                   1214: 
                   1215:     // Check that string matches itself in either mode
                   1216:     RE re_test5(utf8_string);
                   1217:     CHECK(re_test5.FullMatch(utf8_string));
                   1218:     RE re_test6(utf8_string, pcrecpp::UTF8());
                   1219:     CHECK(re_test6.FullMatch(utf8_string));
                   1220: 
                   1221:     // Check that pattern matches string only in UTF8 mode
                   1222:     RE re_test7(utf8_pattern);
                   1223:     CHECK(!re_test7.FullMatch(utf8_string));
                   1224:     RE re_test8(utf8_pattern, pcrecpp::UTF8());
                   1225:     CHECK(re_test8.FullMatch(utf8_string));
                   1226:   }
                   1227: 
                   1228:   // Check that ungreedy, UTF8 regular expressions don't match when they
                   1229:   // oughtn't -- see bug 82246.
                   1230:   {
                   1231:     // This code always worked.
                   1232:     const char* pattern = "\\w+X";
                   1233:     const string target = "a aX";
                   1234:     RE match_sentence(pattern);
                   1235:     RE match_sentence_re(pattern, pcrecpp::UTF8());
                   1236: 
                   1237:     CHECK(!match_sentence.FullMatch(target));
                   1238:     CHECK(!match_sentence_re.FullMatch(target));
                   1239:   }
                   1240: 
                   1241:   {
                   1242:     const char* pattern = "(?U)\\w+X";
                   1243:     const string target = "a aX";
                   1244:     RE match_sentence(pattern);
                   1245:     RE match_sentence_re(pattern, pcrecpp::UTF8());
                   1246: 
                   1247:     CHECK(!match_sentence.FullMatch(target));
                   1248:     CHECK(!match_sentence_re.FullMatch(target));
                   1249:   }
                   1250: #endif  /* def SUPPORT_UTF8 */
                   1251: 
                   1252:   printf("Testing error reporting\n");
                   1253: 
                   1254:   { RE re("a\\1"); CHECK(!re.error().empty()); }
                   1255:   {
                   1256:     RE re("a[x");
                   1257:     CHECK(!re.error().empty());
                   1258:   }
                   1259:   {
                   1260:     RE re("a[z-a]");
                   1261:     CHECK(!re.error().empty());
                   1262:   }
                   1263:   {
                   1264:     RE re("a[[:foobar:]]");
                   1265:     CHECK(!re.error().empty());
                   1266:   }
                   1267:   {
                   1268:     RE re("a(b");
                   1269:     CHECK(!re.error().empty());
                   1270:   }
                   1271:   {
                   1272:     RE re("a\\");
                   1273:     CHECK(!re.error().empty());
                   1274:   }
                   1275: 
                   1276:   // Test that recursion is stopped
                   1277:   TestRecursion();
                   1278: 
                   1279:   // Test Options
                   1280:   if (getenv("VERBOSE_TEST") != NULL)
                   1281:     VERBOSE_TEST  = true;
                   1282:   TestOptions();
                   1283: 
                   1284:   // Test the constructors
                   1285:   TestConstructors();
                   1286: 
                   1287:   // Done
                   1288:   printf("OK\n");
                   1289: 
                   1290:   return 0;
                   1291: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>