Annotation of embedaddon/pcre/pcrecpp_unittest.cc, revision 1.1

1.1     ! misho       1: // -*- coding: utf-8 -*-
        !             2: //
        !             3: // Copyright (c) 2005 - 2010, Google Inc.
        !             4: // All rights reserved.
        !             5: //
        !             6: // Redistribution and use in source and binary forms, with or without
        !             7: // modification, are permitted provided that the following conditions are
        !             8: // met:
        !             9: //
        !            10: //     * Redistributions of source code must retain the above copyright
        !            11: // notice, this list of conditions and the following disclaimer.
        !            12: //     * Redistributions in binary form must reproduce the above
        !            13: // copyright notice, this list of conditions and the following disclaimer
        !            14: // in the documentation and/or other materials provided with the
        !            15: // distribution.
        !            16: //     * Neither the name of Google Inc. nor the names of its
        !            17: // contributors may be used to endorse or promote products derived from
        !            18: // this software without specific prior written permission.
        !            19: //
        !            20: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        !            21: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        !            22: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        !            23: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        !            24: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        !            25: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        !            26: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        !            27: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        !            28: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        !            29: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        !            30: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        !            31: //
        !            32: // Author: Sanjay Ghemawat
        !            33: //
        !            34: // TODO: Test extractions for PartialMatch/Consume
        !            35: 
        !            36: #ifdef HAVE_CONFIG_H
        !            37: #include "config.h"
        !            38: #endif
        !            39: 
        !            40: #include <stdio.h>
        !            41: #include <string.h>      /* for memset and strcmp */
        !            42: #include <cassert>
        !            43: #include <vector>
        !            44: #include "pcrecpp.h"
        !            45: 
        !            46: using pcrecpp::StringPiece;
        !            47: using pcrecpp::RE;
        !            48: using pcrecpp::RE_Options;
        !            49: using pcrecpp::Hex;
        !            50: using pcrecpp::Octal;
        !            51: using pcrecpp::CRadix;
        !            52: 
        !            53: static bool VERBOSE_TEST  = false;
        !            54: 
        !            55: // CHECK dies with a fatal error if condition is not true.  It is *not*
        !            56: // controlled by NDEBUG, so the check will be executed regardless of
        !            57: // compilation mode.  Therefore, it is safe to do things like:
        !            58: //    CHECK_EQ(fp->Write(x), 4)
        !            59: #define CHECK(condition) do {                           \
        !            60:   if (!(condition)) {                                   \
        !            61:     fprintf(stderr, "%s:%d: Check failed: %s\n",        \
        !            62:             __FILE__, __LINE__, #condition);            \
        !            63:     exit(1);                                            \
        !            64:   }                                                     \
        !            65: } while (0)
        !            66: 
        !            67: #define CHECK_EQ(a, b)   CHECK(a == b)
        !            68: 
        !            69: static void Timing1(int num_iters) {
        !            70:   // Same pattern lots of times
        !            71:   RE pattern("ruby:\\d+");
        !            72:   StringPiece p("ruby:1234");
        !            73:   for (int j = num_iters; j > 0; j--) {
        !            74:     CHECK(pattern.FullMatch(p));
        !            75:   }
        !            76: }
        !            77: 
        !            78: static void Timing2(int num_iters) {
        !            79:   // Same pattern lots of times
        !            80:   RE pattern("ruby:(\\d+)");
        !            81:   int i;
        !            82:   for (int j = num_iters; j > 0; j--) {
        !            83:     CHECK(pattern.FullMatch("ruby:1234", &i));
        !            84:     CHECK_EQ(i, 1234);
        !            85:   }
        !            86: }
        !            87: 
        !            88: static void Timing3(int num_iters) {
        !            89:   string text_string;
        !            90:   for (int j = num_iters; j > 0; j--) {
        !            91:     text_string += "this is another line\n";
        !            92:   }
        !            93: 
        !            94:   RE line_matcher(".*\n");
        !            95:   string line;
        !            96:   StringPiece text(text_string);
        !            97:   int counter = 0;
        !            98:   while (line_matcher.Consume(&text)) {
        !            99:     counter++;
        !           100:   }
        !           101:   printf("Matched %d lines\n", counter);
        !           102: }
        !           103: 
        !           104: #if 0  // uncomment this if you have a way of defining VirtualProcessSize()
        !           105: 
        !           106: static void LeakTest() {
        !           107:   // Check for memory leaks
        !           108:   unsigned long long initial_size = 0;
        !           109:   for (int i = 0; i < 100000; i++) {
        !           110:     if (i == 50000) {
        !           111:       initial_size = VirtualProcessSize();
        !           112:       printf("Size after 50000: %llu\n", initial_size);
        !           113:     }
        !           114:     char buf[100];  // definitely big enough
        !           115:     sprintf(buf, "pat%09d", i);
        !           116:     RE newre(buf);
        !           117:   }
        !           118:   uint64 final_size = VirtualProcessSize();
        !           119:   printf("Size after 100000: %llu\n", final_size);
        !           120:   const double growth = double(final_size - initial_size) / final_size;
        !           121:   printf("Growth: %0.2f%%", growth * 100);
        !           122:   CHECK(growth < 0.02);       // Allow < 2% growth
        !           123: }
        !           124: 
        !           125: #endif
        !           126: 
        !           127: static void RadixTests() {
        !           128:   printf("Testing hex\n");
        !           129: 
        !           130: #define CHECK_HEX(type, value) \
        !           131:   do { \
        !           132:     type v; \
        !           133:     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
        !           134:     CHECK_EQ(v, 0x ## value); \
        !           135:     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
        !           136:     CHECK_EQ(v, 0x ## value); \
        !           137:   } while(0)
        !           138: 
        !           139:   CHECK_HEX(short,              2bad);
        !           140:   CHECK_HEX(unsigned short,     2badU);
        !           141:   CHECK_HEX(int,                dead);
        !           142:   CHECK_HEX(unsigned int,       deadU);
        !           143:   CHECK_HEX(long,               7eadbeefL);
        !           144:   CHECK_HEX(unsigned long,      deadbeefUL);
        !           145: #ifdef HAVE_LONG_LONG
        !           146:   CHECK_HEX(long long,          12345678deadbeefLL);
        !           147: #endif
        !           148: #ifdef HAVE_UNSIGNED_LONG_LONG
        !           149:   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
        !           150: #endif
        !           151: 
        !           152: #undef CHECK_HEX
        !           153: 
        !           154:   printf("Testing octal\n");
        !           155: 
        !           156: #define CHECK_OCTAL(type, value) \
        !           157:   do { \
        !           158:     type v; \
        !           159:     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
        !           160:     CHECK_EQ(v, 0 ## value); \
        !           161:     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
        !           162:     CHECK_EQ(v, 0 ## value); \
        !           163:   } while(0)
        !           164: 
        !           165:   CHECK_OCTAL(short,              77777);
        !           166:   CHECK_OCTAL(unsigned short,     177777U);
        !           167:   CHECK_OCTAL(int,                17777777777);
        !           168:   CHECK_OCTAL(unsigned int,       37777777777U);
        !           169:   CHECK_OCTAL(long,               17777777777L);
        !           170:   CHECK_OCTAL(unsigned long,      37777777777UL);
        !           171: #ifdef HAVE_LONG_LONG
        !           172:   CHECK_OCTAL(long long,          777777777777777777777LL);
        !           173: #endif
        !           174: #ifdef HAVE_UNSIGNED_LONG_LONG
        !           175:   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
        !           176: #endif
        !           177: 
        !           178: #undef CHECK_OCTAL
        !           179: 
        !           180:   printf("Testing decimal\n");
        !           181: 
        !           182: #define CHECK_DECIMAL(type, value) \
        !           183:   do { \
        !           184:     type v; \
        !           185:     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
        !           186:     CHECK_EQ(v, value); \
        !           187:     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
        !           188:     CHECK_EQ(v, value); \
        !           189:   } while(0)
        !           190: 
        !           191:   CHECK_DECIMAL(short,              -1);
        !           192:   CHECK_DECIMAL(unsigned short,     9999);
        !           193:   CHECK_DECIMAL(int,                -1000);
        !           194:   CHECK_DECIMAL(unsigned int,       12345U);
        !           195:   CHECK_DECIMAL(long,               -10000000L);
        !           196:   CHECK_DECIMAL(unsigned long,      3083324652U);
        !           197: #ifdef HAVE_LONG_LONG
        !           198:   CHECK_DECIMAL(long long,          -100000000000000LL);
        !           199: #endif
        !           200: #ifdef HAVE_UNSIGNED_LONG_LONG
        !           201:   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
        !           202: #endif
        !           203: 
        !           204: #undef CHECK_DECIMAL
        !           205: 
        !           206: }
        !           207: 
        !           208: static void TestReplace() {
        !           209:   printf("Testing Replace\n");
        !           210: 
        !           211:   struct ReplaceTest {
        !           212:     const char *regexp;
        !           213:     const char *rewrite;
        !           214:     const char *original;
        !           215:     const char *single;
        !           216:     const char *global;
        !           217:     int global_count;         // the expected return value from ReplaceAll
        !           218:   };
        !           219:   static const ReplaceTest tests[] = {
        !           220:     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
        !           221:       "\\2\\1ay",
        !           222:       "the quick brown fox jumps over the lazy dogs.",
        !           223:       "ethay quick brown fox jumps over the lazy dogs.",
        !           224:       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
        !           225:       9 },
        !           226:     { "\\w+",
        !           227:       "\\0-NOSPAM",
        !           228:       "paul.haahr@google.com",
        !           229:       "paul-NOSPAM.haahr@google.com",
        !           230:       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
        !           231:       4 },
        !           232:     { "^",
        !           233:       "(START)",
        !           234:       "foo",
        !           235:       "(START)foo",
        !           236:       "(START)foo",
        !           237:       1 },
        !           238:     { "^",
        !           239:       "(START)",
        !           240:       "",
        !           241:       "(START)",
        !           242:       "(START)",
        !           243:       1 },
        !           244:     { "$",
        !           245:       "(END)",
        !           246:       "",
        !           247:       "(END)",
        !           248:       "(END)",
        !           249:       1 },
        !           250:     { "b",
        !           251:       "bb",
        !           252:       "ababababab",
        !           253:       "abbabababab",
        !           254:       "abbabbabbabbabb",
        !           255:        5 },
        !           256:     { "b",
        !           257:       "bb",
        !           258:       "bbbbbb",
        !           259:       "bbbbbbb",
        !           260:       "bbbbbbbbbbbb",
        !           261:       6 },
        !           262:     { "b+",
        !           263:       "bb",
        !           264:       "bbbbbb",
        !           265:       "bb",
        !           266:       "bb",
        !           267:       1 },
        !           268:     { "b*",
        !           269:       "bb",
        !           270:       "bbbbbb",
        !           271:       "bb",
        !           272:       "bbbb",
        !           273:       2 },
        !           274:     { "b*",
        !           275:       "bb",
        !           276:       "aaaaa",
        !           277:       "bbaaaaa",
        !           278:       "bbabbabbabbabbabb",
        !           279:       6 },
        !           280:     { "b*",
        !           281:       "bb",
        !           282:       "aa\naa\n",
        !           283:       "bbaa\naa\n",
        !           284:       "bbabbabb\nbbabbabb\nbb",
        !           285:       7 },
        !           286:     { "b*",
        !           287:       "bb",
        !           288:       "aa\raa\r",
        !           289:       "bbaa\raa\r",
        !           290:       "bbabbabb\rbbabbabb\rbb",
        !           291:       7 },
        !           292:     { "b*",
        !           293:       "bb",
        !           294:       "aa\r\naa\r\n",
        !           295:       "bbaa\r\naa\r\n",
        !           296:       "bbabbabb\r\nbbabbabb\r\nbb",
        !           297:       7 },
        !           298:     // Check empty-string matching (it's tricky!)
        !           299:     { "aa|b*",
        !           300:       "@",
        !           301:       "aa",
        !           302:       "@",
        !           303:       "@@",
        !           304:       2 },
        !           305:     { "b*|aa",
        !           306:       "@",
        !           307:       "aa",
        !           308:       "@aa",
        !           309:       "@@@",
        !           310:       3 },
        !           311: #ifdef SUPPORT_UTF8
        !           312:     { "b*",
        !           313:       "bb",
        !           314:       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
        !           315:       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
        !           316:       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
        !           317:       5 },
        !           318:     { "b*",
        !           319:       "bb",
        !           320:       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
        !           321:       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
        !           322:       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
        !           323:        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
        !           324:       9 },
        !           325: #endif
        !           326:     { "", NULL, NULL, NULL, NULL, 0 }
        !           327:   };
        !           328: 
        !           329: #ifdef SUPPORT_UTF8
        !           330:   const bool support_utf8 = true;
        !           331: #else
        !           332:   const bool support_utf8 = false;
        !           333: #endif
        !           334: 
        !           335:   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
        !           336:     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
        !           337:     assert(re.error().empty());
        !           338:     string one(t->original);
        !           339:     CHECK(re.Replace(t->rewrite, &one));
        !           340:     CHECK_EQ(one, t->single);
        !           341:     string all(t->original);
        !           342:     const int replace_count = re.GlobalReplace(t->rewrite, &all);
        !           343:     CHECK_EQ(all, t->global);
        !           344:     CHECK_EQ(replace_count, t->global_count);
        !           345:   }
        !           346: 
        !           347:   // One final test: test \r\n replacement when we're not in CRLF mode
        !           348:   {
        !           349:     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
        !           350:     assert(re.error().empty());
        !           351:     string all("aa\r\naa\r\n");
        !           352:     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
        !           353:     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
        !           354:   }
        !           355:   {
        !           356:     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
        !           357:     assert(re.error().empty());
        !           358:     string all("aa\r\naa\r\n");
        !           359:     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
        !           360:     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
        !           361:   }
        !           362:   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
        !           363:   //       Alas, the answer depends on how pcre was compiled.
        !           364: }
        !           365: 
        !           366: static void TestExtract() {
        !           367:   printf("Testing Extract\n");
        !           368: 
        !           369:   string s;
        !           370: 
        !           371:   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
        !           372:   CHECK_EQ(s, "kremvax!boris");
        !           373: 
        !           374:   // check the RE interface as well
        !           375:   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
        !           376:   CHECK_EQ(s, "'foo'");
        !           377:   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
        !           378:   CHECK_EQ(s, "'foo'");
        !           379: }
        !           380: 
        !           381: static void TestConsume() {
        !           382:   printf("Testing Consume\n");
        !           383: 
        !           384:   string word;
        !           385: 
        !           386:   string s("   aaa b!@#$@#$cccc");
        !           387:   StringPiece input(s);
        !           388: 
        !           389:   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
        !           390:   CHECK(r.Consume(&input, &word));
        !           391:   CHECK_EQ(word, "aaa");
        !           392:   CHECK(r.Consume(&input, &word));
        !           393:   CHECK_EQ(word, "b");
        !           394:   CHECK(! r.Consume(&input, &word));
        !           395: }
        !           396: 
        !           397: static void TestFindAndConsume() {
        !           398:   printf("Testing FindAndConsume\n");
        !           399: 
        !           400:   string word;
        !           401: 
        !           402:   string s("   aaa b!@#$@#$cccc");
        !           403:   StringPiece input(s);
        !           404: 
        !           405:   RE r("(\\w+)");      // matches a word
        !           406:   CHECK(r.FindAndConsume(&input, &word));
        !           407:   CHECK_EQ(word, "aaa");
        !           408:   CHECK(r.FindAndConsume(&input, &word));
        !           409:   CHECK_EQ(word, "b");
        !           410:   CHECK(r.FindAndConsume(&input, &word));
        !           411:   CHECK_EQ(word, "cccc");
        !           412:   CHECK(! r.FindAndConsume(&input, &word));
        !           413: }
        !           414: 
        !           415: static void TestMatchNumberPeculiarity() {
        !           416:   printf("Testing match-number peculiarity\n");
        !           417: 
        !           418:   string word1;
        !           419:   string word2;
        !           420:   string word3;
        !           421: 
        !           422:   RE r("(foo)|(bar)|(baz)");
        !           423:   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
        !           424:   CHECK_EQ(word1, "foo");
        !           425:   CHECK_EQ(word2, "");
        !           426:   CHECK_EQ(word3, "");
        !           427:   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
        !           428:   CHECK_EQ(word1, "");
        !           429:   CHECK_EQ(word2, "bar");
        !           430:   CHECK_EQ(word3, "");
        !           431:   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
        !           432:   CHECK_EQ(word1, "");
        !           433:   CHECK_EQ(word2, "");
        !           434:   CHECK_EQ(word3, "baz");
        !           435:   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
        !           436: 
        !           437:   string a;
        !           438:   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
        !           439:   CHECK_EQ(a, "");
        !           440: }
        !           441: 
        !           442: static void TestRecursion() {
        !           443:   printf("Testing recursion\n");
        !           444: 
        !           445:   // Get one string that passes (sometimes), one that never does.
        !           446:   string text_good("abcdefghijk");
        !           447:   string text_bad("acdefghijkl");
        !           448: 
        !           449:   // According to pcretest, matching text_good against (\w+)*b
        !           450:   // requires match_limit of at least 8192, and match_recursion_limit
        !           451:   // of at least 37.
        !           452: 
        !           453:   RE_Options options_ml;
        !           454:   options_ml.set_match_limit(8192);
        !           455:   RE re("(\\w+)*b", options_ml);
        !           456:   CHECK(re.PartialMatch(text_good) == true);
        !           457:   CHECK(re.PartialMatch(text_bad) == false);
        !           458:   CHECK(re.FullMatch(text_good) == false);
        !           459:   CHECK(re.FullMatch(text_bad) == false);
        !           460: 
        !           461:   options_ml.set_match_limit(1024);
        !           462:   RE re2("(\\w+)*b", options_ml);
        !           463:   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
        !           464:   CHECK(re2.PartialMatch(text_bad) == false);
        !           465:   CHECK(re2.FullMatch(text_good) == false);
        !           466:   CHECK(re2.FullMatch(text_bad) == false);
        !           467: 
        !           468:   RE_Options options_mlr;
        !           469:   options_mlr.set_match_limit_recursion(50);
        !           470:   RE re3("(\\w+)*b", options_mlr);
        !           471:   CHECK(re3.PartialMatch(text_good) == true);
        !           472:   CHECK(re3.PartialMatch(text_bad) == false);
        !           473:   CHECK(re3.FullMatch(text_good) == false);
        !           474:   CHECK(re3.FullMatch(text_bad) == false);
        !           475: 
        !           476:   options_mlr.set_match_limit_recursion(10);
        !           477:   RE re4("(\\w+)*b", options_mlr);
        !           478:   CHECK(re4.PartialMatch(text_good) == false);
        !           479:   CHECK(re4.PartialMatch(text_bad) == false);
        !           480:   CHECK(re4.FullMatch(text_good) == false);
        !           481:   CHECK(re4.FullMatch(text_bad) == false);
        !           482: }
        !           483: 
        !           484: // A meta-quoted string, interpreted as a pattern, should always match
        !           485: // the original unquoted string.
        !           486: static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
        !           487:   string quoted = RE::QuoteMeta(unquoted);
        !           488:   RE re(quoted, options);
        !           489:   CHECK(re.FullMatch(unquoted));
        !           490: }
        !           491: 
        !           492: // A string containing meaningful regexp characters, which is then meta-
        !           493: // quoted, should not generally match a string the unquoted string does.
        !           494: static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
        !           495:                                   RE_Options options = RE_Options()) {
        !           496:   string quoted = RE::QuoteMeta(unquoted);
        !           497:   RE re(quoted, options);
        !           498:   CHECK(!re.FullMatch(should_not_match));
        !           499: }
        !           500: 
        !           501: // Tests that quoted meta characters match their original strings,
        !           502: // and that a few things that shouldn't match indeed do not.
        !           503: static void TestQuotaMetaSimple() {
        !           504:   TestQuoteMeta("foo");
        !           505:   TestQuoteMeta("foo.bar");
        !           506:   TestQuoteMeta("foo\\.bar");
        !           507:   TestQuoteMeta("[1-9]");
        !           508:   TestQuoteMeta("1.5-2.0?");
        !           509:   TestQuoteMeta("\\d");
        !           510:   TestQuoteMeta("Who doesn't like ice cream?");
        !           511:   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
        !           512:   TestQuoteMeta("((?!)xxx).*yyy");
        !           513:   TestQuoteMeta("([");
        !           514:   TestQuoteMeta(string("foo\0bar", 7));
        !           515: }
        !           516: 
        !           517: static void TestQuoteMetaSimpleNegative() {
        !           518:   NegativeTestQuoteMeta("foo", "bar");
        !           519:   NegativeTestQuoteMeta("...", "bar");
        !           520:   NegativeTestQuoteMeta("\\.", ".");
        !           521:   NegativeTestQuoteMeta("\\.", "..");
        !           522:   NegativeTestQuoteMeta("(a)", "a");
        !           523:   NegativeTestQuoteMeta("(a|b)", "a");
        !           524:   NegativeTestQuoteMeta("(a|b)", "(a)");
        !           525:   NegativeTestQuoteMeta("(a|b)", "a|b");
        !           526:   NegativeTestQuoteMeta("[0-9]", "0");
        !           527:   NegativeTestQuoteMeta("[0-9]", "0-9");
        !           528:   NegativeTestQuoteMeta("[0-9]", "[9]");
        !           529:   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
        !           530: }
        !           531: 
        !           532: static void TestQuoteMetaLatin1() {
        !           533:   TestQuoteMeta("3\xb2 = 9");
        !           534: }
        !           535: 
        !           536: static void TestQuoteMetaUtf8() {
        !           537: #ifdef SUPPORT_UTF8
        !           538:   TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
        !           539:   TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
        !           540:   TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
        !           541:   TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
        !           542:   TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
        !           543:   TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
        !           544:   TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
        !           545:   NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
        !           546:                         "27\\\xc2\\\xb0",
        !           547:                         pcrecpp::UTF8());
        !           548: #endif
        !           549: }
        !           550: 
        !           551: static void TestQuoteMetaAll() {
        !           552:   printf("Testing QuoteMeta\n");
        !           553:   TestQuotaMetaSimple();
        !           554:   TestQuoteMetaSimpleNegative();
        !           555:   TestQuoteMetaLatin1();
        !           556:   TestQuoteMetaUtf8();
        !           557: }
        !           558: 
        !           559: //
        !           560: // Options tests contributed by
        !           561: // Giuseppe Maxia, CTO, Stardata s.r.l.
        !           562: // July 2005
        !           563: //
        !           564: static void GetOneOptionResult(
        !           565:                 const char *option_name,
        !           566:                 const char *regex,
        !           567:                 const char *str,
        !           568:                 RE_Options options,
        !           569:                 bool full,
        !           570:                 string expected) {
        !           571: 
        !           572:   printf("Testing Option <%s>\n", option_name);
        !           573:   if(VERBOSE_TEST)
        !           574:     printf("/%s/ finds \"%s\" within \"%s\" \n",
        !           575:                     regex,
        !           576:                     expected.c_str(),
        !           577:                     str);
        !           578:   string captured("");
        !           579:   if (full)
        !           580:     RE(regex,options).FullMatch(str, &captured);
        !           581:   else
        !           582:     RE(regex,options).PartialMatch(str, &captured);
        !           583:   CHECK_EQ(captured, expected);
        !           584: }
        !           585: 
        !           586: static void TestOneOption(
        !           587:                 const char *option_name,
        !           588:                 const char *regex,
        !           589:                 const char *str,
        !           590:                 RE_Options options,
        !           591:                 bool full,
        !           592:                 bool assertive = true) {
        !           593: 
        !           594:   printf("Testing Option <%s>\n", option_name);
        !           595:   if (VERBOSE_TEST)
        !           596:     printf("'%s' %s /%s/ \n",
        !           597:                   str,
        !           598:                   (assertive? "matches" : "doesn't match"),
        !           599:                   regex);
        !           600:   if (assertive) {
        !           601:     if (full)
        !           602:       CHECK(RE(regex,options).FullMatch(str));
        !           603:     else
        !           604:       CHECK(RE(regex,options).PartialMatch(str));
        !           605:   } else {
        !           606:     if (full)
        !           607:       CHECK(!RE(regex,options).FullMatch(str));
        !           608:     else
        !           609:       CHECK(!RE(regex,options).PartialMatch(str));
        !           610:   }
        !           611: }
        !           612: 
        !           613: static void Test_CASELESS() {
        !           614:   RE_Options options;
        !           615:   RE_Options options2;
        !           616: 
        !           617:   options.set_caseless(true);
        !           618:   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
        !           619:   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
        !           620:   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
        !           621: 
        !           622:   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
        !           623:   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
        !           624:   options.set_caseless(false);
        !           625:   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
        !           626: }
        !           627: 
        !           628: static void Test_MULTILINE() {
        !           629:   RE_Options options;
        !           630:   RE_Options options2;
        !           631:   const char *str = "HELLO\n" "cruel\n" "world\n";
        !           632: 
        !           633:   options.set_multiline(true);
        !           634:   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
        !           635:   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
        !           636:   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
        !           637:   options.set_multiline(false);
        !           638:   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
        !           639: }
        !           640: 
        !           641: static void Test_DOTALL() {
        !           642:   RE_Options options;
        !           643:   RE_Options options2;
        !           644:   const char *str = "HELLO\n" "cruel\n" "world";
        !           645: 
        !           646:   options.set_dotall(true);
        !           647:   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
        !           648:   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
        !           649:   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
        !           650:   options.set_dotall(false);
        !           651:   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
        !           652: }
        !           653: 
        !           654: static void Test_DOLLAR_ENDONLY() {
        !           655:   RE_Options options;
        !           656:   RE_Options options2;
        !           657:   const char *str = "HELLO world\n";
        !           658: 
        !           659:   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
        !           660:   options.set_dollar_endonly(true);
        !           661:   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
        !           662:   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
        !           663: }
        !           664: 
        !           665: static void Test_EXTRA() {
        !           666:   RE_Options options;
        !           667:   const char *str = "HELLO";
        !           668: 
        !           669:   options.set_extra(true);
        !           670:   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
        !           671:   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
        !           672:   options.set_extra(false);
        !           673:   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
        !           674: }
        !           675: 
        !           676: static void Test_EXTENDED() {
        !           677:   RE_Options options;
        !           678:   RE_Options options2;
        !           679:   const char *str = "HELLO world";
        !           680: 
        !           681:   options.set_extended(true);
        !           682:   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
        !           683:   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
        !           684:   TestOneOption("EXTENDED (class)",
        !           685:                     "^ HE L{2} O "
        !           686:                     "\\s+        "
        !           687:                     "\\w+ $      ",
        !           688:                     str,
        !           689:                     options,
        !           690:                     false);
        !           691: 
        !           692:   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
        !           693:   TestOneOption("EXTENDED (function)",
        !           694:                     "^ HE L{2} O "
        !           695:                     "\\s+        "
        !           696:                     "\\w+ $      ",
        !           697:                     str,
        !           698:                     pcrecpp::EXTENDED(),
        !           699:                     false);
        !           700: 
        !           701:   options.set_extended(false);
        !           702:   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
        !           703: }
        !           704: 
        !           705: static void Test_NO_AUTO_CAPTURE() {
        !           706:   RE_Options options;
        !           707:   const char *str = "HELLO world";
        !           708:   string captured;
        !           709: 
        !           710:   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
        !           711:   if (VERBOSE_TEST)
        !           712:     printf("parentheses capture text\n");
        !           713:   RE re("(world|universe)$", options);
        !           714:   CHECK(re.Extract("\\1", str , &captured));
        !           715:   CHECK_EQ(captured, "world");
        !           716:   options.set_no_auto_capture(true);
        !           717:   printf("testing Option <NO_AUTO_CAPTURE>\n");
        !           718:   if (VERBOSE_TEST)
        !           719:     printf("parentheses do not capture text\n");
        !           720:   re.Extract("\\1",str, &captured );
        !           721:   CHECK_EQ(captured, "world");
        !           722: }
        !           723: 
        !           724: static void Test_UNGREEDY() {
        !           725:   RE_Options options;
        !           726:   const char *str = "HELLO, 'this' is the 'world'";
        !           727: 
        !           728:   options.set_ungreedy(true);
        !           729:   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
        !           730:   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
        !           731:   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
        !           732: 
        !           733:   options.set_ungreedy(false);
        !           734:   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
        !           735:   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
        !           736: }
        !           737: 
        !           738: static void Test_all_options() {
        !           739:   const char *str = "HELLO\n" "cruel\n" "world";
        !           740:   RE_Options options;
        !           741:   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
        !           742: 
        !           743:   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
        !           744:   options.set_all_options(0);
        !           745:   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
        !           746:   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
        !           747: 
        !           748:   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
        !           749:   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
        !           750:                   " ^ c r u e l $ ",
        !           751:                   str,
        !           752:                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
        !           753:                   false);
        !           754: 
        !           755:   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
        !           756:                   " ^ c r u e l $ ",
        !           757:                   str,
        !           758:                   RE_Options()
        !           759:                        .set_multiline(true)
        !           760:                        .set_extended(true),
        !           761:                   false);
        !           762: 
        !           763:   options.set_all_options(0);
        !           764:   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
        !           765: 
        !           766: }
        !           767: 
        !           768: static void TestOptions() {
        !           769:   printf("Testing Options\n");
        !           770:   Test_CASELESS();
        !           771:   Test_MULTILINE();
        !           772:   Test_DOTALL();
        !           773:   Test_DOLLAR_ENDONLY();
        !           774:   Test_EXTENDED();
        !           775:   Test_NO_AUTO_CAPTURE();
        !           776:   Test_UNGREEDY();
        !           777:   Test_EXTRA();
        !           778:   Test_all_options();
        !           779: }
        !           780: 
        !           781: static void TestConstructors() {
        !           782:   printf("Testing constructors\n");
        !           783: 
        !           784:   RE_Options options;
        !           785:   options.set_dotall(true);
        !           786:   const char *str = "HELLO\n" "cruel\n" "world";
        !           787: 
        !           788:   RE orig("HELLO.*world", options);
        !           789:   CHECK(orig.FullMatch(str));
        !           790: 
        !           791:   RE copy1(orig);
        !           792:   CHECK(copy1.FullMatch(str));
        !           793: 
        !           794:   RE copy2("not a match");
        !           795:   CHECK(!copy2.FullMatch(str));
        !           796:   copy2 = copy1;
        !           797:   CHECK(copy2.FullMatch(str));
        !           798:   copy2 = orig;
        !           799:   CHECK(copy2.FullMatch(str));
        !           800: 
        !           801:   // Make sure when we assign to ourselves, nothing bad happens
        !           802:   orig = orig;
        !           803:   copy1 = copy1;
        !           804:   copy2 = copy2;
        !           805:   CHECK(orig.FullMatch(str));
        !           806:   CHECK(copy1.FullMatch(str));
        !           807:   CHECK(copy2.FullMatch(str));
        !           808: }
        !           809: 
        !           810: int main(int argc, char** argv) {
        !           811:   // Treat any flag as --help
        !           812:   if (argc > 1 && argv[1][0] == '-') {
        !           813:     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
        !           814:            "       If 'timingX ###' is specified, run the given timing test\n"
        !           815:            "       with the given number of iterations, rather than running\n"
        !           816:            "       the default corectness test.\n", argv[0]);
        !           817:     return 0;
        !           818:   }
        !           819: 
        !           820:   if (argc > 1) {
        !           821:     if ( argc == 2 || atoi(argv[2]) == 0) {
        !           822:       printf("timing mode needs a num-iters argument\n");
        !           823:       return 1;
        !           824:     }
        !           825:     if (!strcmp(argv[1], "timing1"))
        !           826:       Timing1(atoi(argv[2]));
        !           827:     else if (!strcmp(argv[1], "timing2"))
        !           828:       Timing2(atoi(argv[2]));
        !           829:     else if (!strcmp(argv[1], "timing3"))
        !           830:       Timing3(atoi(argv[2]));
        !           831:     else
        !           832:       printf("Unknown argument '%s'\n", argv[1]);
        !           833:     return 0;
        !           834:   }
        !           835: 
        !           836:   printf("PCRE C++ wrapper tests\n");
        !           837:   printf("Testing FullMatch\n");
        !           838: 
        !           839:   int i;
        !           840:   string s;
        !           841: 
        !           842:   /***** FullMatch with no args *****/
        !           843: 
        !           844:   CHECK(RE("h.*o").FullMatch("hello"));
        !           845:   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
        !           846:   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
        !           847:   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
        !           848:   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
        !           849:   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
        !           850: 
        !           851:   /***** FullMatch with args *****/
        !           852: 
        !           853:   // Zero-arg
        !           854:   CHECK(RE("\\d+").FullMatch("1001"));
        !           855: 
        !           856:   // Single-arg
        !           857:   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
        !           858:   CHECK_EQ(i, 1001);
        !           859:   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
        !           860:   CHECK_EQ(i, -123);
        !           861:   CHECK(!RE("()\\d+").FullMatch("10", &i));
        !           862:   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
        !           863:                                 &i));
        !           864: 
        !           865:   // Digits surrounding integer-arg
        !           866:   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
        !           867:   CHECK_EQ(i, 23);
        !           868:   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
        !           869:   CHECK_EQ(i, 1);
        !           870:   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
        !           871:   CHECK_EQ(i, -1);
        !           872:   CHECK(RE("(\\d)").PartialMatch("1234", &i));
        !           873:   CHECK_EQ(i, 1);
        !           874:   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
        !           875:   CHECK_EQ(i, -1);
        !           876: 
        !           877:   // String-arg
        !           878:   CHECK(RE("h(.*)o").FullMatch("hello", &s));
        !           879:   CHECK_EQ(s, string("ell"));
        !           880: 
        !           881:   // StringPiece-arg
        !           882:   StringPiece sp;
        !           883:   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
        !           884:   CHECK_EQ(sp.size(), 4);
        !           885:   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
        !           886:   CHECK_EQ(i, 1234);
        !           887: 
        !           888:   // Multi-arg
        !           889:   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
        !           890:   CHECK_EQ(s, string("ruby"));
        !           891:   CHECK_EQ(i, 1234);
        !           892: 
        !           893:   // Ignore non-void* NULL arg
        !           894:   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
        !           895:   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
        !           896:   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
        !           897:   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
        !           898: #ifdef HAVE_LONG_LONG
        !           899:   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
        !           900: #endif
        !           901:   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
        !           902:   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
        !           903: 
        !           904:   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
        !           905:   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
        !           906:   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
        !           907:   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
        !           908:   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
        !           909:   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
        !           910: 
        !           911:   // Ignored arg
        !           912:   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
        !           913:   CHECK_EQ(s, string("ruby"));
        !           914:   CHECK_EQ(i, 1234);
        !           915: 
        !           916:   // Type tests
        !           917:   {
        !           918:     char c;
        !           919:     CHECK(RE("(H)ello").FullMatch("Hello", &c));
        !           920:     CHECK_EQ(c, 'H');
        !           921:   }
        !           922:   {
        !           923:     unsigned char c;
        !           924:     CHECK(RE("(H)ello").FullMatch("Hello", &c));
        !           925:     CHECK_EQ(c, static_cast<unsigned char>('H'));
        !           926:   }
        !           927:   {
        !           928:     short v;
        !           929:     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
        !           930:     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
        !           931:     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
        !           932:     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
        !           933:     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
        !           934:     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
        !           935:   }
        !           936:   {
        !           937:     unsigned short v;
        !           938:     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
        !           939:     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
        !           940:     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
        !           941:     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
        !           942:   }
        !           943:   {
        !           944:     int v;
        !           945:     static const int max_value = 0x7fffffff;
        !           946:     static const int min_value = -max_value - 1;
        !           947:     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
        !           948:     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
        !           949:     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
        !           950:     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
        !           951:     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
        !           952:     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
        !           953:   }
        !           954:   {
        !           955:     unsigned int v;
        !           956:     static const unsigned int max_value = 0xfffffffful;
        !           957:     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
        !           958:     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
        !           959:     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
        !           960:   }
        !           961: #ifdef HAVE_LONG_LONG
        !           962: # if defined(__MINGW__) || defined(__MINGW32__)
        !           963: #   define LLD "%I64d"
        !           964: #   define LLU "%I64u"
        !           965: # else
        !           966: #   define LLD "%lld"
        !           967: #   define LLU "%llu"
        !           968: # endif
        !           969:   {
        !           970:     long long v;
        !           971:     static const long long max_value = 0x7fffffffffffffffLL;
        !           972:     static const long long min_value = -max_value - 1;
        !           973:     char buf[32];  // definitely big enough for a long long
        !           974: 
        !           975:     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
        !           976:     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
        !           977: 
        !           978:     sprintf(buf, LLD, max_value);
        !           979:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
        !           980: 
        !           981:     sprintf(buf, LLD, min_value);
        !           982:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
        !           983: 
        !           984:     sprintf(buf, LLD, max_value);
        !           985:     assert(buf[strlen(buf)-1] != '9');
        !           986:     buf[strlen(buf)-1]++;
        !           987:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
        !           988: 
        !           989:     sprintf(buf, LLD, min_value);
        !           990:     assert(buf[strlen(buf)-1] != '9');
        !           991:     buf[strlen(buf)-1]++;
        !           992:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
        !           993:   }
        !           994: #endif
        !           995: #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
        !           996:   {
        !           997:     unsigned long long v;
        !           998:     long long v2;
        !           999:     static const unsigned long long max_value = 0xffffffffffffffffULL;
        !          1000:     char buf[32];  // definitely big enough for a unsigned long long
        !          1001: 
        !          1002:     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
        !          1003:     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
        !          1004: 
        !          1005:     sprintf(buf, LLU, max_value);
        !          1006:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
        !          1007: 
        !          1008:     assert(buf[strlen(buf)-1] != '9');
        !          1009:     buf[strlen(buf)-1]++;
        !          1010:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
        !          1011:   }
        !          1012: #endif
        !          1013:   {
        !          1014:     float v;
        !          1015:     CHECK(RE("(.*)").FullMatch("100", &v));
        !          1016:     CHECK(RE("(.*)").FullMatch("-100.", &v));
        !          1017:     CHECK(RE("(.*)").FullMatch("1e23", &v));
        !          1018:   }
        !          1019:   {
        !          1020:     double v;
        !          1021:     CHECK(RE("(.*)").FullMatch("100", &v));
        !          1022:     CHECK(RE("(.*)").FullMatch("-100.", &v));
        !          1023:     CHECK(RE("(.*)").FullMatch("1e23", &v));
        !          1024:   }
        !          1025: 
        !          1026:   // Check that matching is fully anchored
        !          1027:   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
        !          1028:   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
        !          1029:   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
        !          1030:   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
        !          1031: 
        !          1032:   // Braces
        !          1033:   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
        !          1034:   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
        !          1035:   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
        !          1036: 
        !          1037:   // Complicated RE
        !          1038:   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
        !          1039:   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
        !          1040:   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
        !          1041:   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
        !          1042: 
        !          1043:   // Check full-match handling (needs '$' tacked on internally)
        !          1044:   CHECK(RE("fo|foo").FullMatch("fo"));
        !          1045:   CHECK(RE("fo|foo").FullMatch("foo"));
        !          1046:   CHECK(RE("fo|foo$").FullMatch("fo"));
        !          1047:   CHECK(RE("fo|foo$").FullMatch("foo"));
        !          1048:   CHECK(RE("foo$").FullMatch("foo"));
        !          1049:   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
        !          1050:   CHECK(!RE("fo|bar").FullMatch("fox"));
        !          1051: 
        !          1052:   // Uncomment the following if we change the handling of '$' to
        !          1053:   // prevent it from matching a trailing newline
        !          1054:   if (false) {
        !          1055:     // Check that we don't get bitten by pcre's special handling of a
        !          1056:     // '\n' at the end of the string matching '$'
        !          1057:     CHECK(!RE("foo$").PartialMatch("foo\n"));
        !          1058:   }
        !          1059: 
        !          1060:   // Number of args
        !          1061:   int a[16];
        !          1062:   CHECK(RE("").FullMatch(""));
        !          1063: 
        !          1064:   memset(a, 0, sizeof(0));
        !          1065:   CHECK(RE("(\\d){1}").FullMatch("1",
        !          1066:                                  &a[0]));
        !          1067:   CHECK_EQ(a[0], 1);
        !          1068: 
        !          1069:   memset(a, 0, sizeof(0));
        !          1070:   CHECK(RE("(\\d)(\\d)").FullMatch("12",
        !          1071:                                    &a[0],  &a[1]));
        !          1072:   CHECK_EQ(a[0], 1);
        !          1073:   CHECK_EQ(a[1], 2);
        !          1074: 
        !          1075:   memset(a, 0, sizeof(0));
        !          1076:   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
        !          1077:                                         &a[0],  &a[1],  &a[2]));
        !          1078:   CHECK_EQ(a[0], 1);
        !          1079:   CHECK_EQ(a[1], 2);
        !          1080:   CHECK_EQ(a[2], 3);
        !          1081: 
        !          1082:   memset(a, 0, sizeof(0));
        !          1083:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
        !          1084:                                              &a[0],  &a[1],  &a[2],  &a[3]));
        !          1085:   CHECK_EQ(a[0], 1);
        !          1086:   CHECK_EQ(a[1], 2);
        !          1087:   CHECK_EQ(a[2], 3);
        !          1088:   CHECK_EQ(a[3], 4);
        !          1089: 
        !          1090:   memset(a, 0, sizeof(0));
        !          1091:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
        !          1092:                                                   &a[0],  &a[1],  &a[2],
        !          1093:                                                   &a[3],  &a[4]));
        !          1094:   CHECK_EQ(a[0], 1);
        !          1095:   CHECK_EQ(a[1], 2);
        !          1096:   CHECK_EQ(a[2], 3);
        !          1097:   CHECK_EQ(a[3], 4);
        !          1098:   CHECK_EQ(a[4], 5);
        !          1099: 
        !          1100:   memset(a, 0, sizeof(0));
        !          1101:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
        !          1102:                                                        &a[0],  &a[1],  &a[2],
        !          1103:                                                        &a[3],  &a[4],  &a[5]));
        !          1104:   CHECK_EQ(a[0], 1);
        !          1105:   CHECK_EQ(a[1], 2);
        !          1106:   CHECK_EQ(a[2], 3);
        !          1107:   CHECK_EQ(a[3], 4);
        !          1108:   CHECK_EQ(a[4], 5);
        !          1109:   CHECK_EQ(a[5], 6);
        !          1110: 
        !          1111:   memset(a, 0, sizeof(0));
        !          1112:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
        !          1113:                                                             &a[0],  &a[1],  &a[2],  &a[3],
        !          1114:                                                             &a[4],  &a[5],  &a[6]));
        !          1115:   CHECK_EQ(a[0], 1);
        !          1116:   CHECK_EQ(a[1], 2);
        !          1117:   CHECK_EQ(a[2], 3);
        !          1118:   CHECK_EQ(a[3], 4);
        !          1119:   CHECK_EQ(a[4], 5);
        !          1120:   CHECK_EQ(a[5], 6);
        !          1121:   CHECK_EQ(a[6], 7);
        !          1122: 
        !          1123:   memset(a, 0, sizeof(0));
        !          1124:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
        !          1125:            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
        !          1126:                "1234567890123456",
        !          1127:                &a[0],  &a[1],  &a[2],  &a[3],
        !          1128:                &a[4],  &a[5],  &a[6],  &a[7],
        !          1129:                &a[8],  &a[9],  &a[10], &a[11],
        !          1130:                &a[12], &a[13], &a[14], &a[15]));
        !          1131:   CHECK_EQ(a[0], 1);
        !          1132:   CHECK_EQ(a[1], 2);
        !          1133:   CHECK_EQ(a[2], 3);
        !          1134:   CHECK_EQ(a[3], 4);
        !          1135:   CHECK_EQ(a[4], 5);
        !          1136:   CHECK_EQ(a[5], 6);
        !          1137:   CHECK_EQ(a[6], 7);
        !          1138:   CHECK_EQ(a[7], 8);
        !          1139:   CHECK_EQ(a[8], 9);
        !          1140:   CHECK_EQ(a[9], 0);
        !          1141:   CHECK_EQ(a[10], 1);
        !          1142:   CHECK_EQ(a[11], 2);
        !          1143:   CHECK_EQ(a[12], 3);
        !          1144:   CHECK_EQ(a[13], 4);
        !          1145:   CHECK_EQ(a[14], 5);
        !          1146:   CHECK_EQ(a[15], 6);
        !          1147: 
        !          1148:   /***** PartialMatch *****/
        !          1149: 
        !          1150:   printf("Testing PartialMatch\n");
        !          1151: 
        !          1152:   CHECK(RE("h.*o").PartialMatch("hello"));
        !          1153:   CHECK(RE("h.*o").PartialMatch("othello"));
        !          1154:   CHECK(RE("h.*o").PartialMatch("hello!"));
        !          1155:   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
        !          1156: 
        !          1157:   /***** other tests *****/
        !          1158: 
        !          1159:   RadixTests();
        !          1160:   TestReplace();
        !          1161:   TestExtract();
        !          1162:   TestConsume();
        !          1163:   TestFindAndConsume();
        !          1164:   TestQuoteMetaAll();
        !          1165:   TestMatchNumberPeculiarity();
        !          1166: 
        !          1167:   // Check the pattern() accessor
        !          1168:   {
        !          1169:     const string kPattern = "http://([^/]+)/.*";
        !          1170:     const RE re(kPattern);
        !          1171:     CHECK_EQ(kPattern, re.pattern());
        !          1172:   }
        !          1173: 
        !          1174:   // Check RE error field.
        !          1175:   {
        !          1176:     RE re("foo");
        !          1177:     CHECK(re.error().empty());  // Must have no error
        !          1178:   }
        !          1179: 
        !          1180: #ifdef SUPPORT_UTF8
        !          1181:   // Check UTF-8 handling
        !          1182:   {
        !          1183:     printf("Testing UTF-8 handling\n");
        !          1184: 
        !          1185:     // Three Japanese characters (nihongo)
        !          1186:     const unsigned char utf8_string[] = {
        !          1187:          0xe6, 0x97, 0xa5, // 65e5
        !          1188:          0xe6, 0x9c, 0xac, // 672c
        !          1189:          0xe8, 0xaa, 0x9e, // 8a9e
        !          1190:          0
        !          1191:     };
        !          1192:     const unsigned char utf8_pattern[] = {
        !          1193:          '.',
        !          1194:          0xe6, 0x9c, 0xac, // 672c
        !          1195:          '.',
        !          1196:          0
        !          1197:     };
        !          1198: 
        !          1199:     // Both should match in either mode, bytes or UTF-8
        !          1200:     RE re_test1(".........");
        !          1201:     CHECK(re_test1.FullMatch(utf8_string));
        !          1202:     RE re_test2("...", pcrecpp::UTF8());
        !          1203:     CHECK(re_test2.FullMatch(utf8_string));
        !          1204: 
        !          1205:     // Check that '.' matches one byte or UTF-8 character
        !          1206:     // according to the mode.
        !          1207:     string ss;
        !          1208:     RE re_test3("(.)");
        !          1209:     CHECK(re_test3.PartialMatch(utf8_string, &ss));
        !          1210:     CHECK_EQ(ss, string("\xe6"));
        !          1211:     RE re_test4("(.)", pcrecpp::UTF8());
        !          1212:     CHECK(re_test4.PartialMatch(utf8_string, &ss));
        !          1213:     CHECK_EQ(ss, string("\xe6\x97\xa5"));
        !          1214: 
        !          1215:     // Check that string matches itself in either mode
        !          1216:     RE re_test5(utf8_string);
        !          1217:     CHECK(re_test5.FullMatch(utf8_string));
        !          1218:     RE re_test6(utf8_string, pcrecpp::UTF8());
        !          1219:     CHECK(re_test6.FullMatch(utf8_string));
        !          1220: 
        !          1221:     // Check that pattern matches string only in UTF8 mode
        !          1222:     RE re_test7(utf8_pattern);
        !          1223:     CHECK(!re_test7.FullMatch(utf8_string));
        !          1224:     RE re_test8(utf8_pattern, pcrecpp::UTF8());
        !          1225:     CHECK(re_test8.FullMatch(utf8_string));
        !          1226:   }
        !          1227: 
        !          1228:   // Check that ungreedy, UTF8 regular expressions don't match when they
        !          1229:   // oughtn't -- see bug 82246.
        !          1230:   {
        !          1231:     // This code always worked.
        !          1232:     const char* pattern = "\\w+X";
        !          1233:     const string target = "a aX";
        !          1234:     RE match_sentence(pattern);
        !          1235:     RE match_sentence_re(pattern, pcrecpp::UTF8());
        !          1236: 
        !          1237:     CHECK(!match_sentence.FullMatch(target));
        !          1238:     CHECK(!match_sentence_re.FullMatch(target));
        !          1239:   }
        !          1240: 
        !          1241:   {
        !          1242:     const char* pattern = "(?U)\\w+X";
        !          1243:     const string target = "a aX";
        !          1244:     RE match_sentence(pattern);
        !          1245:     RE match_sentence_re(pattern, pcrecpp::UTF8());
        !          1246: 
        !          1247:     CHECK(!match_sentence.FullMatch(target));
        !          1248:     CHECK(!match_sentence_re.FullMatch(target));
        !          1249:   }
        !          1250: #endif  /* def SUPPORT_UTF8 */
        !          1251: 
        !          1252:   printf("Testing error reporting\n");
        !          1253: 
        !          1254:   { RE re("a\\1"); CHECK(!re.error().empty()); }
        !          1255:   {
        !          1256:     RE re("a[x");
        !          1257:     CHECK(!re.error().empty());
        !          1258:   }
        !          1259:   {
        !          1260:     RE re("a[z-a]");
        !          1261:     CHECK(!re.error().empty());
        !          1262:   }
        !          1263:   {
        !          1264:     RE re("a[[:foobar:]]");
        !          1265:     CHECK(!re.error().empty());
        !          1266:   }
        !          1267:   {
        !          1268:     RE re("a(b");
        !          1269:     CHECK(!re.error().empty());
        !          1270:   }
        !          1271:   {
        !          1272:     RE re("a\\");
        !          1273:     CHECK(!re.error().empty());
        !          1274:   }
        !          1275: 
        !          1276:   // Test that recursion is stopped
        !          1277:   TestRecursion();
        !          1278: 
        !          1279:   // Test Options
        !          1280:   if (getenv("VERBOSE_TEST") != NULL)
        !          1281:     VERBOSE_TEST  = true;
        !          1282:   TestOptions();
        !          1283: 
        !          1284:   // Test the constructors
        !          1285:   TestConstructors();
        !          1286: 
        !          1287:   // Done
        !          1288:   printf("OK\n");
        !          1289: 
        !          1290:   return 0;
        !          1291: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>