File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcrecpp_unittest.cc
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 08:25:55 2013 UTC (10 years, 10 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, v8_33, v8_31, v8_30, v8_21, HEAD
8.33

    1: // -*- coding: utf-8 -*-
    2: //
    3: // Copyright (c) 2005 - 2010, Google Inc.
    4: // All rights reserved.
    5: //
    6: // Redistribution and use in source and binary forms, with or without
    7: // modification, are permitted provided that the following conditions are
    8: // met:
    9: //
   10: //     * Redistributions of source code must retain the above copyright
   11: // notice, this list of conditions and the following disclaimer.
   12: //     * Redistributions in binary form must reproduce the above
   13: // copyright notice, this list of conditions and the following disclaimer
   14: // in the documentation and/or other materials provided with the
   15: // distribution.
   16: //     * Neither the name of Google Inc. nor the names of its
   17: // contributors may be used to endorse or promote products derived from
   18: // this software without specific prior written permission.
   19: //
   20: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   21: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   22: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   23: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   24: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   25: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   26: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   27: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   28: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   29: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   30: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   31: //
   32: // Author: Sanjay Ghemawat
   33: //
   34: // TODO: Test extractions for PartialMatch/Consume
   35: 
   36: #ifdef HAVE_CONFIG_H
   37: #include "config.h"
   38: #endif
   39: 
   40: #include <stdio.h>
   41: #include <string.h>      /* for memset and strcmp */
   42: #include <cassert>
   43: #include <vector>
   44: #include "pcrecpp.h"
   45: 
   46: using pcrecpp::StringPiece;
   47: using pcrecpp::RE;
   48: using pcrecpp::RE_Options;
   49: using pcrecpp::Hex;
   50: using pcrecpp::Octal;
   51: using pcrecpp::CRadix;
   52: 
// When true, the option tests below print an extra line describing the
// pattern and subject text they are about to check.
static bool VERBOSE_TEST  = false;
   54: 
   55: // CHECK dies with a fatal error if condition is not true.  It is *not*
   56: // controlled by NDEBUG, so the check will be executed regardless of
   57: // compilation mode.  Therefore, it is safe to do things like:
   58: //    CHECK_EQ(fp->Write(x), 4)
   59: #define CHECK(condition) do {                           \
   60:   if (!(condition)) {                                   \
   61:     fprintf(stderr, "%s:%d: Check failed: %s\n",        \
   62:             __FILE__, __LINE__, #condition);            \
   63:     exit(1);                                            \
   64:   }                                                     \
   65: } while (0)
   66: 
   67: #define CHECK_EQ(a, b)   CHECK(a == b)
   68: 
   69: static void Timing1(int num_iters) {
   70:   // Same pattern lots of times
   71:   RE pattern("ruby:\\d+");
   72:   StringPiece p("ruby:1234");
   73:   for (int j = num_iters; j > 0; j--) {
   74:     CHECK(pattern.FullMatch(p));
   75:   }
   76: }
   77: 
   78: static void Timing2(int num_iters) {
   79:   // Same pattern lots of times
   80:   RE pattern("ruby:(\\d+)");
   81:   int i;
   82:   for (int j = num_iters; j > 0; j--) {
   83:     CHECK(pattern.FullMatch("ruby:1234", &i));
   84:     CHECK_EQ(i, 1234);
   85:   }
   86: }
   87: 
   88: static void Timing3(int num_iters) {
   89:   string text_string;
   90:   for (int j = num_iters; j > 0; j--) {
   91:     text_string += "this is another line\n";
   92:   }
   93: 
   94:   RE line_matcher(".*\n");
   95:   string line;
   96:   StringPiece text(text_string);
   97:   int counter = 0;
   98:   while (line_matcher.Consume(&text)) {
   99:     counter++;
  100:   }
  101:   printf("Matched %d lines\n", counter);
  102: }
  103: 
  104: #if 0  // uncomment this if you have a way of defining VirtualProcessSize()
  105: 
  106: static void LeakTest() {
  107:   // Check for memory leaks
  108:   unsigned long long initial_size = 0;
  109:   for (int i = 0; i < 100000; i++) {
  110:     if (i == 50000) {
  111:       initial_size = VirtualProcessSize();
  112:       printf("Size after 50000: %llu\n", initial_size);
  113:     }
  114:     char buf[100];  // definitely big enough
  115:     sprintf(buf, "pat%09d", i);
  116:     RE newre(buf);
  117:   }
  118:   uint64 final_size = VirtualProcessSize();
  119:   printf("Size after 100000: %llu\n", final_size);
  120:   const double growth = double(final_size - initial_size) / final_size;
  121:   printf("Growth: %0.2f%%", growth * 100);
  122:   CHECK(growth < 0.02);       // Allow < 2% growth
  123: }
  124: 
  125: #endif
  126: 
  127: static void RadixTests() {
  128:   printf("Testing hex\n");
  129: 
  130: #define CHECK_HEX(type, value) \
  131:   do { \
  132:     type v; \
  133:     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
  134:     CHECK_EQ(v, 0x ## value); \
  135:     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
  136:     CHECK_EQ(v, 0x ## value); \
  137:   } while(0)
  138: 
  139:   CHECK_HEX(short,              2bad);
  140:   CHECK_HEX(unsigned short,     2badU);
  141:   CHECK_HEX(int,                dead);
  142:   CHECK_HEX(unsigned int,       deadU);
  143:   CHECK_HEX(long,               7eadbeefL);
  144:   CHECK_HEX(unsigned long,      deadbeefUL);
  145: #ifdef HAVE_LONG_LONG
  146:   CHECK_HEX(long long,          12345678deadbeefLL);
  147: #endif
  148: #ifdef HAVE_UNSIGNED_LONG_LONG
  149:   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
  150: #endif
  151: 
  152: #undef CHECK_HEX
  153: 
  154:   printf("Testing octal\n");
  155: 
  156: #define CHECK_OCTAL(type, value) \
  157:   do { \
  158:     type v; \
  159:     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
  160:     CHECK_EQ(v, 0 ## value); \
  161:     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
  162:     CHECK_EQ(v, 0 ## value); \
  163:   } while(0)
  164: 
  165:   CHECK_OCTAL(short,              77777);
  166:   CHECK_OCTAL(unsigned short,     177777U);
  167:   CHECK_OCTAL(int,                17777777777);
  168:   CHECK_OCTAL(unsigned int,       37777777777U);
  169:   CHECK_OCTAL(long,               17777777777L);
  170:   CHECK_OCTAL(unsigned long,      37777777777UL);
  171: #ifdef HAVE_LONG_LONG
  172:   CHECK_OCTAL(long long,          777777777777777777777LL);
  173: #endif
  174: #ifdef HAVE_UNSIGNED_LONG_LONG
  175:   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
  176: #endif
  177: 
  178: #undef CHECK_OCTAL
  179: 
  180:   printf("Testing decimal\n");
  181: 
  182: #define CHECK_DECIMAL(type, value) \
  183:   do { \
  184:     type v; \
  185:     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
  186:     CHECK_EQ(v, value); \
  187:     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
  188:     CHECK_EQ(v, value); \
  189:   } while(0)
  190: 
  191:   CHECK_DECIMAL(short,              -1);
  192:   CHECK_DECIMAL(unsigned short,     9999);
  193:   CHECK_DECIMAL(int,                -1000);
  194:   CHECK_DECIMAL(unsigned int,       12345U);
  195:   CHECK_DECIMAL(long,               -10000000L);
  196:   CHECK_DECIMAL(unsigned long,      3083324652U);
  197: #ifdef HAVE_LONG_LONG
  198:   CHECK_DECIMAL(long long,          -100000000000000LL);
  199: #endif
  200: #ifdef HAVE_UNSIGNED_LONG_LONG
  201:   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
  202: #endif
  203: 
  204: #undef CHECK_DECIMAL
  205: 
  206: }
  207: 
  208: static void TestReplace() {
  209:   printf("Testing Replace\n");
  210: 
  211:   struct ReplaceTest {
  212:     const char *regexp;
  213:     const char *rewrite;
  214:     const char *original;
  215:     const char *single;
  216:     const char *global;
  217:     int global_count;         // the expected return value from ReplaceAll
  218:   };
  219:   static const ReplaceTest tests[] = {
  220:     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
  221:       "\\2\\1ay",
  222:       "the quick brown fox jumps over the lazy dogs.",
  223:       "ethay quick brown fox jumps over the lazy dogs.",
  224:       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
  225:       9 },
  226:     { "\\w+",
  227:       "\\0-NOSPAM",
  228:       "paul.haahr@google.com",
  229:       "paul-NOSPAM.haahr@google.com",
  230:       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
  231:       4 },
  232:     { "^",
  233:       "(START)",
  234:       "foo",
  235:       "(START)foo",
  236:       "(START)foo",
  237:       1 },
  238:     { "^",
  239:       "(START)",
  240:       "",
  241:       "(START)",
  242:       "(START)",
  243:       1 },
  244:     { "$",
  245:       "(END)",
  246:       "",
  247:       "(END)",
  248:       "(END)",
  249:       1 },
  250:     { "b",
  251:       "bb",
  252:       "ababababab",
  253:       "abbabababab",
  254:       "abbabbabbabbabb",
  255:        5 },
  256:     { "b",
  257:       "bb",
  258:       "bbbbbb",
  259:       "bbbbbbb",
  260:       "bbbbbbbbbbbb",
  261:       6 },
  262:     { "b+",
  263:       "bb",
  264:       "bbbbbb",
  265:       "bb",
  266:       "bb",
  267:       1 },
  268:     { "b*",
  269:       "bb",
  270:       "bbbbbb",
  271:       "bb",
  272:       "bbbb",
  273:       2 },
  274:     { "b*",
  275:       "bb",
  276:       "aaaaa",
  277:       "bbaaaaa",
  278:       "bbabbabbabbabbabb",
  279:       6 },
  280:     { "b*",
  281:       "bb",
  282:       "aa\naa\n",
  283:       "bbaa\naa\n",
  284:       "bbabbabb\nbbabbabb\nbb",
  285:       7 },
  286:     { "b*",
  287:       "bb",
  288:       "aa\raa\r",
  289:       "bbaa\raa\r",
  290:       "bbabbabb\rbbabbabb\rbb",
  291:       7 },
  292:     { "b*",
  293:       "bb",
  294:       "aa\r\naa\r\n",
  295:       "bbaa\r\naa\r\n",
  296:       "bbabbabb\r\nbbabbabb\r\nbb",
  297:       7 },
  298:     // Check empty-string matching (it's tricky!)
  299:     { "aa|b*",
  300:       "@",
  301:       "aa",
  302:       "@",
  303:       "@@",
  304:       2 },
  305:     { "b*|aa",
  306:       "@",
  307:       "aa",
  308:       "@aa",
  309:       "@@@",
  310:       3 },
  311: #ifdef SUPPORT_UTF8
  312:     { "b*",
  313:       "bb",
  314:       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
  315:       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
  316:       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
  317:       5 },
  318:     { "b*",
  319:       "bb",
  320:       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
  321:       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
  322:       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
  323:        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
  324:       9 },
  325: #endif
  326:     { "", NULL, NULL, NULL, NULL, 0 }
  327:   };
  328: 
  329: #ifdef SUPPORT_UTF8
  330:   const bool support_utf8 = true;
  331: #else
  332:   const bool support_utf8 = false;
  333: #endif
  334: 
  335:   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
  336:     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
  337:     assert(re.error().empty());
  338:     string one(t->original);
  339:     CHECK(re.Replace(t->rewrite, &one));
  340:     CHECK_EQ(one, t->single);
  341:     string all(t->original);
  342:     const int replace_count = re.GlobalReplace(t->rewrite, &all);
  343:     CHECK_EQ(all, t->global);
  344:     CHECK_EQ(replace_count, t->global_count);
  345:   }
  346: 
  347:   // One final test: test \r\n replacement when we're not in CRLF mode
  348:   {
  349:     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
  350:     assert(re.error().empty());
  351:     string all("aa\r\naa\r\n");
  352:     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
  353:     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
  354:   }
  355:   {
  356:     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
  357:     assert(re.error().empty());
  358:     string all("aa\r\naa\r\n");
  359:     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
  360:     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
  361:   }
  362:   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
  363:   //       Alas, the answer depends on how pcre was compiled.
  364: }
  365: 
  366: static void TestExtract() {
  367:   printf("Testing Extract\n");
  368: 
  369:   string s;
  370: 
  371:   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
  372:   CHECK_EQ(s, "kremvax!boris");
  373: 
  374:   // check the RE interface as well
  375:   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
  376:   CHECK_EQ(s, "'foo'");
  377:   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
  378:   CHECK_EQ(s, "'foo'");
  379: }
  380: 
  381: static void TestConsume() {
  382:   printf("Testing Consume\n");
  383: 
  384:   string word;
  385: 
  386:   string s("   aaa b!@#$@#$cccc");
  387:   StringPiece input(s);
  388: 
  389:   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
  390:   CHECK(r.Consume(&input, &word));
  391:   CHECK_EQ(word, "aaa");
  392:   CHECK(r.Consume(&input, &word));
  393:   CHECK_EQ(word, "b");
  394:   CHECK(! r.Consume(&input, &word));
  395: }
  396: 
  397: static void TestFindAndConsume() {
  398:   printf("Testing FindAndConsume\n");
  399: 
  400:   string word;
  401: 
  402:   string s("   aaa b!@#$@#$cccc");
  403:   StringPiece input(s);
  404: 
  405:   RE r("(\\w+)");      // matches a word
  406:   CHECK(r.FindAndConsume(&input, &word));
  407:   CHECK_EQ(word, "aaa");
  408:   CHECK(r.FindAndConsume(&input, &word));
  409:   CHECK_EQ(word, "b");
  410:   CHECK(r.FindAndConsume(&input, &word));
  411:   CHECK_EQ(word, "cccc");
  412:   CHECK(! r.FindAndConsume(&input, &word));
  413: }
  414: 
  415: static void TestMatchNumberPeculiarity() {
  416:   printf("Testing match-number peculiarity\n");
  417: 
  418:   string word1;
  419:   string word2;
  420:   string word3;
  421: 
  422:   RE r("(foo)|(bar)|(baz)");
  423:   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
  424:   CHECK_EQ(word1, "foo");
  425:   CHECK_EQ(word2, "");
  426:   CHECK_EQ(word3, "");
  427:   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
  428:   CHECK_EQ(word1, "");
  429:   CHECK_EQ(word2, "bar");
  430:   CHECK_EQ(word3, "");
  431:   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
  432:   CHECK_EQ(word1, "");
  433:   CHECK_EQ(word2, "");
  434:   CHECK_EQ(word3, "baz");
  435:   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
  436: 
  437:   string a;
  438:   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
  439:   CHECK_EQ(a, "");
  440: }
  441: 
  442: static void TestRecursion() {
  443:   printf("Testing recursion\n");
  444: 
  445:   // Get one string that passes (sometimes), one that never does.
  446:   string text_good("abcdefghijk");
  447:   string text_bad("acdefghijkl");
  448: 
  449:   // According to pcretest, matching text_good against (\w+)*b
  450:   // requires match_limit of at least 8192, and match_recursion_limit
  451:   // of at least 37.
  452: 
  453:   RE_Options options_ml;
  454:   options_ml.set_match_limit(8192);
  455:   RE re("(\\w+)*b", options_ml);
  456:   CHECK(re.PartialMatch(text_good) == true);
  457:   CHECK(re.PartialMatch(text_bad) == false);
  458:   CHECK(re.FullMatch(text_good) == false);
  459:   CHECK(re.FullMatch(text_bad) == false);
  460: 
  461:   options_ml.set_match_limit(1024);
  462:   RE re2("(\\w+)*b", options_ml);
  463:   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
  464:   CHECK(re2.PartialMatch(text_bad) == false);
  465:   CHECK(re2.FullMatch(text_good) == false);
  466:   CHECK(re2.FullMatch(text_bad) == false);
  467: 
  468:   RE_Options options_mlr;
  469:   options_mlr.set_match_limit_recursion(50);
  470:   RE re3("(\\w+)*b", options_mlr);
  471:   CHECK(re3.PartialMatch(text_good) == true);
  472:   CHECK(re3.PartialMatch(text_bad) == false);
  473:   CHECK(re3.FullMatch(text_good) == false);
  474:   CHECK(re3.FullMatch(text_bad) == false);
  475: 
  476:   options_mlr.set_match_limit_recursion(10);
  477:   RE re4("(\\w+)*b", options_mlr);
  478:   CHECK(re4.PartialMatch(text_good) == false);
  479:   CHECK(re4.PartialMatch(text_bad) == false);
  480:   CHECK(re4.FullMatch(text_good) == false);
  481:   CHECK(re4.FullMatch(text_bad) == false);
  482: }
  483: 
  484: // A meta-quoted string, interpreted as a pattern, should always match
  485: // the original unquoted string.
  486: static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
  487:   string quoted = RE::QuoteMeta(unquoted);
  488:   RE re(quoted, options);
  489:   CHECK(re.FullMatch(unquoted));
  490: }
  491: 
  492: // A string containing meaningful regexp characters, which is then meta-
  493: // quoted, should not generally match a string the unquoted string does.
  494: static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
  495:                                   RE_Options options = RE_Options()) {
  496:   string quoted = RE::QuoteMeta(unquoted);
  497:   RE re(quoted, options);
  498:   CHECK(!re.FullMatch(should_not_match));
  499: }
  500: 
  501: // Tests that quoted meta characters match their original strings,
  502: // and that a few things that shouldn't match indeed do not.
  503: static void TestQuotaMetaSimple() {
  504:   TestQuoteMeta("foo");
  505:   TestQuoteMeta("foo.bar");
  506:   TestQuoteMeta("foo\\.bar");
  507:   TestQuoteMeta("[1-9]");
  508:   TestQuoteMeta("1.5-2.0?");
  509:   TestQuoteMeta("\\d");
  510:   TestQuoteMeta("Who doesn't like ice cream?");
  511:   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
  512:   TestQuoteMeta("((?!)xxx).*yyy");
  513:   TestQuoteMeta("([");
  514:   TestQuoteMeta(string("foo\0bar", 7));
  515: }
  516: 
  517: static void TestQuoteMetaSimpleNegative() {
  518:   NegativeTestQuoteMeta("foo", "bar");
  519:   NegativeTestQuoteMeta("...", "bar");
  520:   NegativeTestQuoteMeta("\\.", ".");
  521:   NegativeTestQuoteMeta("\\.", "..");
  522:   NegativeTestQuoteMeta("(a)", "a");
  523:   NegativeTestQuoteMeta("(a|b)", "a");
  524:   NegativeTestQuoteMeta("(a|b)", "(a)");
  525:   NegativeTestQuoteMeta("(a|b)", "a|b");
  526:   NegativeTestQuoteMeta("[0-9]", "0");
  527:   NegativeTestQuoteMeta("[0-9]", "0-9");
  528:   NegativeTestQuoteMeta("[0-9]", "[9]");
  529:   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
  530: }
  531: 
  532: static void TestQuoteMetaLatin1() {
  533:   TestQuoteMeta("3\xb2 = 9");
  534: }
  535: 
  536: static void TestQuoteMetaUtf8() {
  537: #ifdef SUPPORT_UTF8
  538:   TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
  539:   TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
  540:   TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
  541:   TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
  542:   TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
  543:   TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
  544:   TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
  545:   NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
  546:                         "27\\\xc2\\\xb0",
  547:                         pcrecpp::UTF8());
  548: #endif
  549: }
  550: 
  551: static void TestQuoteMetaAll() {
  552:   printf("Testing QuoteMeta\n");
  553:   TestQuotaMetaSimple();
  554:   TestQuoteMetaSimpleNegative();
  555:   TestQuoteMetaLatin1();
  556:   TestQuoteMetaUtf8();
  557: }
  558: 
  559: //
  560: // Options tests contributed by
  561: // Giuseppe Maxia, CTO, Stardata s.r.l.
  562: // July 2005
  563: //
  564: static void GetOneOptionResult(
  565:                 const char *option_name,
  566:                 const char *regex,
  567:                 const char *str,
  568:                 RE_Options options,
  569:                 bool full,
  570:                 string expected) {
  571: 
  572:   printf("Testing Option <%s>\n", option_name);
  573:   if(VERBOSE_TEST)
  574:     printf("/%s/ finds \"%s\" within \"%s\" \n",
  575:                     regex,
  576:                     expected.c_str(),
  577:                     str);
  578:   string captured("");
  579:   if (full)
  580:     RE(regex,options).FullMatch(str, &captured);
  581:   else
  582:     RE(regex,options).PartialMatch(str, &captured);
  583:   CHECK_EQ(captured, expected);
  584: }
  585: 
  586: static void TestOneOption(
  587:                 const char *option_name,
  588:                 const char *regex,
  589:                 const char *str,
  590:                 RE_Options options,
  591:                 bool full,
  592:                 bool assertive = true) {
  593: 
  594:   printf("Testing Option <%s>\n", option_name);
  595:   if (VERBOSE_TEST)
  596:     printf("'%s' %s /%s/ \n",
  597:                   str,
  598:                   (assertive? "matches" : "doesn't match"),
  599:                   regex);
  600:   if (assertive) {
  601:     if (full)
  602:       CHECK(RE(regex,options).FullMatch(str));
  603:     else
  604:       CHECK(RE(regex,options).PartialMatch(str));
  605:   } else {
  606:     if (full)
  607:       CHECK(!RE(regex,options).FullMatch(str));
  608:     else
  609:       CHECK(!RE(regex,options).PartialMatch(str));
  610:   }
  611: }
  612: 
  613: static void Test_CASELESS() {
  614:   RE_Options options;
  615:   RE_Options options2;
  616: 
  617:   options.set_caseless(true);
  618:   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
  619:   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
  620:   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
  621: 
  622:   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
  623:   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
  624:   options.set_caseless(false);
  625:   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
  626: }
  627: 
  628: static void Test_MULTILINE() {
  629:   RE_Options options;
  630:   RE_Options options2;
  631:   const char *str = "HELLO\n" "cruel\n" "world\n";
  632: 
  633:   options.set_multiline(true);
  634:   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
  635:   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
  636:   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
  637:   options.set_multiline(false);
  638:   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
  639: }
  640: 
  641: static void Test_DOTALL() {
  642:   RE_Options options;
  643:   RE_Options options2;
  644:   const char *str = "HELLO\n" "cruel\n" "world";
  645: 
  646:   options.set_dotall(true);
  647:   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
  648:   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
  649:   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
  650:   options.set_dotall(false);
  651:   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
  652: }
  653: 
  654: static void Test_DOLLAR_ENDONLY() {
  655:   RE_Options options;
  656:   RE_Options options2;
  657:   const char *str = "HELLO world\n";
  658: 
  659:   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
  660:   options.set_dollar_endonly(true);
  661:   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
  662:   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
  663: }
  664: 
  665: static void Test_EXTRA() {
  666:   RE_Options options;
  667:   const char *str = "HELLO";
  668: 
  669:   options.set_extra(true);
  670:   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
  671:   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
  672:   options.set_extra(false);
  673:   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
  674: }
  675: 
  676: static void Test_EXTENDED() {
  677:   RE_Options options;
  678:   RE_Options options2;
  679:   const char *str = "HELLO world";
  680: 
  681:   options.set_extended(true);
  682:   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
  683:   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
  684:   TestOneOption("EXTENDED (class)",
  685:                     "^ HE L{2} O "
  686:                     "\\s+        "
  687:                     "\\w+ $      ",
  688:                     str,
  689:                     options,
  690:                     false);
  691: 
  692:   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
  693:   TestOneOption("EXTENDED (function)",
  694:                     "^ HE L{2} O "
  695:                     "\\s+        "
  696:                     "\\w+ $      ",
  697:                     str,
  698:                     pcrecpp::EXTENDED(),
  699:                     false);
  700: 
  701:   options.set_extended(false);
  702:   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
  703: }
  704: 
  705: static void Test_NO_AUTO_CAPTURE() {
  706:   RE_Options options;
  707:   const char *str = "HELLO world";
  708:   string captured;
  709: 
  710:   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
  711:   if (VERBOSE_TEST)
  712:     printf("parentheses capture text\n");
  713:   RE re("(world|universe)$", options);
  714:   CHECK(re.Extract("\\1", str , &captured));
  715:   CHECK_EQ(captured, "world");
  716:   options.set_no_auto_capture(true);
  717:   printf("testing Option <NO_AUTO_CAPTURE>\n");
  718:   if (VERBOSE_TEST)
  719:     printf("parentheses do not capture text\n");
  720:   re.Extract("\\1",str, &captured );
  721:   CHECK_EQ(captured, "world");
  722: }
  723: 
  724: static void Test_UNGREEDY() {
  725:   RE_Options options;
  726:   const char *str = "HELLO, 'this' is the 'world'";
  727: 
  728:   options.set_ungreedy(true);
  729:   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
  730:   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
  731:   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
  732: 
  733:   options.set_ungreedy(false);
  734:   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
  735:   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
  736: }
  737: 
  738: static void Test_all_options() {
  739:   const char *str = "HELLO\n" "cruel\n" "world";
  740:   RE_Options options;
  741:   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
  742: 
  743:   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
  744:   options.set_all_options(0);
  745:   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
  746:   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
  747: 
  748:   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
  749:   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
  750:                   " ^ c r u e l $ ",
  751:                   str,
  752:                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
  753:                   false);
  754: 
  755:   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
  756:                   " ^ c r u e l $ ",
  757:                   str,
  758:                   RE_Options()
  759:                        .set_multiline(true)
  760:                        .set_extended(true),
  761:                   false);
  762: 
  763:   options.set_all_options(0);
  764:   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
  765: 
  766: }
  767: 
  768: static void TestOptions() {
  769:   printf("Testing Options\n");
  770:   Test_CASELESS();
  771:   Test_MULTILINE();
  772:   Test_DOTALL();
  773:   Test_DOLLAR_ENDONLY();
  774:   Test_EXTENDED();
  775:   Test_NO_AUTO_CAPTURE();
  776:   Test_UNGREEDY();
  777:   Test_EXTRA();
  778:   Test_all_options();
  779: }
  780: 
  781: static void TestConstructors() {
  782:   printf("Testing constructors\n");
  783: 
  784:   RE_Options options;
  785:   options.set_dotall(true);
  786:   const char *str = "HELLO\n" "cruel\n" "world";
  787: 
  788:   RE orig("HELLO.*world", options);
  789:   CHECK(orig.FullMatch(str));
  790: 
  791:   RE copy1(orig);
  792:   CHECK(copy1.FullMatch(str));
  793: 
  794:   RE copy2("not a match");
  795:   CHECK(!copy2.FullMatch(str));
  796:   copy2 = copy1;
  797:   CHECK(copy2.FullMatch(str));
  798:   copy2 = orig;
  799:   CHECK(copy2.FullMatch(str));
  800: 
  801:   // Make sure when we assign to ourselves, nothing bad happens
  802:   orig = orig;
  803:   copy1 = copy1;
  804:   copy2 = copy2;
  805:   CHECK(orig.FullMatch(str));
  806:   CHECK(copy1.FullMatch(str));
  807:   CHECK(copy2.FullMatch(str));
  808: }
  809: 
  810: int main(int argc, char** argv) {
  811:   // Treat any flag as --help
  812:   if (argc > 1 && argv[1][0] == '-') {
  813:     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
  814:            "       If 'timingX ###' is specified, run the given timing test\n"
  815:            "       with the given number of iterations, rather than running\n"
  816:            "       the default corectness test.\n", argv[0]);
  817:     return 0;
  818:   }
  819: 
  820:   if (argc > 1) {
  821:     if ( argc == 2 || atoi(argv[2]) == 0) {
  822:       printf("timing mode needs a num-iters argument\n");
  823:       return 1;
  824:     }
  825:     if (!strcmp(argv[1], "timing1"))
  826:       Timing1(atoi(argv[2]));
  827:     else if (!strcmp(argv[1], "timing2"))
  828:       Timing2(atoi(argv[2]));
  829:     else if (!strcmp(argv[1], "timing3"))
  830:       Timing3(atoi(argv[2]));
  831:     else
  832:       printf("Unknown argument '%s'\n", argv[1]);
  833:     return 0;
  834:   }
  835: 
  836:   printf("PCRE C++ wrapper tests\n");
  837:   printf("Testing FullMatch\n");
  838: 
  839:   int i;
  840:   string s;
  841: 
  842:   /***** FullMatch with no args *****/
  843: 
  844:   CHECK(RE("h.*o").FullMatch("hello"));
  845:   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
  846:   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
  847:   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
  848:   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
  849:   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
  850: 
  851:   /***** FullMatch with args *****/
  852: 
  853:   // Zero-arg
  854:   CHECK(RE("\\d+").FullMatch("1001"));
  855: 
  856:   // Single-arg
  857:   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
  858:   CHECK_EQ(i, 1001);
  859:   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
  860:   CHECK_EQ(i, -123);
  861:   CHECK(!RE("()\\d+").FullMatch("10", &i));
  862:   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
  863:                                 &i));
  864: 
  865:   // Digits surrounding integer-arg
  866:   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
  867:   CHECK_EQ(i, 23);
  868:   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
  869:   CHECK_EQ(i, 1);
  870:   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
  871:   CHECK_EQ(i, -1);
  872:   CHECK(RE("(\\d)").PartialMatch("1234", &i));
  873:   CHECK_EQ(i, 1);
  874:   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
  875:   CHECK_EQ(i, -1);
  876: 
  877:   // String-arg
  878:   CHECK(RE("h(.*)o").FullMatch("hello", &s));
  879:   CHECK_EQ(s, string("ell"));
  880: 
  881:   // StringPiece-arg
  882:   StringPiece sp;
  883:   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
  884:   CHECK_EQ(sp.size(), 4);
  885:   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
  886:   CHECK_EQ(i, 1234);
  887: 
  888:   // Multi-arg
  889:   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
  890:   CHECK_EQ(s, string("ruby"));
  891:   CHECK_EQ(i, 1234);
  892: 
  893:   // Ignore non-void* NULL arg
  894:   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
  895:   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
  896:   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
  897:   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
  898: #ifdef HAVE_LONG_LONG
  899:   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
  900: #endif
  901:   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
  902:   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
  903: 
  904:   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
  905:   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
  906:   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
  907:   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
  908:   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
  909:   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
  910: 
  911:   // Ignored arg
  912:   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
  913:   CHECK_EQ(s, string("ruby"));
  914:   CHECK_EQ(i, 1234);
  915: 
  916:   // Type tests
  917:   {
  918:     char c;
  919:     CHECK(RE("(H)ello").FullMatch("Hello", &c));
  920:     CHECK_EQ(c, 'H');
  921:   }
  922:   {
  923:     unsigned char c;
  924:     CHECK(RE("(H)ello").FullMatch("Hello", &c));
  925:     CHECK_EQ(c, static_cast<unsigned char>('H'));
  926:   }
  927:   {
  928:     short v;
  929:     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
  930:     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
  931:     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
  932:     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
  933:     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
  934:     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
  935:   }
  936:   {
  937:     unsigned short v;
  938:     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
  939:     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
  940:     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
  941:     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
  942:   }
  943:   {
  944:     int v;
  945:     static const int max_value = 0x7fffffff;
  946:     static const int min_value = -max_value - 1;
  947:     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
  948:     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
  949:     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
  950:     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
  951:     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
  952:     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
  953:   }
  954:   {
  955:     unsigned int v;
  956:     static const unsigned int max_value = 0xfffffffful;
  957:     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
  958:     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
  959:     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
  960:   }
  961: #ifdef HAVE_LONG_LONG
  962: # if defined(__MINGW__) || defined(__MINGW32__)
  963: #   define LLD "%I64d"
  964: #   define LLU "%I64u"
  965: # else
  966: #   define LLD "%lld"
  967: #   define LLU "%llu"
  968: # endif
  969:   {
  970:     long long v;
  971:     static const long long max_value = 0x7fffffffffffffffLL;
  972:     static const long long min_value = -max_value - 1;
  973:     char buf[32];  // definitely big enough for a long long
  974: 
  975:     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
  976:     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
  977: 
  978:     sprintf(buf, LLD, max_value);
  979:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
  980: 
  981:     sprintf(buf, LLD, min_value);
  982:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
  983: 
  984:     sprintf(buf, LLD, max_value);
  985:     assert(buf[strlen(buf)-1] != '9');
  986:     buf[strlen(buf)-1]++;
  987:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  988: 
  989:     sprintf(buf, LLD, min_value);
  990:     assert(buf[strlen(buf)-1] != '9');
  991:     buf[strlen(buf)-1]++;
  992:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
  993:   }
  994: #endif
  995: #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
  996:   {
  997:     unsigned long long v;
  998:     long long v2;
  999:     static const unsigned long long max_value = 0xffffffffffffffffULL;
 1000:     char buf[32];  // definitely big enough for a unsigned long long
 1001: 
 1002:     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
 1003:     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
 1004: 
 1005:     sprintf(buf, LLU, max_value);
 1006:     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
 1007: 
 1008:     assert(buf[strlen(buf)-1] != '9');
 1009:     buf[strlen(buf)-1]++;
 1010:     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
 1011:   }
 1012: #endif
 1013:   {
 1014:     float v;
 1015:     CHECK(RE("(.*)").FullMatch("100", &v));
 1016:     CHECK(RE("(.*)").FullMatch("-100.", &v));
 1017:     CHECK(RE("(.*)").FullMatch("1e23", &v));
 1018:   }
 1019:   {
 1020:     double v;
 1021:     CHECK(RE("(.*)").FullMatch("100", &v));
 1022:     CHECK(RE("(.*)").FullMatch("-100.", &v));
 1023:     CHECK(RE("(.*)").FullMatch("1e23", &v));
 1024:   }
 1025: 
 1026:   // Check that matching is fully anchored
 1027:   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
 1028:   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
 1029:   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
 1030:   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
 1031: 
 1032:   // Braces
 1033:   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
 1034:   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
 1035:   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
 1036: 
 1037:   // Complicated RE
 1038:   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
 1039:   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
 1040:   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
 1041:   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
 1042: 
 1043:   // Check full-match handling (needs '$' tacked on internally)
 1044:   CHECK(RE("fo|foo").FullMatch("fo"));
 1045:   CHECK(RE("fo|foo").FullMatch("foo"));
 1046:   CHECK(RE("fo|foo$").FullMatch("fo"));
 1047:   CHECK(RE("fo|foo$").FullMatch("foo"));
 1048:   CHECK(RE("foo$").FullMatch("foo"));
 1049:   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
 1050:   CHECK(!RE("fo|bar").FullMatch("fox"));
 1051: 
 1052:   // Uncomment the following if we change the handling of '$' to
 1053:   // prevent it from matching a trailing newline
 1054:   if (false) {
 1055:     // Check that we don't get bitten by pcre's special handling of a
 1056:     // '\n' at the end of the string matching '$'
 1057:     CHECK(!RE("foo$").PartialMatch("foo\n"));
 1058:   }
 1059: 
 1060:   // Number of args
 1061:   int a[16];
 1062:   CHECK(RE("").FullMatch(""));
 1063: 
 1064:   memset(a, 0, sizeof(0));
 1065:   CHECK(RE("(\\d){1}").FullMatch("1",
 1066:                                  &a[0]));
 1067:   CHECK_EQ(a[0], 1);
 1068: 
 1069:   memset(a, 0, sizeof(0));
 1070:   CHECK(RE("(\\d)(\\d)").FullMatch("12",
 1071:                                    &a[0],  &a[1]));
 1072:   CHECK_EQ(a[0], 1);
 1073:   CHECK_EQ(a[1], 2);
 1074: 
 1075:   memset(a, 0, sizeof(0));
 1076:   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
 1077:                                         &a[0],  &a[1],  &a[2]));
 1078:   CHECK_EQ(a[0], 1);
 1079:   CHECK_EQ(a[1], 2);
 1080:   CHECK_EQ(a[2], 3);
 1081: 
 1082:   memset(a, 0, sizeof(0));
 1083:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
 1084:                                              &a[0],  &a[1],  &a[2],  &a[3]));
 1085:   CHECK_EQ(a[0], 1);
 1086:   CHECK_EQ(a[1], 2);
 1087:   CHECK_EQ(a[2], 3);
 1088:   CHECK_EQ(a[3], 4);
 1089: 
 1090:   memset(a, 0, sizeof(0));
 1091:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
 1092:                                                   &a[0],  &a[1],  &a[2],
 1093:                                                   &a[3],  &a[4]));
 1094:   CHECK_EQ(a[0], 1);
 1095:   CHECK_EQ(a[1], 2);
 1096:   CHECK_EQ(a[2], 3);
 1097:   CHECK_EQ(a[3], 4);
 1098:   CHECK_EQ(a[4], 5);
 1099: 
 1100:   memset(a, 0, sizeof(0));
 1101:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
 1102:                                                        &a[0],  &a[1],  &a[2],
 1103:                                                        &a[3],  &a[4],  &a[5]));
 1104:   CHECK_EQ(a[0], 1);
 1105:   CHECK_EQ(a[1], 2);
 1106:   CHECK_EQ(a[2], 3);
 1107:   CHECK_EQ(a[3], 4);
 1108:   CHECK_EQ(a[4], 5);
 1109:   CHECK_EQ(a[5], 6);
 1110: 
 1111:   memset(a, 0, sizeof(0));
 1112:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
 1113:                                                             &a[0],  &a[1],  &a[2],  &a[3],
 1114:                                                             &a[4],  &a[5],  &a[6]));
 1115:   CHECK_EQ(a[0], 1);
 1116:   CHECK_EQ(a[1], 2);
 1117:   CHECK_EQ(a[2], 3);
 1118:   CHECK_EQ(a[3], 4);
 1119:   CHECK_EQ(a[4], 5);
 1120:   CHECK_EQ(a[5], 6);
 1121:   CHECK_EQ(a[6], 7);
 1122: 
 1123:   memset(a, 0, sizeof(0));
 1124:   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
 1125:            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
 1126:                "1234567890123456",
 1127:                &a[0],  &a[1],  &a[2],  &a[3],
 1128:                &a[4],  &a[5],  &a[6],  &a[7],
 1129:                &a[8],  &a[9],  &a[10], &a[11],
 1130:                &a[12], &a[13], &a[14], &a[15]));
 1131:   CHECK_EQ(a[0], 1);
 1132:   CHECK_EQ(a[1], 2);
 1133:   CHECK_EQ(a[2], 3);
 1134:   CHECK_EQ(a[3], 4);
 1135:   CHECK_EQ(a[4], 5);
 1136:   CHECK_EQ(a[5], 6);
 1137:   CHECK_EQ(a[6], 7);
 1138:   CHECK_EQ(a[7], 8);
 1139:   CHECK_EQ(a[8], 9);
 1140:   CHECK_EQ(a[9], 0);
 1141:   CHECK_EQ(a[10], 1);
 1142:   CHECK_EQ(a[11], 2);
 1143:   CHECK_EQ(a[12], 3);
 1144:   CHECK_EQ(a[13], 4);
 1145:   CHECK_EQ(a[14], 5);
 1146:   CHECK_EQ(a[15], 6);
 1147: 
 1148:   /***** PartialMatch *****/
 1149: 
 1150:   printf("Testing PartialMatch\n");
 1151: 
 1152:   CHECK(RE("h.*o").PartialMatch("hello"));
 1153:   CHECK(RE("h.*o").PartialMatch("othello"));
 1154:   CHECK(RE("h.*o").PartialMatch("hello!"));
 1155:   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
 1156: 
 1157:   /***** other tests *****/
 1158: 
 1159:   RadixTests();
 1160:   TestReplace();
 1161:   TestExtract();
 1162:   TestConsume();
 1163:   TestFindAndConsume();
 1164:   TestQuoteMetaAll();
 1165:   TestMatchNumberPeculiarity();
 1166: 
 1167:   // Check the pattern() accessor
 1168:   {
 1169:     const string kPattern = "http://([^/]+)/.*";
 1170:     const RE re(kPattern);
 1171:     CHECK_EQ(kPattern, re.pattern());
 1172:   }
 1173: 
 1174:   // Check RE error field.
 1175:   {
 1176:     RE re("foo");
 1177:     CHECK(re.error().empty());  // Must have no error
 1178:   }
 1179: 
 1180: #ifdef SUPPORT_UTF8
 1181:   // Check UTF-8 handling
 1182:   {
 1183:     printf("Testing UTF-8 handling\n");
 1184: 
 1185:     // Three Japanese characters (nihongo)
 1186:     const unsigned char utf8_string[] = {
 1187:          0xe6, 0x97, 0xa5, // 65e5
 1188:          0xe6, 0x9c, 0xac, // 627c
 1189:          0xe8, 0xaa, 0x9e, // 8a9e
 1190:          0
 1191:     };
 1192:     const unsigned char utf8_pattern[] = {
 1193:          '.',
 1194:          0xe6, 0x9c, 0xac, // 627c
 1195:          '.',
 1196:          0
 1197:     };
 1198: 
 1199:     // Both should match in either mode, bytes or UTF-8
 1200:     RE re_test1(".........");
 1201:     CHECK(re_test1.FullMatch(utf8_string));
 1202:     RE re_test2("...", pcrecpp::UTF8());
 1203:     CHECK(re_test2.FullMatch(utf8_string));
 1204: 
 1205:     // Check that '.' matches one byte or UTF-8 character
 1206:     // according to the mode.
 1207:     string ss;
 1208:     RE re_test3("(.)");
 1209:     CHECK(re_test3.PartialMatch(utf8_string, &ss));
 1210:     CHECK_EQ(ss, string("\xe6"));
 1211:     RE re_test4("(.)", pcrecpp::UTF8());
 1212:     CHECK(re_test4.PartialMatch(utf8_string, &ss));
 1213:     CHECK_EQ(ss, string("\xe6\x97\xa5"));
 1214: 
 1215:     // Check that string matches itself in either mode
 1216:     RE re_test5(utf8_string);
 1217:     CHECK(re_test5.FullMatch(utf8_string));
 1218:     RE re_test6(utf8_string, pcrecpp::UTF8());
 1219:     CHECK(re_test6.FullMatch(utf8_string));
 1220: 
 1221:     // Check that pattern matches string only in UTF8 mode
 1222:     RE re_test7(utf8_pattern);
 1223:     CHECK(!re_test7.FullMatch(utf8_string));
 1224:     RE re_test8(utf8_pattern, pcrecpp::UTF8());
 1225:     CHECK(re_test8.FullMatch(utf8_string));
 1226:   }
 1227: 
 1228:   // Check that ungreedy, UTF8 regular expressions don't match when they
 1229:   // oughtn't -- see bug 82246.
 1230:   {
 1231:     // This code always worked.
 1232:     const char* pattern = "\\w+X";
 1233:     const string target = "a aX";
 1234:     RE match_sentence(pattern);
 1235:     RE match_sentence_re(pattern, pcrecpp::UTF8());
 1236: 
 1237:     CHECK(!match_sentence.FullMatch(target));
 1238:     CHECK(!match_sentence_re.FullMatch(target));
 1239:   }
 1240: 
 1241:   {
 1242:     const char* pattern = "(?U)\\w+X";
 1243:     const string target = "a aX";
 1244:     RE match_sentence(pattern);
 1245:     RE match_sentence_re(pattern, pcrecpp::UTF8());
 1246: 
 1247:     CHECK(!match_sentence.FullMatch(target));
 1248:     CHECK(!match_sentence_re.FullMatch(target));
 1249:   }
 1250: #endif  /* def SUPPORT_UTF8 */
 1251: 
 1252:   printf("Testing error reporting\n");
 1253: 
 1254:   { RE re("a\\1"); CHECK(!re.error().empty()); }
 1255:   {
 1256:     RE re("a[x");
 1257:     CHECK(!re.error().empty());
 1258:   }
 1259:   {
 1260:     RE re("a[z-a]");
 1261:     CHECK(!re.error().empty());
 1262:   }
 1263:   {
 1264:     RE re("a[[:foobar:]]");
 1265:     CHECK(!re.error().empty());
 1266:   }
 1267:   {
 1268:     RE re("a(b");
 1269:     CHECK(!re.error().empty());
 1270:   }
 1271:   {
 1272:     RE re("a\\");
 1273:     CHECK(!re.error().empty());
 1274:   }
 1275: 
 1276:   // Test that recursion is stopped
 1277:   TestRecursion();
 1278: 
 1279:   // Test Options
 1280:   if (getenv("VERBOSE_TEST") != NULL)
 1281:     VERBOSE_TEST  = true;
 1282:   TestOptions();
 1283: 
 1284:   // Test the constructors
 1285:   TestConstructors();
 1286: 
 1287:   // Done
 1288:   printf("OK\n");
 1289: 
 1290:   return 0;
 1291: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>