Annotation of embedaddon/pcre/pcrecpp_unittest.cc, revision 1.1
1.1 ! misho 1: // -*- coding: utf-8 -*-
! 2: //
! 3: // Copyright (c) 2005 - 2010, Google Inc.
! 4: // All rights reserved.
! 5: //
! 6: // Redistribution and use in source and binary forms, with or without
! 7: // modification, are permitted provided that the following conditions are
! 8: // met:
! 9: //
! 10: // * Redistributions of source code must retain the above copyright
! 11: // notice, this list of conditions and the following disclaimer.
! 12: // * Redistributions in binary form must reproduce the above
! 13: // copyright notice, this list of conditions and the following disclaimer
! 14: // in the documentation and/or other materials provided with the
! 15: // distribution.
! 16: // * Neither the name of Google Inc. nor the names of its
! 17: // contributors may be used to endorse or promote products derived from
! 18: // this software without specific prior written permission.
! 19: //
! 20: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! 21: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
! 22: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
! 23: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
! 24: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
! 25: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
! 26: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
! 27: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
! 28: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
! 29: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
! 30: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
! 31: //
! 32: // Author: Sanjay Ghemawat
! 33: //
! 34: // TODO: Test extractions for PartialMatch/Consume
! 35:
! 36: #ifdef HAVE_CONFIG_H
! 37: #include "config.h"
! 38: #endif
! 39:
! 40: #include <stdio.h>
! 41: #include <string.h> /* for memset and strcmp */
! 42: #include <cassert>
! 43: #include <vector>
! 44: #include "pcrecpp.h"
! 45:
! 46: using pcrecpp::StringPiece;
! 47: using pcrecpp::RE;
! 48: using pcrecpp::RE_Options;
! 49: using pcrecpp::Hex;
! 50: using pcrecpp::Octal;
! 51: using pcrecpp::CRadix;
! 52:
// When true, the option tests additionally print a description of the
// regexp/subject pair they are exercising.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than == (for example
// `x & mask`) is compared as a whole instead of being re-associated.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
! 68:
! 69: static void Timing1(int num_iters) {
! 70: // Same pattern lots of times
! 71: RE pattern("ruby:\\d+");
! 72: StringPiece p("ruby:1234");
! 73: for (int j = num_iters; j > 0; j--) {
! 74: CHECK(pattern.FullMatch(p));
! 75: }
! 76: }
! 77:
! 78: static void Timing2(int num_iters) {
! 79: // Same pattern lots of times
! 80: RE pattern("ruby:(\\d+)");
! 81: int i;
! 82: for (int j = num_iters; j > 0; j--) {
! 83: CHECK(pattern.FullMatch("ruby:1234", &i));
! 84: CHECK_EQ(i, 1234);
! 85: }
! 86: }
! 87:
! 88: static void Timing3(int num_iters) {
! 89: string text_string;
! 90: for (int j = num_iters; j > 0; j--) {
! 91: text_string += "this is another line\n";
! 92: }
! 93:
! 94: RE line_matcher(".*\n");
! 95: string line;
! 96: StringPiece text(text_string);
! 97: int counter = 0;
! 98: while (line_matcher.Consume(&text)) {
! 99: counter++;
! 100: }
! 101: printf("Matched %d lines\n", counter);
! 102: }
! 103:
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Constructs 100000 distinct RE objects and checks that the process size
// reported by VirtualProcessSize() grows by less than 2% between the
// 50000th construction and the end of the loop.
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, so the %llu format and the subtraction
  // below operate on matching operands.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
! 126:
! 127: static void RadixTests() {
! 128: printf("Testing hex\n");
! 129:
! 130: #define CHECK_HEX(type, value) \
! 131: do { \
! 132: type v; \
! 133: CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
! 134: CHECK_EQ(v, 0x ## value); \
! 135: CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
! 136: CHECK_EQ(v, 0x ## value); \
! 137: } while(0)
! 138:
! 139: CHECK_HEX(short, 2bad);
! 140: CHECK_HEX(unsigned short, 2badU);
! 141: CHECK_HEX(int, dead);
! 142: CHECK_HEX(unsigned int, deadU);
! 143: CHECK_HEX(long, 7eadbeefL);
! 144: CHECK_HEX(unsigned long, deadbeefUL);
! 145: #ifdef HAVE_LONG_LONG
! 146: CHECK_HEX(long long, 12345678deadbeefLL);
! 147: #endif
! 148: #ifdef HAVE_UNSIGNED_LONG_LONG
! 149: CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
! 150: #endif
! 151:
! 152: #undef CHECK_HEX
! 153:
! 154: printf("Testing octal\n");
! 155:
! 156: #define CHECK_OCTAL(type, value) \
! 157: do { \
! 158: type v; \
! 159: CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
! 160: CHECK_EQ(v, 0 ## value); \
! 161: CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
! 162: CHECK_EQ(v, 0 ## value); \
! 163: } while(0)
! 164:
! 165: CHECK_OCTAL(short, 77777);
! 166: CHECK_OCTAL(unsigned short, 177777U);
! 167: CHECK_OCTAL(int, 17777777777);
! 168: CHECK_OCTAL(unsigned int, 37777777777U);
! 169: CHECK_OCTAL(long, 17777777777L);
! 170: CHECK_OCTAL(unsigned long, 37777777777UL);
! 171: #ifdef HAVE_LONG_LONG
! 172: CHECK_OCTAL(long long, 777777777777777777777LL);
! 173: #endif
! 174: #ifdef HAVE_UNSIGNED_LONG_LONG
! 175: CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
! 176: #endif
! 177:
! 178: #undef CHECK_OCTAL
! 179:
! 180: printf("Testing decimal\n");
! 181:
! 182: #define CHECK_DECIMAL(type, value) \
! 183: do { \
! 184: type v; \
! 185: CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
! 186: CHECK_EQ(v, value); \
! 187: CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
! 188: CHECK_EQ(v, value); \
! 189: } while(0)
! 190:
! 191: CHECK_DECIMAL(short, -1);
! 192: CHECK_DECIMAL(unsigned short, 9999);
! 193: CHECK_DECIMAL(int, -1000);
! 194: CHECK_DECIMAL(unsigned int, 12345U);
! 195: CHECK_DECIMAL(long, -10000000L);
! 196: CHECK_DECIMAL(unsigned long, 3083324652U);
! 197: #ifdef HAVE_LONG_LONG
! 198: CHECK_DECIMAL(long long, -100000000000000LL);
! 199: #endif
! 200: #ifdef HAVE_UNSIGNED_LONG_LONG
! 201: CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
! 202: #endif
! 203:
! 204: #undef CHECK_DECIMAL
! 205:
! 206: }
! 207:
! 208: static void TestReplace() {
! 209: printf("Testing Replace\n");
! 210:
! 211: struct ReplaceTest {
! 212: const char *regexp;
! 213: const char *rewrite;
! 214: const char *original;
! 215: const char *single;
! 216: const char *global;
! 217: int global_count; // the expected return value from ReplaceAll
! 218: };
! 219: static const ReplaceTest tests[] = {
! 220: { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
! 221: "\\2\\1ay",
! 222: "the quick brown fox jumps over the lazy dogs.",
! 223: "ethay quick brown fox jumps over the lazy dogs.",
! 224: "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
! 225: 9 },
! 226: { "\\w+",
! 227: "\\0-NOSPAM",
! 228: "paul.haahr@google.com",
! 229: "paul-NOSPAM.haahr@google.com",
! 230: "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
! 231: 4 },
! 232: { "^",
! 233: "(START)",
! 234: "foo",
! 235: "(START)foo",
! 236: "(START)foo",
! 237: 1 },
! 238: { "^",
! 239: "(START)",
! 240: "",
! 241: "(START)",
! 242: "(START)",
! 243: 1 },
! 244: { "$",
! 245: "(END)",
! 246: "",
! 247: "(END)",
! 248: "(END)",
! 249: 1 },
! 250: { "b",
! 251: "bb",
! 252: "ababababab",
! 253: "abbabababab",
! 254: "abbabbabbabbabb",
! 255: 5 },
! 256: { "b",
! 257: "bb",
! 258: "bbbbbb",
! 259: "bbbbbbb",
! 260: "bbbbbbbbbbbb",
! 261: 6 },
! 262: { "b+",
! 263: "bb",
! 264: "bbbbbb",
! 265: "bb",
! 266: "bb",
! 267: 1 },
! 268: { "b*",
! 269: "bb",
! 270: "bbbbbb",
! 271: "bb",
! 272: "bbbb",
! 273: 2 },
! 274: { "b*",
! 275: "bb",
! 276: "aaaaa",
! 277: "bbaaaaa",
! 278: "bbabbabbabbabbabb",
! 279: 6 },
! 280: { "b*",
! 281: "bb",
! 282: "aa\naa\n",
! 283: "bbaa\naa\n",
! 284: "bbabbabb\nbbabbabb\nbb",
! 285: 7 },
! 286: { "b*",
! 287: "bb",
! 288: "aa\raa\r",
! 289: "bbaa\raa\r",
! 290: "bbabbabb\rbbabbabb\rbb",
! 291: 7 },
! 292: { "b*",
! 293: "bb",
! 294: "aa\r\naa\r\n",
! 295: "bbaa\r\naa\r\n",
! 296: "bbabbabb\r\nbbabbabb\r\nbb",
! 297: 7 },
! 298: // Check empty-string matching (it's tricky!)
! 299: { "aa|b*",
! 300: "@",
! 301: "aa",
! 302: "@",
! 303: "@@",
! 304: 2 },
! 305: { "b*|aa",
! 306: "@",
! 307: "aa",
! 308: "@aa",
! 309: "@@@",
! 310: 3 },
! 311: #ifdef SUPPORT_UTF8
! 312: { "b*",
! 313: "bb",
! 314: "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
! 315: "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
! 316: "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
! 317: 5 },
! 318: { "b*",
! 319: "bb",
! 320: "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
! 321: "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
! 322: ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
! 323: "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
! 324: 9 },
! 325: #endif
! 326: { "", NULL, NULL, NULL, NULL, 0 }
! 327: };
! 328:
! 329: #ifdef SUPPORT_UTF8
! 330: const bool support_utf8 = true;
! 331: #else
! 332: const bool support_utf8 = false;
! 333: #endif
! 334:
! 335: for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
! 336: RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
! 337: assert(re.error().empty());
! 338: string one(t->original);
! 339: CHECK(re.Replace(t->rewrite, &one));
! 340: CHECK_EQ(one, t->single);
! 341: string all(t->original);
! 342: const int replace_count = re.GlobalReplace(t->rewrite, &all);
! 343: CHECK_EQ(all, t->global);
! 344: CHECK_EQ(replace_count, t->global_count);
! 345: }
! 346:
! 347: // One final test: test \r\n replacement when we're not in CRLF mode
! 348: {
! 349: RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
! 350: assert(re.error().empty());
! 351: string all("aa\r\naa\r\n");
! 352: CHECK_EQ(re.GlobalReplace("bb", &all), 9);
! 353: CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
! 354: }
! 355: {
! 356: RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
! 357: assert(re.error().empty());
! 358: string all("aa\r\naa\r\n");
! 359: CHECK_EQ(re.GlobalReplace("bb", &all), 9);
! 360: CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
! 361: }
! 362: // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
! 363: // Alas, the answer depends on how pcre was compiled.
! 364: }
! 365:
! 366: static void TestExtract() {
! 367: printf("Testing Extract\n");
! 368:
! 369: string s;
! 370:
! 371: CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
! 372: CHECK_EQ(s, "kremvax!boris");
! 373:
! 374: // check the RE interface as well
! 375: CHECK(RE(".*").Extract("'\\0'", "foo", &s));
! 376: CHECK_EQ(s, "'foo'");
! 377: CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
! 378: CHECK_EQ(s, "'foo'");
! 379: }
! 380:
! 381: static void TestConsume() {
! 382: printf("Testing Consume\n");
! 383:
! 384: string word;
! 385:
! 386: string s(" aaa b!@#$@#$cccc");
! 387: StringPiece input(s);
! 388:
! 389: RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
! 390: CHECK(r.Consume(&input, &word));
! 391: CHECK_EQ(word, "aaa");
! 392: CHECK(r.Consume(&input, &word));
! 393: CHECK_EQ(word, "b");
! 394: CHECK(! r.Consume(&input, &word));
! 395: }
! 396:
! 397: static void TestFindAndConsume() {
! 398: printf("Testing FindAndConsume\n");
! 399:
! 400: string word;
! 401:
! 402: string s(" aaa b!@#$@#$cccc");
! 403: StringPiece input(s);
! 404:
! 405: RE r("(\\w+)"); // matches a word
! 406: CHECK(r.FindAndConsume(&input, &word));
! 407: CHECK_EQ(word, "aaa");
! 408: CHECK(r.FindAndConsume(&input, &word));
! 409: CHECK_EQ(word, "b");
! 410: CHECK(r.FindAndConsume(&input, &word));
! 411: CHECK_EQ(word, "cccc");
! 412: CHECK(! r.FindAndConsume(&input, &word));
! 413: }
! 414:
! 415: static void TestMatchNumberPeculiarity() {
! 416: printf("Testing match-number peculiarity\n");
! 417:
! 418: string word1;
! 419: string word2;
! 420: string word3;
! 421:
! 422: RE r("(foo)|(bar)|(baz)");
! 423: CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
! 424: CHECK_EQ(word1, "foo");
! 425: CHECK_EQ(word2, "");
! 426: CHECK_EQ(word3, "");
! 427: CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
! 428: CHECK_EQ(word1, "");
! 429: CHECK_EQ(word2, "bar");
! 430: CHECK_EQ(word3, "");
! 431: CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
! 432: CHECK_EQ(word1, "");
! 433: CHECK_EQ(word2, "");
! 434: CHECK_EQ(word3, "baz");
! 435: CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
! 436:
! 437: string a;
! 438: CHECK(RE("(foo)|hello").FullMatch("hello", &a));
! 439: CHECK_EQ(a, "");
! 440: }
! 441:
! 442: static void TestRecursion() {
! 443: printf("Testing recursion\n");
! 444:
! 445: // Get one string that passes (sometimes), one that never does.
! 446: string text_good("abcdefghijk");
! 447: string text_bad("acdefghijkl");
! 448:
! 449: // According to pcretest, matching text_good against (\w+)*b
! 450: // requires match_limit of at least 8192, and match_recursion_limit
! 451: // of at least 37.
! 452:
! 453: RE_Options options_ml;
! 454: options_ml.set_match_limit(8192);
! 455: RE re("(\\w+)*b", options_ml);
! 456: CHECK(re.PartialMatch(text_good) == true);
! 457: CHECK(re.PartialMatch(text_bad) == false);
! 458: CHECK(re.FullMatch(text_good) == false);
! 459: CHECK(re.FullMatch(text_bad) == false);
! 460:
! 461: options_ml.set_match_limit(1024);
! 462: RE re2("(\\w+)*b", options_ml);
! 463: CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
! 464: CHECK(re2.PartialMatch(text_bad) == false);
! 465: CHECK(re2.FullMatch(text_good) == false);
! 466: CHECK(re2.FullMatch(text_bad) == false);
! 467:
! 468: RE_Options options_mlr;
! 469: options_mlr.set_match_limit_recursion(50);
! 470: RE re3("(\\w+)*b", options_mlr);
! 471: CHECK(re3.PartialMatch(text_good) == true);
! 472: CHECK(re3.PartialMatch(text_bad) == false);
! 473: CHECK(re3.FullMatch(text_good) == false);
! 474: CHECK(re3.FullMatch(text_bad) == false);
! 475:
! 476: options_mlr.set_match_limit_recursion(10);
! 477: RE re4("(\\w+)*b", options_mlr);
! 478: CHECK(re4.PartialMatch(text_good) == false);
! 479: CHECK(re4.PartialMatch(text_bad) == false);
! 480: CHECK(re4.FullMatch(text_good) == false);
! 481: CHECK(re4.FullMatch(text_bad) == false);
! 482: }
! 483:
! 484: // A meta-quoted string, interpreted as a pattern, should always match
! 485: // the original unquoted string.
! 486: static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
! 487: string quoted = RE::QuoteMeta(unquoted);
! 488: RE re(quoted, options);
! 489: CHECK(re.FullMatch(unquoted));
! 490: }
! 491:
! 492: // A string containing meaningful regexp characters, which is then meta-
! 493: // quoted, should not generally match a string the unquoted string does.
! 494: static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
! 495: RE_Options options = RE_Options()) {
! 496: string quoted = RE::QuoteMeta(unquoted);
! 497: RE re(quoted, options);
! 498: CHECK(!re.FullMatch(should_not_match));
! 499: }
! 500:
! 501: // Tests that quoted meta characters match their original strings,
! 502: // and that a few things that shouldn't match indeed do not.
! 503: static void TestQuotaMetaSimple() {
! 504: TestQuoteMeta("foo");
! 505: TestQuoteMeta("foo.bar");
! 506: TestQuoteMeta("foo\\.bar");
! 507: TestQuoteMeta("[1-9]");
! 508: TestQuoteMeta("1.5-2.0?");
! 509: TestQuoteMeta("\\d");
! 510: TestQuoteMeta("Who doesn't like ice cream?");
! 511: TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
! 512: TestQuoteMeta("((?!)xxx).*yyy");
! 513: TestQuoteMeta("([");
! 514: TestQuoteMeta(string("foo\0bar", 7));
! 515: }
! 516:
! 517: static void TestQuoteMetaSimpleNegative() {
! 518: NegativeTestQuoteMeta("foo", "bar");
! 519: NegativeTestQuoteMeta("...", "bar");
! 520: NegativeTestQuoteMeta("\\.", ".");
! 521: NegativeTestQuoteMeta("\\.", "..");
! 522: NegativeTestQuoteMeta("(a)", "a");
! 523: NegativeTestQuoteMeta("(a|b)", "a");
! 524: NegativeTestQuoteMeta("(a|b)", "(a)");
! 525: NegativeTestQuoteMeta("(a|b)", "a|b");
! 526: NegativeTestQuoteMeta("[0-9]", "0");
! 527: NegativeTestQuoteMeta("[0-9]", "0-9");
! 528: NegativeTestQuoteMeta("[0-9]", "[9]");
! 529: NegativeTestQuoteMeta("((?!)xxx)", "xxx");
! 530: }
! 531:
! 532: static void TestQuoteMetaLatin1() {
! 533: TestQuoteMeta("3\xb2 = 9");
! 534: }
! 535:
// QuoteMeta checks on multi-byte UTF-8 sequences.  Does nothing unless
// SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);   // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // The quoted pattern must not match a text in which each byte of the
  // 2-byte degree symbol is preceded by a backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8);
#endif
}
! 550:
! 551: static void TestQuoteMetaAll() {
! 552: printf("Testing QuoteMeta\n");
! 553: TestQuotaMetaSimple();
! 554: TestQuoteMetaSimpleNegative();
! 555: TestQuoteMetaLatin1();
! 556: TestQuoteMetaUtf8();
! 557: }
! 558:
! 559: //
! 560: // Options tests contributed by
! 561: // Giuseppe Maxia, CTO, Stardata s.r.l.
! 562: // July 2005
! 563: //
! 564: static void GetOneOptionResult(
! 565: const char *option_name,
! 566: const char *regex,
! 567: const char *str,
! 568: RE_Options options,
! 569: bool full,
! 570: string expected) {
! 571:
! 572: printf("Testing Option <%s>\n", option_name);
! 573: if(VERBOSE_TEST)
! 574: printf("/%s/ finds \"%s\" within \"%s\" \n",
! 575: regex,
! 576: expected.c_str(),
! 577: str);
! 578: string captured("");
! 579: if (full)
! 580: RE(regex,options).FullMatch(str, &captured);
! 581: else
! 582: RE(regex,options).PartialMatch(str, &captured);
! 583: CHECK_EQ(captured, expected);
! 584: }
! 585:
! 586: static void TestOneOption(
! 587: const char *option_name,
! 588: const char *regex,
! 589: const char *str,
! 590: RE_Options options,
! 591: bool full,
! 592: bool assertive = true) {
! 593:
! 594: printf("Testing Option <%s>\n", option_name);
! 595: if (VERBOSE_TEST)
! 596: printf("'%s' %s /%s/ \n",
! 597: str,
! 598: (assertive? "matches" : "doesn't match"),
! 599: regex);
! 600: if (assertive) {
! 601: if (full)
! 602: CHECK(RE(regex,options).FullMatch(str));
! 603: else
! 604: CHECK(RE(regex,options).PartialMatch(str));
! 605: } else {
! 606: if (full)
! 607: CHECK(!RE(regex,options).FullMatch(str));
! 608: else
! 609: CHECK(!RE(regex,options).PartialMatch(str));
! 610: }
! 611: }
! 612:
! 613: static void Test_CASELESS() {
! 614: RE_Options options;
! 615: RE_Options options2;
! 616:
! 617: options.set_caseless(true);
! 618: TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
! 619: TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
! 620: TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
! 621:
! 622: TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
! 623: TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
! 624: options.set_caseless(false);
! 625: TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
! 626: }
! 627:
! 628: static void Test_MULTILINE() {
! 629: RE_Options options;
! 630: RE_Options options2;
! 631: const char *str = "HELLO\n" "cruel\n" "world\n";
! 632:
! 633: options.set_multiline(true);
! 634: TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
! 635: TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
! 636: TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
! 637: options.set_multiline(false);
! 638: TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
! 639: }
! 640:
! 641: static void Test_DOTALL() {
! 642: RE_Options options;
! 643: RE_Options options2;
! 644: const char *str = "HELLO\n" "cruel\n" "world";
! 645:
! 646: options.set_dotall(true);
! 647: TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
! 648: TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
! 649: TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
! 650: options.set_dotall(false);
! 651: TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
! 652: }
! 653:
! 654: static void Test_DOLLAR_ENDONLY() {
! 655: RE_Options options;
! 656: RE_Options options2;
! 657: const char *str = "HELLO world\n";
! 658:
! 659: TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
! 660: options.set_dollar_endonly(true);
! 661: TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
! 662: TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
! 663: }
! 664:
! 665: static void Test_EXTRA() {
! 666: RE_Options options;
! 667: const char *str = "HELLO";
! 668:
! 669: options.set_extra(true);
! 670: TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
! 671: TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
! 672: options.set_extra(false);
! 673: TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
! 674: }
! 675:
! 676: static void Test_EXTENDED() {
! 677: RE_Options options;
! 678: RE_Options options2;
! 679: const char *str = "HELLO world";
! 680:
! 681: options.set_extended(true);
! 682: TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
! 683: TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
! 684: TestOneOption("EXTENDED (class)",
! 685: "^ HE L{2} O "
! 686: "\\s+ "
! 687: "\\w+ $ ",
! 688: str,
! 689: options,
! 690: false);
! 691:
! 692: TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
! 693: TestOneOption("EXTENDED (function)",
! 694: "^ HE L{2} O "
! 695: "\\s+ "
! 696: "\\w+ $ ",
! 697: str,
! 698: pcrecpp::EXTENDED(),
! 699: false);
! 700:
! 701: options.set_extended(false);
! 702: TestOneOption("no EXTENDED", "HELLO world", str, options, false);
! 703: }
! 704:
! 705: static void Test_NO_AUTO_CAPTURE() {
! 706: RE_Options options;
! 707: const char *str = "HELLO world";
! 708: string captured;
! 709:
! 710: printf("Testing Option <no NO_AUTO_CAPTURE>\n");
! 711: if (VERBOSE_TEST)
! 712: printf("parentheses capture text\n");
! 713: RE re("(world|universe)$", options);
! 714: CHECK(re.Extract("\\1", str , &captured));
! 715: CHECK_EQ(captured, "world");
! 716: options.set_no_auto_capture(true);
! 717: printf("testing Option <NO_AUTO_CAPTURE>\n");
! 718: if (VERBOSE_TEST)
! 719: printf("parentheses do not capture text\n");
! 720: re.Extract("\\1",str, &captured );
! 721: CHECK_EQ(captured, "world");
! 722: }
! 723:
! 724: static void Test_UNGREEDY() {
! 725: RE_Options options;
! 726: const char *str = "HELLO, 'this' is the 'world'";
! 727:
! 728: options.set_ungreedy(true);
! 729: GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
! 730: GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
! 731: GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
! 732:
! 733: options.set_ungreedy(false);
! 734: GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
! 735: GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
! 736: }
! 737:
! 738: static void Test_all_options() {
! 739: const char *str = "HELLO\n" "cruel\n" "world";
! 740: RE_Options options;
! 741: options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
! 742:
! 743: TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
! 744: options.set_all_options(0);
! 745: TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
! 746: options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
! 747:
! 748: TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
! 749: TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
! 750: " ^ c r u e l $ ",
! 751: str,
! 752: RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
! 753: false);
! 754:
! 755: TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
! 756: " ^ c r u e l $ ",
! 757: str,
! 758: RE_Options()
! 759: .set_multiline(true)
! 760: .set_extended(true),
! 761: false);
! 762:
! 763: options.set_all_options(0);
! 764: TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
! 765:
! 766: }
! 767:
! 768: static void TestOptions() {
! 769: printf("Testing Options\n");
! 770: Test_CASELESS();
! 771: Test_MULTILINE();
! 772: Test_DOTALL();
! 773: Test_DOLLAR_ENDONLY();
! 774: Test_EXTENDED();
! 775: Test_NO_AUTO_CAPTURE();
! 776: Test_UNGREEDY();
! 777: Test_EXTRA();
! 778: Test_all_options();
! 779: }
! 780:
! 781: static void TestConstructors() {
! 782: printf("Testing constructors\n");
! 783:
! 784: RE_Options options;
! 785: options.set_dotall(true);
! 786: const char *str = "HELLO\n" "cruel\n" "world";
! 787:
! 788: RE orig("HELLO.*world", options);
! 789: CHECK(orig.FullMatch(str));
! 790:
! 791: RE copy1(orig);
! 792: CHECK(copy1.FullMatch(str));
! 793:
! 794: RE copy2("not a match");
! 795: CHECK(!copy2.FullMatch(str));
! 796: copy2 = copy1;
! 797: CHECK(copy2.FullMatch(str));
! 798: copy2 = orig;
! 799: CHECK(copy2.FullMatch(str));
! 800:
! 801: // Make sure when we assign to ourselves, nothing bad happens
! 802: orig = orig;
! 803: copy1 = copy1;
! 804: copy2 = copy2;
! 805: CHECK(orig.FullMatch(str));
! 806: CHECK(copy1.FullMatch(str));
! 807: CHECK(copy2.FullMatch(str));
! 808: }
! 809:
! 810: int main(int argc, char** argv) {
! 811: // Treat any flag as --help
! 812: if (argc > 1 && argv[1][0] == '-') {
! 813: printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
! 814: " If 'timingX ###' is specified, run the given timing test\n"
! 815: " with the given number of iterations, rather than running\n"
! 816: " the default corectness test.\n", argv[0]);
! 817: return 0;
! 818: }
! 819:
! 820: if (argc > 1) {
! 821: if ( argc == 2 || atoi(argv[2]) == 0) {
! 822: printf("timing mode needs a num-iters argument\n");
! 823: return 1;
! 824: }
! 825: if (!strcmp(argv[1], "timing1"))
! 826: Timing1(atoi(argv[2]));
! 827: else if (!strcmp(argv[1], "timing2"))
! 828: Timing2(atoi(argv[2]));
! 829: else if (!strcmp(argv[1], "timing3"))
! 830: Timing3(atoi(argv[2]));
! 831: else
! 832: printf("Unknown argument '%s'\n", argv[1]);
! 833: return 0;
! 834: }
! 835:
! 836: printf("PCRE C++ wrapper tests\n");
! 837: printf("Testing FullMatch\n");
! 838:
! 839: int i;
! 840: string s;
! 841:
! 842: /***** FullMatch with no args *****/
! 843:
! 844: CHECK(RE("h.*o").FullMatch("hello"));
! 845: CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
! 846: CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
! 847: CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
! 848: CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
! 849: CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
! 850:
! 851: /***** FullMatch with args *****/
! 852:
! 853: // Zero-arg
! 854: CHECK(RE("\\d+").FullMatch("1001"));
! 855:
! 856: // Single-arg
! 857: CHECK(RE("(\\d+)").FullMatch("1001", &i));
! 858: CHECK_EQ(i, 1001);
! 859: CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
! 860: CHECK_EQ(i, -123);
! 861: CHECK(!RE("()\\d+").FullMatch("10", &i));
! 862: CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
! 863: &i));
! 864:
! 865: // Digits surrounding integer-arg
! 866: CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
! 867: CHECK_EQ(i, 23);
! 868: CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
! 869: CHECK_EQ(i, 1);
! 870: CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
! 871: CHECK_EQ(i, -1);
! 872: CHECK(RE("(\\d)").PartialMatch("1234", &i));
! 873: CHECK_EQ(i, 1);
! 874: CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
! 875: CHECK_EQ(i, -1);
! 876:
! 877: // String-arg
! 878: CHECK(RE("h(.*)o").FullMatch("hello", &s));
! 879: CHECK_EQ(s, string("ell"));
! 880:
! 881: // StringPiece-arg
! 882: StringPiece sp;
! 883: CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
! 884: CHECK_EQ(sp.size(), 4);
! 885: CHECK(memcmp(sp.data(), "ruby", 4) == 0);
! 886: CHECK_EQ(i, 1234);
! 887:
! 888: // Multi-arg
! 889: CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
! 890: CHECK_EQ(s, string("ruby"));
! 891: CHECK_EQ(i, 1234);
! 892:
! 893: // Ignore non-void* NULL arg
! 894: CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
! 895: CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
! 896: CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
! 897: CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
! 898: #ifdef HAVE_LONG_LONG
! 899: CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
! 900: #endif
! 901: CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
! 902: CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
! 903:
! 904: // Fail on non-void* NULL arg if the match doesn't parse for the given type.
! 905: CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
! 906: CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
! 907: CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
! 908: CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
! 909: CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
! 910:
! 911: // Ignored arg
! 912: CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
! 913: CHECK_EQ(s, string("ruby"));
! 914: CHECK_EQ(i, 1234);
! 915:
! 916: // Type tests
! 917: {
! 918: char c;
! 919: CHECK(RE("(H)ello").FullMatch("Hello", &c));
! 920: CHECK_EQ(c, 'H');
! 921: }
! 922: {
! 923: unsigned char c;
! 924: CHECK(RE("(H)ello").FullMatch("Hello", &c));
! 925: CHECK_EQ(c, static_cast<unsigned char>('H'));
! 926: }
! 927: {
! 928: short v;
! 929: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
! 930: CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
! 931: CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
! 932: CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
! 933: CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
! 934: CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
! 935: }
! 936: {
! 937: unsigned short v;
! 938: CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
! 939: CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
! 940: CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
! 941: CHECK(!RE("(\\d+)").FullMatch("65536", &v));
! 942: }
! 943: {
! 944: int v;
! 945: static const int max_value = 0x7fffffff;
! 946: static const int min_value = -max_value - 1;
! 947: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
! 948: CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
! 949: CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
! 950: CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
! 951: CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
! 952: CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
! 953: }
! 954: {
! 955: unsigned int v;
! 956: static const unsigned int max_value = 0xfffffffful;
! 957: CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
! 958: CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
! 959: CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
! 960: }
! 961: #ifdef HAVE_LONG_LONG
! 962: # if defined(__MINGW__) || defined(__MINGW32__)
! 963: # define LLD "%I64d"
! 964: # define LLU "%I64u"
! 965: # else
! 966: # define LLD "%lld"
! 967: # define LLU "%llu"
! 968: # endif
! 969: {
! 970: long long v;
! 971: static const long long max_value = 0x7fffffffffffffffLL;
! 972: static const long long min_value = -max_value - 1;
! 973: char buf[32]; // definitely big enough for a long long
! 974:
! 975: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
! 976: CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
! 977:
! 978: sprintf(buf, LLD, max_value);
! 979: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
! 980:
! 981: sprintf(buf, LLD, min_value);
! 982: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
! 983:
! 984: sprintf(buf, LLD, max_value);
! 985: assert(buf[strlen(buf)-1] != '9');
! 986: buf[strlen(buf)-1]++;
! 987: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
! 988:
! 989: sprintf(buf, LLD, min_value);
! 990: assert(buf[strlen(buf)-1] != '9');
! 991: buf[strlen(buf)-1]++;
! 992: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
! 993: }
! 994: #endif
! 995: #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
! 996: {
! 997: unsigned long long v;
! 998: long long v2;
! 999: static const unsigned long long max_value = 0xffffffffffffffffULL;
! 1000: char buf[32]; // definitely big enough for a unsigned long long
! 1001:
! 1002: CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
! 1003: CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
! 1004:
! 1005: sprintf(buf, LLU, max_value);
! 1006: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
! 1007:
! 1008: assert(buf[strlen(buf)-1] != '9');
! 1009: buf[strlen(buf)-1]++;
! 1010: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
! 1011: }
! 1012: #endif
! 1013: {
! 1014: float v;
! 1015: CHECK(RE("(.*)").FullMatch("100", &v));
! 1016: CHECK(RE("(.*)").FullMatch("-100.", &v));
! 1017: CHECK(RE("(.*)").FullMatch("1e23", &v));
! 1018: }
! 1019: {
! 1020: double v;
! 1021: CHECK(RE("(.*)").FullMatch("100", &v));
! 1022: CHECK(RE("(.*)").FullMatch("-100.", &v));
! 1023: CHECK(RE("(.*)").FullMatch("1e23", &v));
! 1024: }
! 1025:
! 1026: // Check that matching is fully anchored
! 1027: CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
! 1028: CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
! 1029: CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
! 1030: CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
! 1031:
! 1032: // Braces
! 1033: CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
! 1034: CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
! 1035: CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
! 1036:
! 1037: // Complicated RE
! 1038: CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
! 1039: CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
! 1040: CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
! 1041: CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
! 1042:
! 1043: // Check full-match handling (needs '$' tacked on internally)
! 1044: CHECK(RE("fo|foo").FullMatch("fo"));
! 1045: CHECK(RE("fo|foo").FullMatch("foo"));
! 1046: CHECK(RE("fo|foo$").FullMatch("fo"));
! 1047: CHECK(RE("fo|foo$").FullMatch("foo"));
! 1048: CHECK(RE("foo$").FullMatch("foo"));
! 1049: CHECK(!RE("foo\\$").FullMatch("foo$bar"));
! 1050: CHECK(!RE("fo|bar").FullMatch("fox"));
! 1051:
! 1052: // Uncomment the following if we change the handling of '$' to
! 1053: // prevent it from matching a trailing newline
! 1054: if (false) {
! 1055: // Check that we don't get bitten by pcre's special handling of a
! 1056: // '\n' at the end of the string matching '$'
! 1057: CHECK(!RE("foo$").PartialMatch("foo\n"));
! 1058: }
! 1059:
! 1060: // Number of args: FullMatch with 0 through 7, and then 16, output arguments
! 1061: int a[16];
! 1062: CHECK(RE("").FullMatch(""));
! 1063:
! 1064: memset(a, 0, sizeof(a)); // zero the whole array (sizeof(0) is only sizeof(int)) so a value left by the previous case cannot satisfy a CHECK_EQ below
! 1065: CHECK(RE("(\\d){1}").FullMatch("1",
! 1066: &a[0]));
! 1067: CHECK_EQ(a[0], 1);
! 1068:
! 1069: memset(a, 0, sizeof(a));
! 1070: CHECK(RE("(\\d)(\\d)").FullMatch("12",
! 1071: &a[0], &a[1]));
! 1072: CHECK_EQ(a[0], 1);
! 1073: CHECK_EQ(a[1], 2);
! 1074:
! 1075: memset(a, 0, sizeof(a));
! 1076: CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
! 1077: &a[0], &a[1], &a[2]));
! 1078: CHECK_EQ(a[0], 1);
! 1079: CHECK_EQ(a[1], 2);
! 1080: CHECK_EQ(a[2], 3);
! 1081:
! 1082: memset(a, 0, sizeof(a));
! 1083: CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
! 1084: &a[0], &a[1], &a[2], &a[3]));
! 1085: CHECK_EQ(a[0], 1);
! 1086: CHECK_EQ(a[1], 2);
! 1087: CHECK_EQ(a[2], 3);
! 1088: CHECK_EQ(a[3], 4);
! 1089:
! 1090: memset(a, 0, sizeof(a));
! 1091: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
! 1092: &a[0], &a[1], &a[2],
! 1093: &a[3], &a[4]));
! 1094: CHECK_EQ(a[0], 1);
! 1095: CHECK_EQ(a[1], 2);
! 1096: CHECK_EQ(a[2], 3);
! 1097: CHECK_EQ(a[3], 4);
! 1098: CHECK_EQ(a[4], 5);
! 1099:
! 1100: memset(a, 0, sizeof(a));
! 1101: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
! 1102: &a[0], &a[1], &a[2],
! 1103: &a[3], &a[4], &a[5]));
! 1104: CHECK_EQ(a[0], 1);
! 1105: CHECK_EQ(a[1], 2);
! 1106: CHECK_EQ(a[2], 3);
! 1107: CHECK_EQ(a[3], 4);
! 1108: CHECK_EQ(a[4], 5);
! 1109: CHECK_EQ(a[5], 6);
! 1110:
! 1111: memset(a, 0, sizeof(a));
! 1112: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
! 1113: &a[0], &a[1], &a[2], &a[3],
! 1114: &a[4], &a[5], &a[6]));
! 1115: CHECK_EQ(a[0], 1);
! 1116: CHECK_EQ(a[1], 2);
! 1117: CHECK_EQ(a[2], 3);
! 1118: CHECK_EQ(a[3], 4);
! 1119: CHECK_EQ(a[4], 5);
! 1120: CHECK_EQ(a[5], 6);
! 1121: CHECK_EQ(a[6], 7);
! 1122:
! 1123: memset(a, 0, sizeof(a));
! 1124: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
! 1125: "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
! 1126: "1234567890123456",
! 1127: &a[0], &a[1], &a[2], &a[3],
! 1128: &a[4], &a[5], &a[6], &a[7],
! 1129: &a[8], &a[9], &a[10], &a[11],
! 1130: &a[12], &a[13], &a[14], &a[15]));
! 1131: CHECK_EQ(a[0], 1);
! 1132: CHECK_EQ(a[1], 2);
! 1133: CHECK_EQ(a[2], 3);
! 1134: CHECK_EQ(a[3], 4);
! 1135: CHECK_EQ(a[4], 5);
! 1136: CHECK_EQ(a[5], 6);
! 1137: CHECK_EQ(a[6], 7);
! 1138: CHECK_EQ(a[7], 8);
! 1139: CHECK_EQ(a[8], 9);
! 1140: CHECK_EQ(a[9], 0);
! 1141: CHECK_EQ(a[10], 1);
! 1142: CHECK_EQ(a[11], 2);
! 1143: CHECK_EQ(a[12], 3);
! 1144: CHECK_EQ(a[13], 4);
! 1145: CHECK_EQ(a[14], 5);
! 1146: CHECK_EQ(a[15], 6);
! 1147:
! 1148: /***** PartialMatch *****/
! 1149:
! 1150: printf("Testing PartialMatch\n");
! 1151:
! 1152: CHECK(RE("h.*o").PartialMatch("hello"));
! 1153: CHECK(RE("h.*o").PartialMatch("othello"));
! 1154: CHECK(RE("h.*o").PartialMatch("hello!"));
! 1155: CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
! 1156:
! 1157: /***** other tests *****/
! 1158:
! 1159: RadixTests();
! 1160: TestReplace();
! 1161: TestExtract();
! 1162: TestConsume();
! 1163: TestFindAndConsume();
! 1164: TestQuoteMetaAll();
! 1165: TestMatchNumberPeculiarity();
! 1166:
! 1167: // Check the pattern() accessor
! 1168: {
! 1169: const string kPattern = "http://([^/]+)/.*";
! 1170: const RE re(kPattern);
! 1171: CHECK_EQ(kPattern, re.pattern());
! 1172: }
! 1173:
! 1174: // Check RE error field.
! 1175: {
! 1176: RE re("foo");
! 1177: CHECK(re.error().empty()); // Must have no error
! 1178: }
! 1179:
! 1180: #ifdef SUPPORT_UTF8
! 1181: // Check UTF-8 handling
! 1182: {
! 1183: printf("Testing UTF-8 handling\n");
! 1184:
! 1185: // Three Japanese characters (nihongo)
! 1186: const unsigned char utf8_string[] = { // UTF-8 bytes, three per character; trailing comments give the code points in hex
! 1187: 0xe6, 0x97, 0xa5, // 65e5
! 1188: 0xe6, 0x9c, 0xac, // 672c
! 1189: 0xe8, 0xaa, 0x9e, // 8a9e
! 1190: 0 // trailing 0 byte ends the sequence
! 1191: };
! 1192: const unsigned char utf8_pattern[] = { // '.', then the same three bytes as the middle character of utf8_string, then '.'
! 1193: '.',
! 1194: 0xe6, 0x9c, 0xac, // 672c
! 1195: '.',
! 1196: 0 // trailing 0 byte ends the sequence
! 1197: };
! 1198:
! 1199: // Both should match in either mode, bytes or UTF-8
! 1200: RE re_test1(".........");
! 1201: CHECK(re_test1.FullMatch(utf8_string));
! 1202: RE re_test2("...", pcrecpp::UTF8());
! 1203: CHECK(re_test2.FullMatch(utf8_string));
! 1204:
! 1205: // Check that '.' matches one byte or UTF-8 character
! 1206: // according to the mode.
! 1207: string ss;
! 1208: RE re_test3("(.)");
! 1209: CHECK(re_test3.PartialMatch(utf8_string, &ss));
! 1210: CHECK_EQ(ss, string("\xe6"));
! 1211: RE re_test4("(.)", pcrecpp::UTF8());
! 1212: CHECK(re_test4.PartialMatch(utf8_string, &ss));
! 1213: CHECK_EQ(ss, string("\xe6\x97\xa5"));
! 1214:
! 1215: // Check that string matches itself in either mode
! 1216: RE re_test5(utf8_string);
! 1217: CHECK(re_test5.FullMatch(utf8_string));
! 1218: RE re_test6(utf8_string, pcrecpp::UTF8());
! 1219: CHECK(re_test6.FullMatch(utf8_string));
! 1220:
! 1221: // Check that pattern matches string only in UTF8 mode
! 1222: RE re_test7(utf8_pattern);
! 1223: CHECK(!re_test7.FullMatch(utf8_string));
! 1224: RE re_test8(utf8_pattern, pcrecpp::UTF8());
! 1225: CHECK(re_test8.FullMatch(utf8_string));
! 1226: }
! 1227:
! 1228: // Check that ungreedy, UTF8 regular expressions don't match when they
! 1229: // oughtn't -- see bug 82246.
! 1230: {
! 1231: // This code always worked.
! 1232: const char* pattern = "\\w+X";
! 1233: const string target = "a aX";
! 1234: RE match_sentence(pattern);
! 1235: RE match_sentence_re(pattern, pcrecpp::UTF8());
! 1236:
! 1237: CHECK(!match_sentence.FullMatch(target));
! 1238: CHECK(!match_sentence_re.FullMatch(target));
! 1239: }
! 1240:
! 1241: {
! 1242: const char* pattern = "(?U)\\w+X";
! 1243: const string target = "a aX";
! 1244: RE match_sentence(pattern);
! 1245: RE match_sentence_re(pattern, pcrecpp::UTF8());
! 1246:
! 1247: CHECK(!match_sentence.FullMatch(target));
! 1248: CHECK(!match_sentence_re.FullMatch(target));
! 1249: }
! 1250: #endif /* def SUPPORT_UTF8 */
! 1251:
! 1252: printf("Testing error reporting\n");
! 1253:
! 1254: { RE re("a\\1"); CHECK(!re.error().empty()); }
! 1255: {
! 1256: RE re("a[x");
! 1257: CHECK(!re.error().empty());
! 1258: }
! 1259: {
! 1260: RE re("a[z-a]");
! 1261: CHECK(!re.error().empty());
! 1262: }
! 1263: {
! 1264: RE re("a[[:foobar:]]");
! 1265: CHECK(!re.error().empty());
! 1266: }
! 1267: {
! 1268: RE re("a(b");
! 1269: CHECK(!re.error().empty());
! 1270: }
! 1271: {
! 1272: RE re("a\\");
! 1273: CHECK(!re.error().empty());
! 1274: }
! 1275:
! 1276: // Test that recursion is stopped
! 1277: TestRecursion();
! 1278:
! 1279: // Test Options
! 1280: if (getenv("VERBOSE_TEST") != NULL)
! 1281: VERBOSE_TEST = true;
! 1282: TestOptions();
! 1283:
! 1284: // Test the constructors
! 1285: TestConstructors();
! 1286:
! 1287: // Done
! 1288: printf("OK\n");
! 1289:
! 1290: return 0;
! 1291: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>