Return to html_entity_decode_macroman.phpt CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard / tests / strings |
1.1 misho 1: --TEST-- 2: Translation of HTML entities for encoding MacRoman 3: --FILE-- 4: <?php 5: $arr = array( 6: 0x00C4 => array(0x80, "LATIN CAPITAL LETTER A WITH DIAERESIS"), 7: 0x00C5 => array(0x81, "LATIN CAPITAL LETTER A WITH RING ABOVE"), 8: 0x00C7 => array(0x82, "LATIN CAPITAL LETTER C WITH CEDILLA"), 9: 0x00C9 => array(0x83, "LATIN CAPITAL LETTER E WITH ACUTE"), 10: 0x00D1 => array(0x84, "LATIN CAPITAL LETTER N WITH TILDE"), 11: 0x00D6 => array(0x85, "LATIN CAPITAL LETTER O WITH DIAERESIS"), 12: 0x00DC => array(0x86, "LATIN CAPITAL LETTER U WITH DIAERESIS"), 13: 0x00E1 => array(0x87, "LATIN SMALL LETTER A WITH ACUTE"), 14: 0x00E0 => array(0x88, "LATIN SMALL LETTER A WITH GRAVE"), 15: 0x00E2 => array(0x89, "LATIN SMALL LETTER A WITH CIRCUMFLEX"), 16: 0x00E4 => array(0x8A, "LATIN SMALL LETTER A WITH DIAERESIS"), 17: 0x00E3 => array(0x8B, "LATIN SMALL LETTER A WITH TILDE"), 18: 0x00E5 => array(0x8C, "LATIN SMALL LETTER A WITH RING ABOVE"), 19: 0x00E7 => array(0x8D, "LATIN SMALL LETTER C WITH CEDILLA"), 20: 0x00E9 => array(0x8E, "LATIN SMALL LETTER E WITH ACUTE"), 21: 0x00E8 => array(0x8F, "LATIN SMALL LETTER E WITH GRAVE"), 22: 0x00EA => array(0x90, "LATIN SMALL LETTER E WITH CIRCUMFLEX"), 23: 0x00EB => array(0x91, "LATIN SMALL LETTER E WITH DIAERESIS"), 24: 0x00ED => array(0x92, "LATIN SMALL LETTER I WITH ACUTE"), 25: 0x00EC => array(0x93, "LATIN SMALL LETTER I WITH GRAVE"), 26: 0x00EE => array(0x94, "LATIN SMALL LETTER I WITH CIRCUMFLEX"), 27: 0x00EF => array(0x95, "LATIN SMALL LETTER I WITH DIAERESIS"), 28: 0x00F1 => array(0x96, "LATIN SMALL LETTER N WITH TILDE"), 29: 0x00F3 => array(0x97, "LATIN SMALL LETTER O WITH ACUTE"), 30: 0x00F2 => array(0x98, "LATIN SMALL LETTER O WITH GRAVE"), 31: 0x00F4 => array(0x99, "LATIN SMALL LETTER O WITH CIRCUMFLEX"), 32: 0x00F6 => array(0x9A, "LATIN SMALL LETTER O WITH DIAERESIS"), 33: 0x00F5 => array(0x9B, "LATIN SMALL LETTER O WITH TILDE"), 34: 0x00FA => array(0x9C, "LATIN SMALL LETTER U WITH ACUTE"), 35: 0x00F9 => array(0x9D, "LATIN SMALL LETTER U WITH GRAVE"), 36: 0x00FB => array(0x9E, "LATIN SMALL LETTER U WITH CIRCUMFLEX"), 37: 0x00FC => array(0x9F, "LATIN SMALL LETTER U WITH DIAERESIS"), 38: 0x2020 => array(0xA0, "DAGGER"), 39: 0x00B0 => array(0xA1, "DEGREE SIGN"), 40: 0x00A2 => array(0xA2, "CENT SIGN"), 41: 0x00A3 => array(0xA3, "POUND SIGN"), 42: 0x00A7 => array(0xA4, "SECTION SIGN"), 43: 0x2022 => array(0xA5, "BULLET"), 44: 0x00B6 => array(0xA6, "PILCROW SIGN"), 45: 0x00DF => array(0xA7, "LATIN SMALL LETTER SHARP S"), 46: 0x00AE => array(0xA8, "REGISTERED SIGN"), 47: 0x00A9 => array(0xA9, "COPYRIGHT SIGN"), 48: 0x2122 => array(0xAA, "TRADE MARK SIGN"), 49: 0x00B4 => array(0xAB, "ACUTE ACCENT"), 50: 0x00A8 => array(0xAC, "DIAERESIS"), 51: 0x2260 => array(0xAD, "NOT EQUAL TO"), 52: 0x00C6 => array(0xAE, "LATIN CAPITAL LETTER AE"), 53: 0x00D8 => array(0xAF, "LATIN CAPITAL LETTER O WITH STROKE"), 54: 0x221E => array(0xB0, "INFINITY"), 55: 0x00B1 => array(0xB1, "PLUS-MINUS SIGN"), 56: 0x2264 => array(0xB2, "LESS-THAN OR EQUAL TO"), 57: 0x2265 => array(0xB3, "GREATER-THAN OR EQUAL TO"), 58: 0x00A5 => array(0xB4, "YEN SIGN"), 59: 0x00B5 => array(0xB5, "MICRO SIGN"), 60: 0x2202 => array(0xB6, "PARTIAL DIFFERENTIAL"), 61: 0x2211 => array(0xB7, "N-ARY SUMMATION"), 62: 0x220F => array(0xB8, "N-ARY PRODUCT"), 63: 0x03C0 => array(0xB9, "GREEK SMALL LETTER PI"), 64: 0x222B => array(0xBA, "INTEGRAL"), 65: 0x00AA => array(0xBB, "FEMININE ORDINAL INDICATOR"), 66: 0x00BA => array(0xBC, "MASCULINE ORDINAL INDICATOR"), 67: 0x03A9 => array(0xBD, "GREEK CAPITAL LETTER OMEGA"), 68: 0x00E6 => array(0xBE, "LATIN SMALL LETTER AE"), 69: 0x00F8 => array(0xBF, "LATIN SMALL LETTER O WITH STROKE"), 70: 0x00BF => array(0xC0, "INVERTED QUESTION MARK"), 71: 0x00A1 => array(0xC1, "INVERTED EXCLAMATION MARK"), 72: 0x00AC => array(0xC2, "NOT SIGN"), 73: 0x221A => array(0xC3, "SQUARE ROOT"), 74: 0x0192 => array(0xC4, "LATIN SMALL LETTER F WITH HOOK"), 75: 0x2248 => array(0xC5, "ALMOST EQUAL TO"), 76: 0x2206 => array(0xC6, "INCREMENT"), 77: 0x00AB => array(0xC7, "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"), 78: 0x00BB => array(0xC8, "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"), 79: 0x2026 => array(0xC9, "HORIZONTAL ELLIPSIS"), 80: 0x00A0 => array(0xCA, "NO-BREAK SPACE"), 81: 0x00C0 => array(0xCB, "LATIN CAPITAL LETTER A WITH GRAVE"), 82: 0x00C3 => array(0xCC, "LATIN CAPITAL LETTER A WITH TILDE"), 83: 0x00D5 => array(0xCD, "LATIN CAPITAL LETTER O WITH TILDE"), 84: 0x0152 => array(0xCE, "LATIN CAPITAL LIGATURE OE"), 85: 0x0153 => array(0xCF, "LATIN SMALL LIGATURE OE"), 86: 0x2013 => array(0xD0, "EN DASH"), 87: 0x2014 => array(0xD1, "EM DASH"), 88: 0x201C => array(0xD2, "LEFT DOUBLE QUOTATION MARK"), 89: 0x201D => array(0xD3, "RIGHT DOUBLE QUOTATION MARK"), 90: 0x2018 => array(0xD4, "LEFT SINGLE QUOTATION MARK"), 91: 0x2019 => array(0xD5, "RIGHT SINGLE QUOTATION MARK"), 92: 0x00F7 => array(0xD6, "DIVISION SIGN"), 93: 0x25CA => array(0xD7, "LOZENGE"), 94: 0x00FF => array(0xD8, "LATIN SMALL LETTER Y WITH DIAERESIS"), 95: 0x0178 => array(0xD9, "LATIN CAPITAL LETTER Y WITH DIAERESIS"), 96: 0x2044 => array(0xDA, "FRACTION SLASH"), 97: 0x20AC => array(0xDB, "EURO SIGN"), 98: 0x2039 => array(0xDC, "SINGLE LEFT-POINTING ANGLE QUOTATION MARK"), 99: 0x203A => array(0xDD, "SINGLE RIGHT-POINTING ANGLE QUOTATION MARK"), 100: 0xFB01 => array(0xDE, "LATIN SMALL LIGATURE FI"), 101: 0xFB02 => array(0xDF, "LATIN SMALL LIGATURE FL"), 102: 0x2021 => array(0xE0, "DOUBLE DAGGER"), 103: 0x00B7 => array(0xE1, "MIDDLE DOT"), 104: 0x201A => array(0xE2, "SINGLE LOW-9 QUOTATION MARK"), 105: 0x201E => array(0xE3, "DOUBLE LOW-9 QUOTATION MARK"), 106: 0x2030 => array(0xE4, "PER MILLE SIGN"), 107: 0x00C2 => array(0xE5, "LATIN CAPITAL LETTER A WITH CIRCUMFLEX"), 108: 0x00CA => array(0xE6, "LATIN CAPITAL LETTER E WITH CIRCUMFLEX"), 109: 0x00C1 => array(0xE7, "LATIN CAPITAL LETTER A WITH ACUTE"), 110: 0x00CB => array(0xE8, "LATIN CAPITAL LETTER E WITH DIAERESIS"), 111: 0x00C8 => array(0xE9, "LATIN CAPITAL LETTER E WITH GRAVE"), 112: 0x00CD => array(0xEA, "LATIN CAPITAL LETTER I WITH ACUTE"), 113: 0x00CE => array(0xEB, "LATIN CAPITAL LETTER I WITH CIRCUMFLEX"), 114: 0x00CF => array(0xEC, "LATIN CAPITAL LETTER I WITH DIAERESIS"), 115: 0x00CC => array(0xED, "LATIN CAPITAL LETTER I WITH GRAVE"), 116: 0x00D3 => array(0xEE, "LATIN CAPITAL LETTER O WITH ACUTE"), 117: 0x00D4 => array(0xEF, "LATIN CAPITAL LETTER O WITH CIRCUMFLEX"), 118: 0xF8FF => array(0xF0, "Apple logo"), 119: 0x00D2 => array(0xF1, "LATIN CAPITAL LETTER O WITH GRAVE"), 120: 0x00DA => array(0xF2, "LATIN CAPITAL LETTER U WITH ACUTE"), 121: 0x00DB => array(0xF3, "LATIN CAPITAL LETTER U WITH CIRCUMFLEX"), 122: 0x00D9 => array(0xF4, "LATIN CAPITAL LETTER U WITH GRAVE"), 123: 0x0131 => array(0xF5, "LATIN SMALL LETTER DOTLESS I"), 124: 0x02C6 => array(0xF6, "MODIFIER LETTER CIRCUMFLEX ACCENT"), 125: 0x02DC => array(0xF7, "SMALL TILDE"), 126: 0x00AF => array(0xF8, "MACRON"), 127: 0x02D8 => array(0xF9, "BREVE"), 128: 0x02D9 => array(0xFA, "DOT ABOVE"), 129: 0x02DA => array(0xFB, "RING ABOVE"), 130: 0x00B8 => array(0xFC, "CEDILLA"), 131: 0x02DD => array(0xFD, "DOUBLE ACUTE ACCENT"), 132: 0x02DB => array(0xFE, "OGONEK"), 133: 0x02C7 => array(0xFF, "CARON"), 134: ); 135: 136: $res = html_entity_decode("", ENT_QUOTES, 'MacRoman'); 137: echo "Special test for  (shouldn't decode):\n"; 138: echo $res,"\n\n"; 139: 140: foreach ($arr as $u => $v) { 141: $ent = sprintf("&#x%X;", $u); 142: $res = html_entity_decode($ent, ENT_QUOTES, 'MacRoman'); 143: $d = unpack("H*", $res); 144: echo sprintf("%s: %s => %s\n", $v[1], $ent, $d[1]); 145: 146: $ent = sprintf("&#x%X;", $v[0]); 147: $res = html_entity_decode($ent, ENT_QUOTES, 'MacRoman'); 148: if ($res[0] != "&" || $res[1] != "#") 149: $res = unpack("H*", $res)[1]; 150: echo sprintf("%s => %s\n\n", $ent, $res); 151: } 152: --EXPECT-- 153: Special test for  (shouldn't decode): 154:  155: 156: LATIN CAPITAL LETTER A WITH DIAERESIS: Ä => 80 157: € => € 158: 159: LATIN CAPITAL LETTER A WITH RING ABOVE: Å => 81 160:  =>  161: 162: LATIN CAPITAL LETTER C WITH CEDILLA: Ç => 82 163: ‚ => ‚ 164: 165: LATIN CAPITAL LETTER E WITH ACUTE: É => 83 166: ƒ => ƒ 167: 168: LATIN CAPITAL LETTER N WITH TILDE: Ñ => 84 169: „ => „ 170: 171: LATIN CAPITAL LETTER O WITH DIAERESIS: Ö => 85 172: … => … 173: 174: LATIN CAPITAL LETTER U WITH DIAERESIS: Ü => 86 175: † => † 176: 177: LATIN SMALL LETTER A WITH ACUTE: á => 87 178: ‡ => ‡ 179: 180: LATIN SMALL LETTER A WITH GRAVE: à => 88 181: ˆ => ˆ 182: 183: LATIN SMALL LETTER A WITH CIRCUMFLEX: â => 89 184: ‰ => ‰ 185: 186: LATIN SMALL LETTER A WITH DIAERESIS: ä => 8a 187: Š => Š 188: 189: LATIN SMALL LETTER A WITH TILDE: ã => 8b 190: ‹ => ‹ 191: 192: LATIN SMALL LETTER A WITH RING ABOVE: å => 8c 193: Œ => Œ 194: 195: LATIN SMALL LETTER C WITH CEDILLA: ç => 8d 196:  =>  197: 198: LATIN SMALL LETTER E WITH ACUTE: é => 8e 199: Ž => Ž 200: 201: LATIN SMALL LETTER E WITH GRAVE: è => 8f 202:  =>  203: 204: LATIN SMALL LETTER E WITH CIRCUMFLEX: ê => 90 205:  =>  206: 207: LATIN SMALL LETTER E WITH DIAERESIS: ë => 91 208: ‘ => ‘ 209: 210: LATIN SMALL LETTER I WITH ACUTE: í => 92 211: ’ => ’ 212: 213: LATIN SMALL LETTER I WITH GRAVE: ì => 93 214: “ => “ 215: 216: LATIN SMALL LETTER I WITH CIRCUMFLEX: î => 94 217: ” => ” 218: 219: LATIN SMALL LETTER I WITH DIAERESIS: ï => 95 220: • => • 221: 222: LATIN SMALL LETTER N WITH TILDE: ñ => 96 223: – => – 224: 225: LATIN SMALL LETTER O WITH ACUTE: ó => 97 226: — => — 227: 228: LATIN SMALL LETTER O WITH GRAVE: ò => 98 229: ˜ => ˜ 230: 231: LATIN SMALL LETTER O WITH CIRCUMFLEX: ô => 99 232: ™ => ™ 233: 234: LATIN SMALL LETTER O WITH DIAERESIS: ö => 9a 235: š => š 236: 237: LATIN SMALL LETTER O WITH TILDE: õ => 9b 238: › => › 239: 240: LATIN SMALL LETTER U WITH ACUTE: ú => 9c 241: œ => œ 242: 243: LATIN SMALL LETTER U WITH GRAVE: ù => 9d 244:  =>  245: 246: LATIN SMALL LETTER U WITH CIRCUMFLEX: û => 9e 247: ž => ž 248: 249: LATIN SMALL LETTER U WITH DIAERESIS: ü => 9f 250: Ÿ => Ÿ 251: 252: DAGGER: † => a0 253:   => ca 254: 255: DEGREE SIGN: ° => a1 256: ¡ => c1 257: 258: CENT SIGN: ¢ => a2 259: ¢ => a2 260: 261: POUND SIGN: £ => a3 262: £ => a3 263: 264: SECTION SIGN: § => a4 265: ¤ => ¤ 266: 267: BULLET: • => a5 268: ¥ => b4 269: 270: PILCROW SIGN: ¶ => a6 271: ¦ => ¦ 272: 273: LATIN SMALL LETTER SHARP S: ß => a7 274: § => a4 275: 276: REGISTERED SIGN: ® => a8 277: ¨ => ac 278: 279: COPYRIGHT SIGN: © => a9 280: © => a9 281: 282: TRADE MARK SIGN: ™ => aa 283: ª => bb 284: 285: ACUTE ACCENT: ´ => ab 286: « => c7 287: 288: DIAERESIS: ¨ => ac 289: ¬ => c2 290: 291: NOT EQUAL TO: ≠ => ad 292: ­ => ­ 293: 294: LATIN CAPITAL LETTER AE: Æ => ae 295: ® => a8 296: 297: LATIN CAPITAL LETTER O WITH STROKE: Ø => af 298: ¯ => f8 299: 300: INFINITY: ∞ => b0 301: ° => a1 302: 303: PLUS-MINUS SIGN: ± => b1 304: ± => b1 305: 306: LESS-THAN OR EQUAL TO: ≤ => b2 307: ² => ² 308: 309: GREATER-THAN OR EQUAL TO: ≥ => b3 310: ³ => ³ 311: 312: YEN SIGN: ¥ => b4 313: ´ => ab 314: 315: MICRO SIGN: µ => b5 316: µ => b5 317: 318: PARTIAL DIFFERENTIAL: ∂ => b6 319: ¶ => a6 320: 321: N-ARY SUMMATION: ∑ => b7 322: · => e1 323: 324: N-ARY PRODUCT: ∏ => b8 325: ¸ => fc 326: 327: GREEK SMALL LETTER PI: π => b9 328: ¹ => ¹ 329: 330: INTEGRAL: ∫ => ba 331: º => bc 332: 333: FEMININE ORDINAL INDICATOR: ª => bb 334: » => c8 335: 336: MASCULINE ORDINAL INDICATOR: º => bc 337: ¼ => ¼ 338: 339: GREEK CAPITAL LETTER OMEGA: Ω => bd 340: ½ => ½ 341: 342: LATIN SMALL LETTER AE: æ => be 343: ¾ => ¾ 344: 345: LATIN SMALL LETTER O WITH STROKE: ø => bf 346: ¿ => c0 347: 348: INVERTED QUESTION MARK: ¿ => c0 349: À => cb 350: 351: INVERTED EXCLAMATION MARK: ¡ => c1 352: Á => e7 353: 354: NOT SIGN: ¬ => c2 355:  => e5 356: 357: SQUARE ROOT: √ => c3 358: à => cc 359: 360: LATIN SMALL LETTER F WITH HOOK: ƒ => c4 361: Ä => 80 362: 363: ALMOST EQUAL TO: ≈ => c5 364: Å => 81 365: 366: INCREMENT: ∆ => c6 367: Æ => ae 368: 369: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK: « => c7 370: Ç => 82 371: 372: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK: » => c8 373: È => e9 374: 375: HORIZONTAL ELLIPSIS: … => c9 376: É => 83 377: 378: NO-BREAK SPACE:   => ca 379: Ê => e6 380: 381: LATIN CAPITAL LETTER A WITH GRAVE: À => cb 382: Ë => e8 383: 384: LATIN CAPITAL LETTER A WITH TILDE: à => cc 385: Ì => ed 386: 387: LATIN CAPITAL LETTER O WITH TILDE: Õ => cd 388: Í => ea 389: 390: LATIN CAPITAL LIGATURE OE: Œ => ce 391: Î => eb 392: 393: LATIN SMALL LIGATURE OE: œ => cf 394: Ï => ec 395: 396: EN DASH: – => d0 397: Ð => Ð 398: 399: EM DASH: — => d1 400: Ñ => 84 401: 402: LEFT DOUBLE QUOTATION MARK: “ => d2 403: Ò => f1 404: 405: RIGHT DOUBLE QUOTATION MARK: ” => d3 406: Ó => ee 407: 408: LEFT SINGLE QUOTATION MARK: ‘ => d4 409: Ô => ef 410: 411: RIGHT SINGLE QUOTATION MARK: ’ => d5 412: Õ => cd 413: 414: DIVISION SIGN: ÷ => d6 415: Ö => 85 416: 417: LOZENGE: ◊ => d7 418: × => × 419: 420: LATIN SMALL LETTER Y WITH DIAERESIS: ÿ => d8 421: Ø => af 422: 423: LATIN CAPITAL LETTER Y WITH DIAERESIS: Ÿ => d9 424: Ù => f4 425: 426: FRACTION SLASH: ⁄ => da 427: Ú => f2 428: 429: EURO SIGN: € => db 430: Û => f3 431: 432: SINGLE LEFT-POINTING ANGLE QUOTATION MARK: ‹ => dc 433: Ü => 86 434: 435: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK: › => dd 436: Ý => Ý 437: 438: LATIN SMALL LIGATURE FI: fi => de 439: Þ => Þ 440: 441: LATIN SMALL LIGATURE FL: fl => df 442: ß => a7 443: 444: DOUBLE DAGGER: ‡ => e0 445: à => 88 446: 447: MIDDLE DOT: · => e1 448: á => 87 449: 450: SINGLE LOW-9 QUOTATION MARK: ‚ => e2 451: â => 89 452: 453: DOUBLE LOW-9 QUOTATION MARK: „ => e3 454: ã => 8b 455: 456: PER MILLE SIGN: ‰ => e4 457: ä => 8a 458: 459: LATIN CAPITAL LETTER A WITH CIRCUMFLEX:  => e5 460: å => 8c 461: 462: LATIN CAPITAL LETTER E WITH CIRCUMFLEX: Ê => e6 463: æ => be 464: 465: LATIN CAPITAL LETTER A WITH ACUTE: Á => e7 466: ç => 8d 467: 468: LATIN CAPITAL LETTER E WITH DIAERESIS: Ë => e8 469: è => 8f 470: 471: LATIN CAPITAL LETTER E WITH GRAVE: È => e9 472: é => 8e 473: 474: LATIN CAPITAL LETTER I WITH ACUTE: Í => ea 475: ê => 90 476: 477: LATIN CAPITAL LETTER I WITH CIRCUMFLEX: Î => eb 478: ë => 91 479: 480: LATIN CAPITAL LETTER I WITH DIAERESIS: Ï => ec 481: ì => 93 482: 483: LATIN CAPITAL LETTER I WITH GRAVE: Ì => ed 484: í => 92 485: 486: LATIN CAPITAL LETTER O WITH ACUTE: Ó => ee 487: î => 94 488: 489: LATIN CAPITAL LETTER O WITH CIRCUMFLEX: Ô => ef 490: ï => 95 491: 492: Apple logo:  => f0 493: ð => ð 494: 495: LATIN CAPITAL LETTER O WITH GRAVE: Ò => f1 496: ñ => 96 497: 498: LATIN CAPITAL LETTER U WITH ACUTE: Ú => f2 499: ò => 98 500: 501: LATIN CAPITAL LETTER U WITH CIRCUMFLEX: Û => f3 502: ó => 97 503: 504: LATIN CAPITAL LETTER U WITH GRAVE: Ù => f4 505: ô => 99 506: 507: LATIN SMALL LETTER DOTLESS I: ı => f5 508: õ => 9b 509: 510: MODIFIER LETTER CIRCUMFLEX ACCENT: ˆ => f6 511: ö => 9a 512: 513: SMALL TILDE: ˜ => f7 514: ÷ => d6 515: 516: MACRON: ¯ => f8 517: ø => bf 518: 519: BREVE: ˘ => f9 520: ù => 9d 521: 522: DOT ABOVE: ˙ => fa 523: ú => 9c 524: 525: RING ABOVE: ˚ => fb 526: û => 9e 527: 528: CEDILLA: ¸ => fc 529: ü => 9f 530: 531: DOUBLE ACUTE ACCENT: ˝ => fd 532: ý => ý 533: 534: OGONEK: ˛ => fe 535: þ => þ 536: 537: CARON: ˇ => ff 538: ÿ => d8 539: 540: