Return to scanf.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard |
1.1 ! misho 1: /* ! 2: +----------------------------------------------------------------------+ ! 3: | PHP Version 5 | ! 4: +----------------------------------------------------------------------+ ! 5: | Copyright (c) 1997-2012 The PHP Group | ! 6: +----------------------------------------------------------------------+ ! 7: | This source file is subject to version 3.01 of the PHP license, | ! 8: | that is bundled with this package in the file LICENSE, and is | ! 9: | available through the world-wide-web at the following url: | ! 10: | http://www.php.net/license/3_01.txt | ! 11: | If you did not receive a copy of the PHP license and are unable to | ! 12: | obtain it through the world-wide-web, please send a note to | ! 13: | license@php.net so we can mail you a copy immediately. | ! 14: +----------------------------------------------------------------------+ ! 15: | Author: Clayton Collie <clcollie@mindspring.com> | ! 16: +----------------------------------------------------------------------+ ! 17: */ ! 18: ! 19: /* $Id: scanf.c 321634 2012-01-01 13:15:04Z felipe $ */ ! 20: ! 21: /* ! 22: scanf.c -- ! 23: ! 24: This file contains the base code which implements sscanf and by extension ! 25: fscanf. Original code is from TCL8.3.0 and bears the following copyright: ! 26: ! 27: This software is copyrighted by the Regents of the University of ! 28: California, Sun Microsystems, Inc., Scriptics Corporation, ! 29: and other parties. The following terms apply to all files associated ! 30: with the software unless explicitly disclaimed in individual files. ! 31: ! 32: The authors hereby grant permission to use, copy, modify, distribute, ! 33: and license this software and its documentation for any purpose, provided ! 34: that existing copyright notices are retained in all copies and that this ! 35: notice is included verbatim in any distributions. No written agreement, ! 36: license, or royalty fee is required for any of the authorized uses. ! 37: Modifications to this software may be copyrighted by their authors ! 38: and need not follow the licensing terms described here, provided that ! 39: the new terms are clearly indicated on the first page of each file where ! 40: they apply. ! 41: ! 42: IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY ! 43: FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ! 44: ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY ! 45: DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE ! 46: POSSIBILITY OF SUCH DAMAGE. ! 47: ! 48: THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, ! 49: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, ! 50: FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE ! 51: IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE ! 52: NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR ! 53: MODIFICATIONS. ! 54: ! 55: GOVERNMENT USE: If you are acquiring this software on behalf of the ! 56: U.S. government, the Government shall have only "Restricted Rights" ! 57: in the software and related documentation as defined in the Federal ! 58: Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you ! 59: are acquiring the software on behalf of the Department of Defense, the ! 60: software shall be classified as "Commercial Computer Software" and the ! 61: Government shall have only "Restricted Rights" as defined in Clause ! 62: 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the ! 63: authors grant the U.S. Government and others acting in its behalf ! 64: permission to use and distribute the software in accordance with the ! 65: terms specified in this license. ! 66: */ ! 67: ! 68: #include <stdio.h> ! 69: #include <limits.h> ! 70: #include <ctype.h> ! 71: #include "php.h" ! 72: #include "php_variables.h" ! 73: #ifdef HAVE_LOCALE_H ! 74: #include <locale.h> ! 75: #endif ! 76: #include "zend_execute.h" ! 77: #include "zend_operators.h" ! 78: #include "zend_strtod.h" ! 79: #include "php_globals.h" ! 80: #include "basic_functions.h" ! 81: #include "scanf.h" ! 82: ! 83: /* ! 84: * Flag values used internally by [f|s]canf. ! 85: */ ! 86: #define SCAN_NOSKIP 0x1 /* Don't skip blanks. */ ! 87: #define SCAN_SUPPRESS 0x2 /* Suppress assignment. */ ! 88: #define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */ ! 89: #define SCAN_WIDTH 0x8 /* A width value was supplied. */ ! 90: ! 91: #define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */ ! 92: #define SCAN_NODIGITS 0x20 /* No digits have been scanned. */ ! 93: #define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */ ! 94: #define SCAN_XOK 0x80 /* An 'x' is allowed. */ ! 95: #define SCAN_PTOK 0x100 /* Decimal point is allowed. */ ! 96: #define SCAN_EXPOK 0x200 /* An exponent is allowed. */ ! 97: ! 98: #define UCHAR(x) (zend_uchar)(x) ! 99: ! 100: /* ! 101: * The following structure contains the information associated with ! 102: * a character set. ! 103: */ ! 104: typedef struct CharSet { ! 105: int exclude; /* 1 if this is an exclusion set. */ ! 106: int nchars; ! 107: char *chars; ! 108: int nranges; ! 109: struct Range { ! 110: char start; ! 111: char end; ! 112: } *ranges; ! 113: } CharSet; ! 114: ! 115: /* ! 116: * Declarations for functions used only in this file. ! 117: */ ! 118: static char *BuildCharSet(CharSet *cset, char *format); ! 119: static int CharInSet(CharSet *cset, int ch); ! 120: static void ReleaseCharSet(CharSet *cset); ! 121: static inline void scan_set_error_return(int numVars, zval **return_value); ! 122: ! 123: ! 124: /* {{{ BuildCharSet ! 125: *---------------------------------------------------------------------- ! 126: * ! 127: * BuildCharSet -- ! 128: * ! 129: * This function examines a character set format specification ! 130: * and builds a CharSet containing the individual characters and ! 131: * character ranges specified. ! 132: * ! 133: * Results: ! 134: * Returns the next format position. ! 135: * ! 136: * Side effects: ! 137: * Initializes the charset. ! 138: * ! 139: *---------------------------------------------------------------------- ! 140: */ ! 141: static char * BuildCharSet(CharSet *cset, char *format) ! 142: { ! 143: char *ch, start; ! 144: int nranges; ! 145: char *end; ! 146: ! 147: memset(cset, 0, sizeof(CharSet)); ! 148: ! 149: ch = format; ! 150: if (*ch == '^') { ! 151: cset->exclude = 1; ! 152: ch = ++format; ! 153: } ! 154: end = format + 1; /* verify this - cc */ ! 155: ! 156: /* ! 157: * Find the close bracket so we can overallocate the set. ! 158: */ ! 159: if (*ch == ']') { ! 160: ch = end++; ! 161: } ! 162: nranges = 0; ! 163: while (*ch != ']') { ! 164: if (*ch == '-') { ! 165: nranges++; ! 166: } ! 167: ch = end++; ! 168: } ! 169: ! 170: cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0); ! 171: if (nranges > 0) { ! 172: cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0); ! 173: } else { ! 174: cset->ranges = NULL; ! 175: } ! 176: ! 177: /* ! 178: * Now build the character set. ! 179: */ ! 180: cset->nchars = cset->nranges = 0; ! 181: ch = format++; ! 182: start = *ch; ! 183: if (*ch == ']' || *ch == '-') { ! 184: cset->chars[cset->nchars++] = *ch; ! 185: ch = format++; ! 186: } ! 187: while (*ch != ']') { ! 188: if (*format == '-') { ! 189: /* ! 190: * This may be the first character of a range, so don't add ! 191: * it yet. ! 192: */ ! 193: start = *ch; ! 194: } else if (*ch == '-') { ! 195: /* ! 196: * Check to see if this is the last character in the set, in which ! 197: * case it is not a range and we should add the previous character ! 198: * as well as the dash. ! 199: */ ! 200: if (*format == ']') { ! 201: cset->chars[cset->nchars++] = start; ! 202: cset->chars[cset->nchars++] = *ch; ! 203: } else { ! 204: ch = format++; ! 205: ! 206: /* ! 207: * Check to see if the range is in reverse order. ! 208: */ ! 209: if (start < *ch) { ! 210: cset->ranges[cset->nranges].start = start; ! 211: cset->ranges[cset->nranges].end = *ch; ! 212: } else { ! 213: cset->ranges[cset->nranges].start = *ch; ! 214: cset->ranges[cset->nranges].end = start; ! 215: } ! 216: cset->nranges++; ! 217: } ! 218: } else { ! 219: cset->chars[cset->nchars++] = *ch; ! 220: } ! 221: ch = format++; ! 222: } ! 223: return format; ! 224: } ! 225: /* }}} */ ! 226: ! 227: /* {{{ CharInSet ! 228: *---------------------------------------------------------------------- ! 229: * ! 230: * CharInSet -- ! 231: * ! 232: * Check to see if a character matches the given set. ! 233: * ! 234: * Results: ! 235: * Returns non-zero if the character matches the given set. ! 236: * ! 237: * Side effects: ! 238: * None. ! 239: * ! 240: *---------------------------------------------------------------------- ! 241: */ ! 242: static int CharInSet(CharSet *cset, int c) ! 243: { ! 244: char ch = (char) c; ! 245: int i, match = 0; ! 246: ! 247: for (i = 0; i < cset->nchars; i++) { ! 248: if (cset->chars[i] == ch) { ! 249: match = 1; ! 250: break; ! 251: } ! 252: } ! 253: if (!match) { ! 254: for (i = 0; i < cset->nranges; i++) { ! 255: if ((cset->ranges[i].start <= ch) ! 256: && (ch <= cset->ranges[i].end)) { ! 257: match = 1; ! 258: break; ! 259: } ! 260: } ! 261: } ! 262: return (cset->exclude ? !match : match); ! 263: } ! 264: /* }}} */ ! 265: ! 266: /* {{{ ReleaseCharSet ! 267: *---------------------------------------------------------------------- ! 268: * ! 269: * ReleaseCharSet -- ! 270: * ! 271: * Free the storage associated with a character set. ! 272: * ! 273: * Results: ! 274: * None. ! 275: * ! 276: * Side effects: ! 277: * None. ! 278: * ! 279: *---------------------------------------------------------------------- ! 280: */ ! 281: static void ReleaseCharSet(CharSet *cset) ! 282: { ! 283: efree((char *)cset->chars); ! 284: if (cset->ranges) { ! 285: efree((char *)cset->ranges); ! 286: } ! 287: } ! 288: /* }}} */ ! 289: ! 290: /* {{{ ValidateFormat ! 291: *---------------------------------------------------------------------- ! 292: * ! 293: * ValidateFormat -- ! 294: * ! 295: * Parse the format string and verify that it is properly formed ! 296: * and that there are exactly enough variables on the command line. ! 297: * ! 298: * Results: ! 299: * FAILURE or SUCCESS. ! 300: * ! 301: * Side effects: ! 302: * May set php_error based on abnormal conditions. ! 303: * ! 304: * Parameters : ! 305: * format The format string. ! 306: * numVars The number of variables passed to the scan command. ! 307: * totalSubs The number of variables that will be required. ! 308: * ! 309: *---------------------------------------------------------------------- ! 310: */ ! 311: PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs) ! 312: { ! 313: #define STATIC_LIST_SIZE 16 ! 314: int gotXpg, gotSequential, value, i, flags; ! 315: char *end, *ch = NULL; ! 316: int staticAssign[STATIC_LIST_SIZE]; ! 317: int *nassign = staticAssign; ! 318: int objIndex, xpgSize, nspace = STATIC_LIST_SIZE; ! 319: TSRMLS_FETCH(); ! 320: ! 321: /* ! 322: * Initialize an array that records the number of times a variable ! 323: * is assigned to by the format string. We use this to detect if ! 324: * a variable is multiply assigned or left unassigned. ! 325: */ ! 326: if (numVars > nspace) { ! 327: nassign = (int*)safe_emalloc(sizeof(int), numVars, 0); ! 328: nspace = numVars; ! 329: } ! 330: for (i = 0; i < nspace; i++) { ! 331: nassign[i] = 0; ! 332: } ! 333: ! 334: xpgSize = objIndex = gotXpg = gotSequential = 0; ! 335: ! 336: while (*format != '\0') { ! 337: ch = format++; ! 338: flags = 0; ! 339: ! 340: if (*ch != '%') { ! 341: continue; ! 342: } ! 343: ch = format++; ! 344: if (*ch == '%') { ! 345: continue; ! 346: } ! 347: if (*ch == '*') { ! 348: flags |= SCAN_SUPPRESS; ! 349: ch = format++; ! 350: goto xpgCheckDone; ! 351: } ! 352: ! 353: if ( isdigit( (int)*ch ) ) { ! 354: /* ! 355: * Check for an XPG3-style %n$ specification. Note: there ! 356: * must not be a mixture of XPG3 specs and non-XPG3 specs ! 357: * in the same format string. ! 358: */ ! 359: value = strtoul(format-1, &end, 10); ! 360: if (*end != '$') { ! 361: goto notXpg; ! 362: } ! 363: format = end+1; ! 364: ch = format++; ! 365: gotXpg = 1; ! 366: if (gotSequential) { ! 367: goto mixedXPG; ! 368: } ! 369: objIndex = value - 1; ! 370: if ((objIndex < 0) || (numVars && (objIndex >= numVars))) { ! 371: goto badIndex; ! 372: } else if (numVars == 0) { ! 373: /* ! 374: * In the case where no vars are specified, the user can ! 375: * specify %9999$ legally, so we have to consider special ! 376: * rules for growing the assign array. 'value' is ! 377: * guaranteed to be > 0. ! 378: */ ! 379: ! 380: /* set a lower artificial limit on this ! 381: * in the interest of security and resource friendliness ! 382: * 255 arguments should be more than enough. - cc ! 383: */ ! 384: if (value > SCAN_MAX_ARGS) { ! 385: goto badIndex; ! 386: } ! 387: ! 388: xpgSize = (xpgSize > value) ? xpgSize : value; ! 389: } ! 390: goto xpgCheckDone; ! 391: } ! 392: ! 393: notXpg: ! 394: gotSequential = 1; ! 395: if (gotXpg) { ! 396: mixedXPG: ! 397: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers"); ! 398: goto error; ! 399: } ! 400: ! 401: xpgCheckDone: ! 402: /* ! 403: * Parse any width specifier. ! 404: */ ! 405: if (isdigit(UCHAR(*ch))) { ! 406: value = strtoul(format-1, &format, 10); ! 407: flags |= SCAN_WIDTH; ! 408: ch = format++; ! 409: } ! 410: ! 411: /* ! 412: * Ignore size specifier. ! 413: */ ! 414: if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) { ! 415: ch = format++; ! 416: } ! 417: ! 418: if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) { ! 419: goto badIndex; ! 420: } ! 421: ! 422: /* ! 423: * Handle the various field types. ! 424: */ ! 425: switch (*ch) { ! 426: case 'n': ! 427: case 'd': ! 428: case 'D': ! 429: case 'i': ! 430: case 'o': ! 431: case 'x': ! 432: case 'X': ! 433: case 'u': ! 434: case 'f': ! 435: case 'e': ! 436: case 'E': ! 437: case 'g': ! 438: case 's': ! 439: break; ! 440: ! 441: case 'c': ! 442: /* we differ here with the TCL implementation in allowing for */ ! 443: /* a character width specification, to be more consistent with */ ! 444: /* ANSI. since Zend auto allocates space for vars, this is no */ ! 445: /* problem - cc */ ! 446: /* ! 447: if (flags & SCAN_WIDTH) { ! 448: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion"); ! 449: goto error; ! 450: } ! 451: */ ! 452: break; ! 453: ! 454: case '[': ! 455: if (*format == '\0') { ! 456: goto badSet; ! 457: } ! 458: ch = format++; ! 459: if (*ch == '^') { ! 460: if (*format == '\0') { ! 461: goto badSet; ! 462: } ! 463: ch = format++; ! 464: } ! 465: if (*ch == ']') { ! 466: if (*format == '\0') { ! 467: goto badSet; ! 468: } ! 469: ch = format++; ! 470: } ! 471: while (*ch != ']') { ! 472: if (*format == '\0') { ! 473: goto badSet; ! 474: } ! 475: ch = format++; ! 476: } ! 477: break; ! 478: badSet: ! 479: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string"); ! 480: goto error; ! 481: ! 482: default: { ! 483: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch); ! 484: goto error; ! 485: } ! 486: } ! 487: ! 488: if (!(flags & SCAN_SUPPRESS)) { ! 489: if (objIndex >= nspace) { ! 490: /* ! 491: * Expand the nassign buffer. If we are using XPG specifiers, ! 492: * make sure that we grow to a large enough size. xpgSize is ! 493: * guaranteed to be at least one larger than objIndex. ! 494: */ ! 495: value = nspace; ! 496: if (xpgSize) { ! 497: nspace = xpgSize; ! 498: } else { ! 499: nspace += STATIC_LIST_SIZE; ! 500: } ! 501: if (nassign == staticAssign) { ! 502: nassign = (void *)safe_emalloc(nspace, sizeof(int), 0); ! 503: for (i = 0; i < STATIC_LIST_SIZE; ++i) { ! 504: nassign[i] = staticAssign[i]; ! 505: } ! 506: } else { ! 507: nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int)); ! 508: } ! 509: for (i = value; i < nspace; i++) { ! 510: nassign[i] = 0; ! 511: } ! 512: } ! 513: nassign[objIndex]++; ! 514: objIndex++; ! 515: } ! 516: } /* while (*format != '\0') */ ! 517: ! 518: /* ! 519: * Verify that all of the variable were assigned exactly once. ! 520: */ ! 521: if (numVars == 0) { ! 522: if (xpgSize) { ! 523: numVars = xpgSize; ! 524: } else { ! 525: numVars = objIndex; ! 526: } ! 527: } ! 528: if (totalSubs) { ! 529: *totalSubs = numVars; ! 530: } ! 531: for (i = 0; i < numVars; i++) { ! 532: if (nassign[i] > 1) { ! 533: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers"); ! 534: goto error; ! 535: } else if (!xpgSize && (nassign[i] == 0)) { ! 536: /* ! 537: * If the space is empty, and xpgSize is 0 (means XPG wasn't ! 538: * used, and/or numVars != 0), then too many vars were given ! 539: */ ! 540: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers"); ! 541: goto error; ! 542: } ! 543: } ! 544: ! 545: if (nassign != staticAssign) { ! 546: efree((char *)nassign); ! 547: } ! 548: return SCAN_SUCCESS; ! 549: ! 550: badIndex: ! 551: if (gotXpg) { ! 552: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range"); ! 553: } else { ! 554: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers"); ! 555: } ! 556: ! 557: error: ! 558: if (nassign != staticAssign) { ! 559: efree((char *)nassign); ! 560: } ! 561: return SCAN_ERROR_INVALID_FORMAT; ! 562: #undef STATIC_LIST_SIZE ! 563: } ! 564: /* }}} */ ! 565: ! 566: /* {{{ php_sscanf_internal ! 567: * This is the internal function which does processing on behalf of ! 568: * both sscanf() and fscanf() ! 569: * ! 570: * parameters : ! 571: * string literal string to be processed ! 572: * format format string ! 573: * argCount total number of elements in the args array ! 574: * args arguments passed in from user function (f|s)scanf ! 575: * varStart offset (in args) of 1st variable passed in to (f|s)scanf ! 576: * return_value set with the results of the scan ! 577: */ ! 578: ! 579: PHPAPI int php_sscanf_internal( char *string, char *format, ! 580: int argCount, zval ***args, ! 581: int varStart, zval **return_value TSRMLS_DC) ! 582: { ! 583: int numVars, nconversions, totalVars = -1; ! 584: int i, result; ! 585: long value; ! 586: int objIndex; ! 587: char *end, *baseString; ! 588: zval **current; ! 589: char op = 0; ! 590: int base = 0; ! 591: int underflow = 0; ! 592: size_t width; ! 593: long (*fn)() = NULL; ! 594: char *ch, sch; ! 595: int flags; ! 596: char buf[64]; /* Temporary buffer to hold scanned number ! 597: * strings before they are passed to strtoul() */ ! 598: ! 599: /* do some sanity checking */ ! 600: if ((varStart > argCount) || (varStart < 0)){ ! 601: varStart = SCAN_MAX_ARGS + 1; ! 602: } ! 603: numVars = argCount - varStart; ! 604: if (numVars < 0) { ! 605: numVars = 0; ! 606: } ! 607: ! 608: #if 0 ! 609: zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>", ! 610: string, format, numVars, varStart); ! 611: #endif ! 612: /* ! 613: * Check for errors in the format string. ! 614: */ ! 615: if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) { ! 616: scan_set_error_return( numVars, return_value ); ! 617: return SCAN_ERROR_INVALID_FORMAT; ! 618: } ! 619: ! 620: objIndex = numVars ? varStart : 0; ! 621: ! 622: /* ! 623: * If any variables are passed, make sure they are all passed by reference ! 624: */ ! 625: if (numVars) { ! 626: for (i = varStart;i < argCount;i++){ ! 627: if ( ! PZVAL_IS_REF( *args[ i ] ) ) { ! 628: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i); ! 629: scan_set_error_return(numVars, return_value); ! 630: return SCAN_ERROR_VAR_PASSED_BYVAL; ! 631: } ! 632: } ! 633: } ! 634: ! 635: /* ! 636: * Allocate space for the result objects. Only happens when no variables ! 637: * are specified ! 638: */ ! 639: if (!numVars) { ! 640: zval *tmp; ! 641: ! 642: /* allocate an array for return */ ! 643: array_init(*return_value); ! 644: ! 645: for (i = 0; i < totalVars; i++) { ! 646: MAKE_STD_ZVAL(tmp); ! 647: ZVAL_NULL(tmp); ! 648: if (add_next_index_zval(*return_value, tmp) == FAILURE) { ! 649: scan_set_error_return(0, return_value); ! 650: return FAILURE; ! 651: } ! 652: } ! 653: varStart = 0; /* Array index starts from 0 */ ! 654: } ! 655: ! 656: baseString = string; ! 657: ! 658: /* ! 659: * Iterate over the format string filling in the result objects until ! 660: * we reach the end of input, the end of the format string, or there ! 661: * is a mismatch. ! 662: */ ! 663: nconversions = 0; ! 664: /* note ! - we need to limit the loop for objIndex to keep it in bounds */ ! 665: ! 666: while (*format != '\0') { ! 667: ch = format++; ! 668: flags = 0; ! 669: ! 670: /* ! 671: * If we see whitespace in the format, skip whitespace in the string. ! 672: */ ! 673: if ( isspace( (int)*ch ) ) { ! 674: sch = *string; ! 675: while ( isspace( (int)sch ) ) { ! 676: if (*string == '\0') { ! 677: goto done; ! 678: } ! 679: string++; ! 680: sch = *string; ! 681: } ! 682: continue; ! 683: } ! 684: ! 685: if (*ch != '%') { ! 686: literal: ! 687: if (*string == '\0') { ! 688: underflow = 1; ! 689: goto done; ! 690: } ! 691: sch = *string; ! 692: string++; ! 693: if (*ch != sch) { ! 694: goto done; ! 695: } ! 696: continue; ! 697: } ! 698: ! 699: ch = format++; ! 700: if (*ch == '%') { ! 701: goto literal; ! 702: } ! 703: ! 704: /* ! 705: * Check for assignment suppression ('*') or an XPG3-style ! 706: * assignment ('%n$'). ! 707: */ ! 708: if (*ch == '*') { ! 709: flags |= SCAN_SUPPRESS; ! 710: ch = format++; ! 711: } else if ( isdigit(UCHAR(*ch))) { ! 712: value = strtoul(format-1, &end, 10); ! 713: if (*end == '$') { ! 714: format = end+1; ! 715: ch = format++; ! 716: objIndex = varStart + value - 1; ! 717: } ! 718: } ! 719: ! 720: /* ! 721: * Parse any width specifier. ! 722: */ ! 723: if ( isdigit(UCHAR(*ch))) { ! 724: width = strtoul(format-1, &format, 10); ! 725: ch = format++; ! 726: } else { ! 727: width = 0; ! 728: } ! 729: ! 730: /* ! 731: * Ignore size specifier. ! 732: */ ! 733: if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) { ! 734: ch = format++; ! 735: } ! 736: ! 737: /* ! 738: * Handle the various field types. ! 739: */ ! 740: switch (*ch) { ! 741: case 'n': ! 742: if (!(flags & SCAN_SUPPRESS)) { ! 743: if (numVars && objIndex >= argCount) { ! 744: break; ! 745: } else if (numVars) { ! 746: zend_uint refcount; ! 747: ! 748: current = args[objIndex++]; ! 749: refcount = Z_REFCOUNT_PP(current); ! 750: zval_dtor( *current ); ! 751: ZVAL_LONG( *current, (long)(string - baseString) ); ! 752: Z_SET_REFCOUNT_PP(current, refcount); ! 753: Z_SET_ISREF_PP(current); ! 754: } else { ! 755: add_index_long(*return_value, objIndex++, string - baseString); ! 756: } ! 757: } ! 758: nconversions++; ! 759: continue; ! 760: ! 761: case 'd': ! 762: case 'D': ! 763: op = 'i'; ! 764: base = 10; ! 765: fn = (long (*)())strtol; ! 766: break; ! 767: case 'i': ! 768: op = 'i'; ! 769: base = 0; ! 770: fn = (long (*)())strtol; ! 771: break; ! 772: case 'o': ! 773: op = 'i'; ! 774: base = 8; ! 775: fn = (long (*)())strtol; ! 776: break; ! 777: case 'x': ! 778: case 'X': ! 779: op = 'i'; ! 780: base = 16; ! 781: fn = (long (*)())strtol; ! 782: break; ! 783: case 'u': ! 784: op = 'i'; ! 785: base = 10; ! 786: flags |= SCAN_UNSIGNED; ! 787: fn = (long (*)())strtoul; ! 788: break; ! 789: ! 790: case 'f': ! 791: case 'e': ! 792: case 'E': ! 793: case 'g': ! 794: op = 'f'; ! 795: break; ! 796: ! 797: case 's': ! 798: op = 's'; ! 799: break; ! 800: ! 801: case 'c': ! 802: op = 's'; ! 803: flags |= SCAN_NOSKIP; ! 804: /*-cc-*/ ! 805: if (0 == width) { ! 806: width = 1; ! 807: } ! 808: /*-cc-*/ ! 809: break; ! 810: case '[': ! 811: op = '['; ! 812: flags |= SCAN_NOSKIP; ! 813: break; ! 814: } /* switch */ ! 815: ! 816: /* ! 817: * At this point, we will need additional characters from the ! 818: * string to proceed. ! 819: */ ! 820: if (*string == '\0') { ! 821: underflow = 1; ! 822: goto done; ! 823: } ! 824: ! 825: /* ! 826: * Skip any leading whitespace at the beginning of a field unless ! 827: * the format suppresses this behavior. ! 828: */ ! 829: if (!(flags & SCAN_NOSKIP)) { ! 830: while (*string != '\0') { ! 831: sch = *string; ! 832: if (! isspace((int)sch) ) { ! 833: break; ! 834: } ! 835: string++; ! 836: } ! 837: if (*string == '\0') { ! 838: underflow = 1; ! 839: goto done; ! 840: } ! 841: } ! 842: ! 843: /* ! 844: * Perform the requested scanning operation. ! 845: */ ! 846: switch (op) { ! 847: case 'c': ! 848: case 's': ! 849: /* ! 850: * Scan a string up to width characters or whitespace. ! 851: */ ! 852: if (width == 0) { ! 853: width = (size_t) ~0; ! 854: } ! 855: end = string; ! 856: while (*end != '\0') { ! 857: sch = *end; ! 858: if ( isspace( (int)sch ) ) { ! 859: break; ! 860: } ! 861: end++; ! 862: if (--width == 0) { ! 863: break; ! 864: } ! 865: } ! 866: if (!(flags & SCAN_SUPPRESS)) { ! 867: if (numVars && objIndex >= argCount) { ! 868: break; ! 869: } else if (numVars) { ! 870: zend_uint refcount; ! 871: ! 872: current = args[objIndex++]; ! 873: refcount = Z_REFCOUNT_PP(current); ! 874: zval_dtor( *current ); ! 875: ZVAL_STRINGL( *current, string, end-string, 1); ! 876: Z_SET_REFCOUNT_PP(current, refcount); ! 877: Z_SET_ISREF_PP(current); ! 878: } else { ! 879: add_index_stringl( *return_value, objIndex++, string, end-string, 1); ! 880: } ! 881: } ! 882: string = end; ! 883: break; ! 884: ! 885: case '[': { ! 886: CharSet cset; ! 887: ! 888: if (width == 0) { ! 889: width = (size_t) ~0; ! 890: } ! 891: end = string; ! 892: ! 893: format = BuildCharSet(&cset, format); ! 894: while (*end != '\0') { ! 895: sch = *end; ! 896: if (!CharInSet(&cset, (int)sch)) { ! 897: break; ! 898: } ! 899: end++; ! 900: if (--width == 0) { ! 901: break; ! 902: } ! 903: } ! 904: ReleaseCharSet(&cset); ! 905: ! 906: if (string == end) { ! 907: /* ! 908: * Nothing matched the range, stop processing ! 909: */ ! 910: goto done; ! 911: } ! 912: if (!(flags & SCAN_SUPPRESS)) { ! 913: if (numVars && objIndex >= argCount) { ! 914: break; ! 915: } else if (numVars) { ! 916: current = args[objIndex++]; ! 917: zval_dtor( *current ); ! 918: ZVAL_STRINGL( *current, string, end-string, 1); ! 919: } else { ! 920: add_index_stringl(*return_value, objIndex++, string, end-string, 1); ! 921: } ! 922: } ! 923: string = end; ! 924: break; ! 925: } ! 926: /* ! 927: case 'c': ! 928: / Scan a single character./ ! 929: ! 930: sch = *string; ! 931: string++; ! 932: if (!(flags & SCAN_SUPPRESS)) { ! 933: if (numVars) { ! 934: char __buf[2]; ! 935: __buf[0] = sch; ! 936: __buf[1] = '\0';; ! 937: current = args[objIndex++]; ! 938: zval_dtor(*current); ! 939: ZVAL_STRINGL( *current, __buf, 1, 1); ! 940: } else { ! 941: add_index_stringl(*return_value, objIndex++, &sch, 1, 1); ! 942: } ! 943: } ! 944: break; ! 945: */ ! 946: case 'i': ! 947: /* ! 948: * Scan an unsigned or signed integer. ! 949: */ ! 950: /*-cc-*/ ! 951: buf[0] = '\0'; ! 952: /*-cc-*/ ! 953: if ((width == 0) || (width > sizeof(buf) - 1)) { ! 954: width = sizeof(buf) - 1; ! 955: } ! 956: ! 957: flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO; ! 958: for (end = buf; width > 0; width--) { ! 959: switch (*string) { ! 960: /* ! 961: * The 0 digit has special meaning at the beginning of ! 962: * a number. If we are unsure of the base, it ! 963: * indicates that we are in base 8 or base 16 (if it is ! 964: * followed by an 'x'). ! 965: */ ! 966: case '0': ! 967: /*-cc-*/ ! 968: if (base == 16) { ! 969: flags |= SCAN_XOK; ! 970: } ! 971: /*-cc-*/ ! 972: if (base == 0) { ! 973: base = 8; ! 974: flags |= SCAN_XOK; ! 975: } ! 976: if (flags & SCAN_NOZERO) { ! 977: flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO); ! 978: } else { ! 979: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); ! 980: } ! 981: goto addToInt; ! 982: ! 983: case '1': case '2': case '3': case '4': ! 984: case '5': case '6': case '7': ! 985: if (base == 0) { ! 986: base = 10; ! 987: } ! 988: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); ! 989: goto addToInt; ! 990: ! 991: case '8': case '9': ! 992: if (base == 0) { ! 993: base = 10; ! 994: } ! 995: if (base <= 8) { ! 996: break; ! 997: } ! 998: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); ! 999: goto addToInt; ! 1000: ! 1001: case 'A': case 'B': case 'C': ! 1002: case 'D': case 'E': case 'F': ! 1003: case 'a': case 'b': case 'c': ! 1004: case 'd': case 'e': case 'f': ! 1005: if (base <= 10) { ! 1006: break; ! 1007: } ! 1008: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); ! 1009: goto addToInt; ! 1010: ! 1011: case '+': case '-': ! 1012: if (flags & SCAN_SIGNOK) { ! 1013: flags &= ~SCAN_SIGNOK; ! 1014: goto addToInt; ! 1015: } ! 1016: break; ! 1017: ! 1018: case 'x': case 'X': ! 1019: if ((flags & SCAN_XOK) && (end == buf+1)) { ! 1020: base = 16; ! 1021: flags &= ~SCAN_XOK; ! 1022: goto addToInt; ! 1023: } ! 1024: break; ! 1025: } ! 1026: ! 1027: /* ! 1028: * We got an illegal character so we are done accumulating. ! 1029: */ ! 1030: break; ! 1031: ! 1032: addToInt: ! 1033: /* ! 1034: * Add the character to the temporary buffer. ! 1035: */ ! 1036: *end++ = *string++; ! 1037: if (*string == '\0') { ! 1038: break; ! 1039: } ! 1040: } ! 1041: ! 1042: /* ! 1043: * Check to see if we need to back up because we only got a ! 1044: * sign or a trailing x after a 0. ! 1045: */ ! 1046: if (flags & SCAN_NODIGITS) { ! 1047: if (*string == '\0') { ! 1048: underflow = 1; ! 1049: } ! 1050: goto done; ! 1051: } else if (end[-1] == 'x' || end[-1] == 'X') { ! 1052: end--; ! 1053: string--; ! 1054: } ! 1055: ! 1056: /* ! 1057: * Scan the value from the temporary buffer. If we are ! 1058: * returning a large unsigned value, we have to convert it back ! 1059: * to a string since PHP only supports signed values. ! 1060: */ ! 1061: if (!(flags & SCAN_SUPPRESS)) { ! 1062: *end = '\0'; ! 1063: value = (long) (*fn)(buf, NULL, base); ! 1064: if ((flags & SCAN_UNSIGNED) && (value < 0)) { ! 1065: snprintf(buf, sizeof(buf), "%lu", value); /* INTL: ISO digit */ ! 1066: if (numVars && objIndex >= argCount) { ! 1067: break; ! 1068: } else if (numVars) { ! 1069: /* change passed value type to string */ ! 1070: current = args[objIndex++]; ! 1071: zval_dtor(*current); ! 1072: ZVAL_STRING( *current, buf, 1 ); ! 1073: } else { ! 1074: add_index_string(*return_value, objIndex++, buf, 1); ! 1075: } ! 1076: } else { ! 1077: if (numVars && objIndex >= argCount) { ! 1078: break; ! 1079: } else if (numVars) { ! 1080: current = args[objIndex++]; ! 1081: zval_dtor(*current); ! 1082: ZVAL_LONG(*current, value); ! 1083: } else { ! 1084: add_index_long(*return_value, objIndex++, value); ! 1085: } ! 1086: } ! 1087: } ! 1088: break; ! 1089: ! 1090: case 'f': ! 1091: /* ! 1092: * Scan a floating point number ! 1093: */ ! 1094: buf[0] = '\0'; /* call me pedantic */ ! 1095: if ((width == 0) || (width > sizeof(buf) - 1)) { ! 1096: width = sizeof(buf) - 1; ! 1097: } ! 1098: flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK; ! 1099: for (end = buf; width > 0; width--) { ! 1100: switch (*string) { ! 1101: case '0': case '1': case '2': case '3': ! 1102: case '4': case '5': case '6': case '7': ! 1103: case '8': case '9': ! 1104: flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS); ! 1105: goto addToFloat; ! 1106: case '+': ! 1107: case '-': ! 1108: if (flags & SCAN_SIGNOK) { ! 1109: flags &= ~SCAN_SIGNOK; ! 1110: goto addToFloat; ! 1111: } ! 1112: break; ! 1113: case '.': ! 1114: if (flags & SCAN_PTOK) { ! 1115: flags &= ~(SCAN_SIGNOK | SCAN_PTOK); ! 1116: goto addToFloat; ! 1117: } ! 1118: break; ! 1119: case 'e': ! 1120: case 'E': ! 1121: /* ! 1122: * An exponent is not allowed until there has ! 1123: * been at least one digit. ! 1124: */ ! 1125: if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) { ! 1126: flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK)) ! 1127: | SCAN_SIGNOK | SCAN_NODIGITS; ! 1128: goto addToFloat; ! 1129: } ! 1130: break; ! 1131: } ! 1132: ! 1133: /* ! 1134: * We got an illegal character so we are done accumulating. ! 1135: */ ! 1136: break; ! 1137: ! 1138: addToFloat: ! 1139: /* ! 1140: * Add the character to the temporary buffer. ! 1141: */ ! 1142: *end++ = *string++; ! 1143: if (*string == '\0') { ! 1144: break; ! 1145: } ! 1146: } ! 1147: ! 1148: /* ! 1149: * Check to see if we need to back up because we saw a ! 1150: * trailing 'e' or sign. ! 1151: */ ! 1152: if (flags & SCAN_NODIGITS) { ! 1153: if (flags & SCAN_EXPOK) { ! 1154: /* ! 1155: * There were no digits at all so scanning has ! 1156: * failed and we are done. ! 1157: */ ! 1158: if (*string == '\0') { ! 1159: underflow = 1; ! 1160: } ! 1161: goto done; ! 1162: } ! 1163: ! 1164: /* ! 1165: * We got a bad exponent ('e' and maybe a sign). ! 1166: */ ! 1167: end--; ! 1168: string--; ! 1169: if (*end != 'e' && *end != 'E') { ! 1170: end--; ! 1171: string--; ! 1172: } ! 1173: } ! 1174: ! 1175: /* ! 1176: * Scan the value from the temporary buffer. ! 1177: */ ! 1178: if (!(flags & SCAN_SUPPRESS)) { ! 1179: double dvalue; ! 1180: *end = '\0'; ! 1181: dvalue = zend_strtod(buf, NULL); ! 1182: if (numVars && objIndex >= argCount) { ! 1183: break; ! 1184: } else if (numVars) { ! 1185: current = args[objIndex++]; ! 1186: zval_dtor(*current); ! 1187: ZVAL_DOUBLE(*current, dvalue); ! 1188: } else { ! 1189: add_index_double( *return_value, objIndex++, dvalue ); ! 1190: } ! 1191: } ! 1192: break; ! 1193: } /* switch (op) */ ! 1194: nconversions++; ! 1195: } /* while (*format != '\0') */ ! 1196: ! 1197: done: ! 1198: result = SCAN_SUCCESS; ! 1199: ! 1200: if (underflow && (0==nconversions)) { ! 1201: scan_set_error_return( numVars, return_value ); ! 1202: result = SCAN_ERROR_EOF; ! 1203: } else if (numVars) { ! 1204: convert_to_long( *return_value ); ! 1205: Z_LVAL_PP(return_value) = nconversions; ! 1206: } else if (nconversions < totalVars) { ! 1207: /* TODO: not all elements converted. we need to prune the list - cc */ ! 1208: } ! 1209: return result; ! 1210: } ! 1211: /* }}} */ ! 1212: ! 1213: /* the compiler choked when i tried to make this a macro */ ! 1214: static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */ ! 1215: { ! 1216: if (numVars) { ! 1217: Z_TYPE_PP(return_value) = IS_LONG; ! 1218: Z_LVAL_PP(return_value) = SCAN_ERROR_EOF; /* EOF marker */ ! 1219: } else { ! 1220: /* convert_to_null calls destructor */ ! 1221: convert_to_null( *return_value ); ! 1222: } ! 1223: } ! 1224: /* }}} */ ! 1225: ! 1226: /* ! 1227: * Local variables: ! 1228: * tab-width: 4 ! 1229: * c-basic-offset: 4 ! 1230: * End: ! 1231: * vim600: sw=4 ts=4 fdm=marker ! 1232: * vim<600: sw=4 ts=4 ! 1233: */