Return to scanf.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard |
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 1997-2012 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Clayton Collie <clcollie@mindspring.com> |
16: +----------------------------------------------------------------------+
17: */
18:
1.1.1.2 ! misho 19: /* $Id$ */
1.1 misho 20:
21: /*
22: scanf.c --
23:
24: This file contains the base code which implements sscanf and by extension
25: fscanf. Original code is from TCL8.3.0 and bears the following copyright:
26:
27: This software is copyrighted by the Regents of the University of
28: California, Sun Microsystems, Inc., Scriptics Corporation,
29: and other parties. The following terms apply to all files associated
30: with the software unless explicitly disclaimed in individual files.
31:
32: The authors hereby grant permission to use, copy, modify, distribute,
33: and license this software and its documentation for any purpose, provided
34: that existing copyright notices are retained in all copies and that this
35: notice is included verbatim in any distributions. No written agreement,
36: license, or royalty fee is required for any of the authorized uses.
37: Modifications to this software may be copyrighted by their authors
38: and need not follow the licensing terms described here, provided that
39: the new terms are clearly indicated on the first page of each file where
40: they apply.
41:
42: IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
43: FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
44: ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
45: DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
46: POSSIBILITY OF SUCH DAMAGE.
47:
48: THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
49: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
50: FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
51: IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
52: NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
53: MODIFICATIONS.
54:
55: GOVERNMENT USE: If you are acquiring this software on behalf of the
56: U.S. government, the Government shall have only "Restricted Rights"
57: in the software and related documentation as defined in the Federal
58: Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
59: are acquiring the software on behalf of the Department of Defense, the
60: software shall be classified as "Commercial Computer Software" and the
61: Government shall have only "Restricted Rights" as defined in Clause
62: 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
63: authors grant the U.S. Government and others acting in its behalf
64: permission to use and distribute the software in accordance with the
65: terms specified in this license.
66: */
67:
68: #include <stdio.h>
69: #include <limits.h>
70: #include <ctype.h>
71: #include "php.h"
72: #include "php_variables.h"
73: #ifdef HAVE_LOCALE_H
74: #include <locale.h>
75: #endif
76: #include "zend_execute.h"
77: #include "zend_operators.h"
78: #include "zend_strtod.h"
79: #include "php_globals.h"
80: #include "basic_functions.h"
81: #include "scanf.h"
82:
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion specifier being processed.
 */
#define SCAN_NOSKIP		0x1		/* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2		/* Suppress assignment. */
#define SCAN_UNSIGNED	0x4		/* Read an unsigned value. */
#define SCAN_WIDTH		0x8		/* A width value was supplied. */

/*
 * The second group tracks the state of the integer and floating point
 * scanners while they accumulate characters.
 */
#define SCAN_SIGNOK		0x10	/* A +/- character is allowed. */
#define SCAN_NODIGITS	0x20	/* No digits have been scanned. */
#define SCAN_NOZERO		0x40	/* No zero digits have been scanned. */
#define SCAN_XOK		0x80	/* An 'x' is allowed. */
#define SCAN_PTOK		0x100	/* Decimal point is allowed. */
#define SCAN_EXPOK		0x200	/* An exponent is allowed. */

/*
 * Convert a (possibly negative) char to zend_uchar, e.g. before handing it
 * to a <ctype.h> classifier.  The whole expansion is parenthesized so the
 * macro behaves as a single primary expression wherever it is used.
 */
#define UCHAR(x)		((zend_uchar)(x))
99:
/*
 * Description of the character set named by a "%[...]" conversion.
 * A character belongs to the set when it is listed in 'chars' or falls
 * inside one of the inclusive 'ranges'; 'exclude' inverts the answer.
 */
struct Range {
	char start;		/* First character of the range (inclusive). */
	char end;		/* Last character of the range (inclusive). */
};

typedef struct CharSet {
	int exclude;			/* 1 if this is an exclusion set. */
	int nchars;				/* Number of entries used in 'chars'. */
	char *chars;			/* Individually listed characters. */
	int nranges;			/* Number of entries used in 'ranges'. */
	struct Range *ranges;	/* Character ranges, or NULL if there are none. */
} CharSet;
114:
115: /*
116: * Declarations for functions used only in this file.
117: */
118: static char *BuildCharSet(CharSet *cset, char *format);
119: static int CharInSet(CharSet *cset, int ch);
120: static void ReleaseCharSet(CharSet *cset);
121: static inline void scan_set_error_return(int numVars, zval **return_value);
122:
123:
124: /* {{{ BuildCharSet
125: *----------------------------------------------------------------------
126: *
127: * BuildCharSet --
128: *
129: * This function examines a character set format specification
130: * and builds a CharSet containing the individual characters and
131: * character ranges specified.
132: *
133: * Results:
134: * Returns the next format position.
135: *
136: * Side effects:
137: * Initializes the charset.
138: *
139: *----------------------------------------------------------------------
140: */
141: static char * BuildCharSet(CharSet *cset, char *format)
142: {
143: char *ch, start;
144: int nranges;
145: char *end;
146:
147: memset(cset, 0, sizeof(CharSet));
148:
149: ch = format;
150: if (*ch == '^') {
151: cset->exclude = 1;
152: ch = ++format;
153: }
154: end = format + 1; /* verify this - cc */
155:
156: /*
157: * Find the close bracket so we can overallocate the set.
158: */
159: if (*ch == ']') {
160: ch = end++;
161: }
162: nranges = 0;
163: while (*ch != ']') {
164: if (*ch == '-') {
165: nranges++;
166: }
167: ch = end++;
168: }
169:
170: cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
171: if (nranges > 0) {
172: cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
173: } else {
174: cset->ranges = NULL;
175: }
176:
177: /*
178: * Now build the character set.
179: */
180: cset->nchars = cset->nranges = 0;
181: ch = format++;
182: start = *ch;
183: if (*ch == ']' || *ch == '-') {
184: cset->chars[cset->nchars++] = *ch;
185: ch = format++;
186: }
187: while (*ch != ']') {
188: if (*format == '-') {
189: /*
190: * This may be the first character of a range, so don't add
191: * it yet.
192: */
193: start = *ch;
194: } else if (*ch == '-') {
195: /*
196: * Check to see if this is the last character in the set, in which
197: * case it is not a range and we should add the previous character
198: * as well as the dash.
199: */
200: if (*format == ']') {
201: cset->chars[cset->nchars++] = start;
202: cset->chars[cset->nchars++] = *ch;
203: } else {
204: ch = format++;
205:
206: /*
207: * Check to see if the range is in reverse order.
208: */
209: if (start < *ch) {
210: cset->ranges[cset->nranges].start = start;
211: cset->ranges[cset->nranges].end = *ch;
212: } else {
213: cset->ranges[cset->nranges].start = *ch;
214: cset->ranges[cset->nranges].end = start;
215: }
216: cset->nranges++;
217: }
218: } else {
219: cset->chars[cset->nchars++] = *ch;
220: }
221: ch = format++;
222: }
223: return format;
224: }
225: /* }}} */
226:
227: /* {{{ CharInSet
228: *----------------------------------------------------------------------
229: *
230: * CharInSet --
231: *
232: * Check to see if a character matches the given set.
233: *
234: * Results:
235: * Returns non-zero if the character matches the given set.
236: *
237: * Side effects:
238: * None.
239: *
240: *----------------------------------------------------------------------
241: */
242: static int CharInSet(CharSet *cset, int c)
243: {
244: char ch = (char) c;
245: int i, match = 0;
246:
247: for (i = 0; i < cset->nchars; i++) {
248: if (cset->chars[i] == ch) {
249: match = 1;
250: break;
251: }
252: }
253: if (!match) {
254: for (i = 0; i < cset->nranges; i++) {
255: if ((cset->ranges[i].start <= ch)
256: && (ch <= cset->ranges[i].end)) {
257: match = 1;
258: break;
259: }
260: }
261: }
262: return (cset->exclude ? !match : match);
263: }
264: /* }}} */
265:
266: /* {{{ ReleaseCharSet
267: *----------------------------------------------------------------------
268: *
269: * ReleaseCharSet --
270: *
271: * Free the storage associated with a character set.
272: *
273: * Results:
274: * None.
275: *
276: * Side effects:
277: * None.
278: *
279: *----------------------------------------------------------------------
280: */
281: static void ReleaseCharSet(CharSet *cset)
282: {
283: efree((char *)cset->chars);
284: if (cset->ranges) {
285: efree((char *)cset->ranges);
286: }
287: }
288: /* }}} */
289:
290: /* {{{ ValidateFormat
291: *----------------------------------------------------------------------
292: *
293: * ValidateFormat --
294: *
295: * Parse the format string and verify that it is properly formed
296: * and that there are exactly enough variables on the command line.
297: *
298: * Results:
299: * FAILURE or SUCCESS.
300: *
301: * Side effects:
302: * May set php_error based on abnormal conditions.
303: *
304: * Parameters :
305: * format The format string.
306: * numVars The number of variables passed to the scan command.
307: * totalSubs The number of variables that will be required.
308: *
309: *----------------------------------------------------------------------
310: */
311: PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
312: {
313: #define STATIC_LIST_SIZE 16
314: int gotXpg, gotSequential, value, i, flags;
315: char *end, *ch = NULL;
316: int staticAssign[STATIC_LIST_SIZE];
317: int *nassign = staticAssign;
318: int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
319: TSRMLS_FETCH();
320:
321: /*
322: * Initialize an array that records the number of times a variable
323: * is assigned to by the format string. We use this to detect if
324: * a variable is multiply assigned or left unassigned.
325: */
326: if (numVars > nspace) {
327: nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
328: nspace = numVars;
329: }
330: for (i = 0; i < nspace; i++) {
331: nassign[i] = 0;
332: }
333:
334: xpgSize = objIndex = gotXpg = gotSequential = 0;
335:
336: while (*format != '\0') {
337: ch = format++;
338: flags = 0;
339:
340: if (*ch != '%') {
341: continue;
342: }
343: ch = format++;
344: if (*ch == '%') {
345: continue;
346: }
347: if (*ch == '*') {
348: flags |= SCAN_SUPPRESS;
349: ch = format++;
350: goto xpgCheckDone;
351: }
352:
353: if ( isdigit( (int)*ch ) ) {
354: /*
355: * Check for an XPG3-style %n$ specification. Note: there
356: * must not be a mixture of XPG3 specs and non-XPG3 specs
357: * in the same format string.
358: */
359: value = strtoul(format-1, &end, 10);
360: if (*end != '$') {
361: goto notXpg;
362: }
363: format = end+1;
364: ch = format++;
365: gotXpg = 1;
366: if (gotSequential) {
367: goto mixedXPG;
368: }
369: objIndex = value - 1;
370: if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
371: goto badIndex;
372: } else if (numVars == 0) {
373: /*
374: * In the case where no vars are specified, the user can
375: * specify %9999$ legally, so we have to consider special
376: * rules for growing the assign array. 'value' is
377: * guaranteed to be > 0.
378: */
379:
380: /* set a lower artificial limit on this
381: * in the interest of security and resource friendliness
382: * 255 arguments should be more than enough. - cc
383: */
384: if (value > SCAN_MAX_ARGS) {
385: goto badIndex;
386: }
387:
388: xpgSize = (xpgSize > value) ? xpgSize : value;
389: }
390: goto xpgCheckDone;
391: }
392:
393: notXpg:
394: gotSequential = 1;
395: if (gotXpg) {
396: mixedXPG:
397: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
398: goto error;
399: }
400:
401: xpgCheckDone:
402: /*
403: * Parse any width specifier.
404: */
405: if (isdigit(UCHAR(*ch))) {
406: value = strtoul(format-1, &format, 10);
407: flags |= SCAN_WIDTH;
408: ch = format++;
409: }
410:
411: /*
412: * Ignore size specifier.
413: */
414: if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
415: ch = format++;
416: }
417:
418: if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
419: goto badIndex;
420: }
421:
422: /*
423: * Handle the various field types.
424: */
425: switch (*ch) {
426: case 'n':
427: case 'd':
428: case 'D':
429: case 'i':
430: case 'o':
431: case 'x':
432: case 'X':
433: case 'u':
434: case 'f':
435: case 'e':
436: case 'E':
437: case 'g':
438: case 's':
439: break;
440:
441: case 'c':
442: /* we differ here with the TCL implementation in allowing for */
443: /* a character width specification, to be more consistent with */
444: /* ANSI. since Zend auto allocates space for vars, this is no */
445: /* problem - cc */
446: /*
447: if (flags & SCAN_WIDTH) {
448: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
449: goto error;
450: }
451: */
452: break;
453:
454: case '[':
455: if (*format == '\0') {
456: goto badSet;
457: }
458: ch = format++;
459: if (*ch == '^') {
460: if (*format == '\0') {
461: goto badSet;
462: }
463: ch = format++;
464: }
465: if (*ch == ']') {
466: if (*format == '\0') {
467: goto badSet;
468: }
469: ch = format++;
470: }
471: while (*ch != ']') {
472: if (*format == '\0') {
473: goto badSet;
474: }
475: ch = format++;
476: }
477: break;
478: badSet:
479: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
480: goto error;
481:
482: default: {
483: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
484: goto error;
485: }
486: }
487:
488: if (!(flags & SCAN_SUPPRESS)) {
489: if (objIndex >= nspace) {
490: /*
491: * Expand the nassign buffer. If we are using XPG specifiers,
492: * make sure that we grow to a large enough size. xpgSize is
493: * guaranteed to be at least one larger than objIndex.
494: */
495: value = nspace;
496: if (xpgSize) {
497: nspace = xpgSize;
498: } else {
499: nspace += STATIC_LIST_SIZE;
500: }
501: if (nassign == staticAssign) {
502: nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
503: for (i = 0; i < STATIC_LIST_SIZE; ++i) {
504: nassign[i] = staticAssign[i];
505: }
506: } else {
507: nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
508: }
509: for (i = value; i < nspace; i++) {
510: nassign[i] = 0;
511: }
512: }
513: nassign[objIndex]++;
514: objIndex++;
515: }
516: } /* while (*format != '\0') */
517:
518: /*
519: * Verify that all of the variable were assigned exactly once.
520: */
521: if (numVars == 0) {
522: if (xpgSize) {
523: numVars = xpgSize;
524: } else {
525: numVars = objIndex;
526: }
527: }
528: if (totalSubs) {
529: *totalSubs = numVars;
530: }
531: for (i = 0; i < numVars; i++) {
532: if (nassign[i] > 1) {
533: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
534: goto error;
535: } else if (!xpgSize && (nassign[i] == 0)) {
536: /*
537: * If the space is empty, and xpgSize is 0 (means XPG wasn't
538: * used, and/or numVars != 0), then too many vars were given
539: */
540: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
541: goto error;
542: }
543: }
544:
545: if (nassign != staticAssign) {
546: efree((char *)nassign);
547: }
548: return SCAN_SUCCESS;
549:
550: badIndex:
551: if (gotXpg) {
552: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
553: } else {
554: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
555: }
556:
557: error:
558: if (nassign != staticAssign) {
559: efree((char *)nassign);
560: }
561: return SCAN_ERROR_INVALID_FORMAT;
562: #undef STATIC_LIST_SIZE
563: }
564: /* }}} */
565:
566: /* {{{ php_sscanf_internal
567: * This is the internal function which does processing on behalf of
568: * both sscanf() and fscanf()
569: *
570: * parameters :
571: * string literal string to be processed
572: * format format string
573: * argCount total number of elements in the args array
574: * args arguments passed in from user function (f|s)scanf
575: * varStart offset (in args) of 1st variable passed in to (f|s)scanf
576: * return_value set with the results of the scan
577: */
578:
579: PHPAPI int php_sscanf_internal( char *string, char *format,
580: int argCount, zval ***args,
581: int varStart, zval **return_value TSRMLS_DC)
582: {
583: int numVars, nconversions, totalVars = -1;
584: int i, result;
585: long value;
586: int objIndex;
587: char *end, *baseString;
588: zval **current;
589: char op = 0;
590: int base = 0;
591: int underflow = 0;
592: size_t width;
593: long (*fn)() = NULL;
594: char *ch, sch;
595: int flags;
596: char buf[64]; /* Temporary buffer to hold scanned number
597: * strings before they are passed to strtoul() */
598:
599: /* do some sanity checking */
600: if ((varStart > argCount) || (varStart < 0)){
601: varStart = SCAN_MAX_ARGS + 1;
602: }
603: numVars = argCount - varStart;
604: if (numVars < 0) {
605: numVars = 0;
606: }
607:
608: #if 0
609: zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
610: string, format, numVars, varStart);
611: #endif
612: /*
613: * Check for errors in the format string.
614: */
615: if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
616: scan_set_error_return( numVars, return_value );
617: return SCAN_ERROR_INVALID_FORMAT;
618: }
619:
620: objIndex = numVars ? varStart : 0;
621:
622: /*
623: * If any variables are passed, make sure they are all passed by reference
624: */
625: if (numVars) {
626: for (i = varStart;i < argCount;i++){
627: if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
628: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
629: scan_set_error_return(numVars, return_value);
630: return SCAN_ERROR_VAR_PASSED_BYVAL;
631: }
632: }
633: }
634:
635: /*
636: * Allocate space for the result objects. Only happens when no variables
637: * are specified
638: */
639: if (!numVars) {
640: zval *tmp;
641:
642: /* allocate an array for return */
643: array_init(*return_value);
644:
645: for (i = 0; i < totalVars; i++) {
646: MAKE_STD_ZVAL(tmp);
647: ZVAL_NULL(tmp);
648: if (add_next_index_zval(*return_value, tmp) == FAILURE) {
649: scan_set_error_return(0, return_value);
650: return FAILURE;
651: }
652: }
653: varStart = 0; /* Array index starts from 0 */
654: }
655:
656: baseString = string;
657:
658: /*
659: * Iterate over the format string filling in the result objects until
660: * we reach the end of input, the end of the format string, or there
661: * is a mismatch.
662: */
663: nconversions = 0;
664: /* note ! - we need to limit the loop for objIndex to keep it in bounds */
665:
666: while (*format != '\0') {
667: ch = format++;
668: flags = 0;
669:
670: /*
671: * If we see whitespace in the format, skip whitespace in the string.
672: */
673: if ( isspace( (int)*ch ) ) {
674: sch = *string;
675: while ( isspace( (int)sch ) ) {
676: if (*string == '\0') {
677: goto done;
678: }
679: string++;
680: sch = *string;
681: }
682: continue;
683: }
684:
685: if (*ch != '%') {
686: literal:
687: if (*string == '\0') {
688: underflow = 1;
689: goto done;
690: }
691: sch = *string;
692: string++;
693: if (*ch != sch) {
694: goto done;
695: }
696: continue;
697: }
698:
699: ch = format++;
700: if (*ch == '%') {
701: goto literal;
702: }
703:
704: /*
705: * Check for assignment suppression ('*') or an XPG3-style
706: * assignment ('%n$').
707: */
708: if (*ch == '*') {
709: flags |= SCAN_SUPPRESS;
710: ch = format++;
711: } else if ( isdigit(UCHAR(*ch))) {
712: value = strtoul(format-1, &end, 10);
713: if (*end == '$') {
714: format = end+1;
715: ch = format++;
716: objIndex = varStart + value - 1;
717: }
718: }
719:
720: /*
721: * Parse any width specifier.
722: */
723: if ( isdigit(UCHAR(*ch))) {
724: width = strtoul(format-1, &format, 10);
725: ch = format++;
726: } else {
727: width = 0;
728: }
729:
730: /*
731: * Ignore size specifier.
732: */
733: if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
734: ch = format++;
735: }
736:
737: /*
738: * Handle the various field types.
739: */
740: switch (*ch) {
741: case 'n':
742: if (!(flags & SCAN_SUPPRESS)) {
743: if (numVars && objIndex >= argCount) {
744: break;
745: } else if (numVars) {
746: zend_uint refcount;
747:
748: current = args[objIndex++];
749: refcount = Z_REFCOUNT_PP(current);
750: zval_dtor( *current );
751: ZVAL_LONG( *current, (long)(string - baseString) );
752: Z_SET_REFCOUNT_PP(current, refcount);
753: Z_SET_ISREF_PP(current);
754: } else {
755: add_index_long(*return_value, objIndex++, string - baseString);
756: }
757: }
758: nconversions++;
759: continue;
760:
761: case 'd':
762: case 'D':
763: op = 'i';
764: base = 10;
765: fn = (long (*)())strtol;
766: break;
767: case 'i':
768: op = 'i';
769: base = 0;
770: fn = (long (*)())strtol;
771: break;
772: case 'o':
773: op = 'i';
774: base = 8;
775: fn = (long (*)())strtol;
776: break;
777: case 'x':
778: case 'X':
779: op = 'i';
780: base = 16;
781: fn = (long (*)())strtol;
782: break;
783: case 'u':
784: op = 'i';
785: base = 10;
786: flags |= SCAN_UNSIGNED;
787: fn = (long (*)())strtoul;
788: break;
789:
790: case 'f':
791: case 'e':
792: case 'E':
793: case 'g':
794: op = 'f';
795: break;
796:
797: case 's':
798: op = 's';
799: break;
800:
801: case 'c':
802: op = 's';
803: flags |= SCAN_NOSKIP;
804: /*-cc-*/
805: if (0 == width) {
806: width = 1;
807: }
808: /*-cc-*/
809: break;
810: case '[':
811: op = '[';
812: flags |= SCAN_NOSKIP;
813: break;
814: } /* switch */
815:
816: /*
817: * At this point, we will need additional characters from the
818: * string to proceed.
819: */
820: if (*string == '\0') {
821: underflow = 1;
822: goto done;
823: }
824:
825: /*
826: * Skip any leading whitespace at the beginning of a field unless
827: * the format suppresses this behavior.
828: */
829: if (!(flags & SCAN_NOSKIP)) {
830: while (*string != '\0') {
831: sch = *string;
832: if (! isspace((int)sch) ) {
833: break;
834: }
835: string++;
836: }
837: if (*string == '\0') {
838: underflow = 1;
839: goto done;
840: }
841: }
842:
843: /*
844: * Perform the requested scanning operation.
845: */
846: switch (op) {
847: case 'c':
848: case 's':
849: /*
850: * Scan a string up to width characters or whitespace.
851: */
852: if (width == 0) {
853: width = (size_t) ~0;
854: }
855: end = string;
856: while (*end != '\0') {
857: sch = *end;
858: if ( isspace( (int)sch ) ) {
859: break;
860: }
861: end++;
862: if (--width == 0) {
863: break;
864: }
865: }
866: if (!(flags & SCAN_SUPPRESS)) {
867: if (numVars && objIndex >= argCount) {
868: break;
869: } else if (numVars) {
870: zend_uint refcount;
871:
872: current = args[objIndex++];
873: refcount = Z_REFCOUNT_PP(current);
874: zval_dtor( *current );
875: ZVAL_STRINGL( *current, string, end-string, 1);
876: Z_SET_REFCOUNT_PP(current, refcount);
877: Z_SET_ISREF_PP(current);
878: } else {
879: add_index_stringl( *return_value, objIndex++, string, end-string, 1);
880: }
881: }
882: string = end;
883: break;
884:
885: case '[': {
886: CharSet cset;
887:
888: if (width == 0) {
889: width = (size_t) ~0;
890: }
891: end = string;
892:
893: format = BuildCharSet(&cset, format);
894: while (*end != '\0') {
895: sch = *end;
896: if (!CharInSet(&cset, (int)sch)) {
897: break;
898: }
899: end++;
900: if (--width == 0) {
901: break;
902: }
903: }
904: ReleaseCharSet(&cset);
905:
906: if (string == end) {
907: /*
908: * Nothing matched the range, stop processing
909: */
910: goto done;
911: }
912: if (!(flags & SCAN_SUPPRESS)) {
913: if (numVars && objIndex >= argCount) {
914: break;
915: } else if (numVars) {
916: current = args[objIndex++];
917: zval_dtor( *current );
918: ZVAL_STRINGL( *current, string, end-string, 1);
919: } else {
920: add_index_stringl(*return_value, objIndex++, string, end-string, 1);
921: }
922: }
923: string = end;
924: break;
925: }
926: /*
927: case 'c':
928: / Scan a single character./
929:
930: sch = *string;
931: string++;
932: if (!(flags & SCAN_SUPPRESS)) {
933: if (numVars) {
934: char __buf[2];
935: __buf[0] = sch;
936: __buf[1] = '\0';;
937: current = args[objIndex++];
938: zval_dtor(*current);
939: ZVAL_STRINGL( *current, __buf, 1, 1);
940: } else {
941: add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
942: }
943: }
944: break;
945: */
946: case 'i':
947: /*
948: * Scan an unsigned or signed integer.
949: */
950: /*-cc-*/
951: buf[0] = '\0';
952: /*-cc-*/
953: if ((width == 0) || (width > sizeof(buf) - 1)) {
954: width = sizeof(buf) - 1;
955: }
956:
957: flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
958: for (end = buf; width > 0; width--) {
959: switch (*string) {
960: /*
961: * The 0 digit has special meaning at the beginning of
962: * a number. If we are unsure of the base, it
963: * indicates that we are in base 8 or base 16 (if it is
964: * followed by an 'x').
965: */
966: case '0':
967: /*-cc-*/
968: if (base == 16) {
969: flags |= SCAN_XOK;
970: }
971: /*-cc-*/
972: if (base == 0) {
973: base = 8;
974: flags |= SCAN_XOK;
975: }
976: if (flags & SCAN_NOZERO) {
977: flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
978: } else {
979: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
980: }
981: goto addToInt;
982:
983: case '1': case '2': case '3': case '4':
984: case '5': case '6': case '7':
985: if (base == 0) {
986: base = 10;
987: }
988: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
989: goto addToInt;
990:
991: case '8': case '9':
992: if (base == 0) {
993: base = 10;
994: }
995: if (base <= 8) {
996: break;
997: }
998: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
999: goto addToInt;
1000:
1001: case 'A': case 'B': case 'C':
1002: case 'D': case 'E': case 'F':
1003: case 'a': case 'b': case 'c':
1004: case 'd': case 'e': case 'f':
1005: if (base <= 10) {
1006: break;
1007: }
1008: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1009: goto addToInt;
1010:
1011: case '+': case '-':
1012: if (flags & SCAN_SIGNOK) {
1013: flags &= ~SCAN_SIGNOK;
1014: goto addToInt;
1015: }
1016: break;
1017:
1018: case 'x': case 'X':
1019: if ((flags & SCAN_XOK) && (end == buf+1)) {
1020: base = 16;
1021: flags &= ~SCAN_XOK;
1022: goto addToInt;
1023: }
1024: break;
1025: }
1026:
1027: /*
1028: * We got an illegal character so we are done accumulating.
1029: */
1030: break;
1031:
1032: addToInt:
1033: /*
1034: * Add the character to the temporary buffer.
1035: */
1036: *end++ = *string++;
1037: if (*string == '\0') {
1038: break;
1039: }
1040: }
1041:
1042: /*
1043: * Check to see if we need to back up because we only got a
1044: * sign or a trailing x after a 0.
1045: */
1046: if (flags & SCAN_NODIGITS) {
1047: if (*string == '\0') {
1048: underflow = 1;
1049: }
1050: goto done;
1051: } else if (end[-1] == 'x' || end[-1] == 'X') {
1052: end--;
1053: string--;
1054: }
1055:
1056: /*
1057: * Scan the value from the temporary buffer. If we are
1058: * returning a large unsigned value, we have to convert it back
1059: * to a string since PHP only supports signed values.
1060: */
1061: if (!(flags & SCAN_SUPPRESS)) {
1062: *end = '\0';
1063: value = (long) (*fn)(buf, NULL, base);
1064: if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1065: snprintf(buf, sizeof(buf), "%lu", value); /* INTL: ISO digit */
1066: if (numVars && objIndex >= argCount) {
1067: break;
1068: } else if (numVars) {
1069: /* change passed value type to string */
1070: current = args[objIndex++];
1071: zval_dtor(*current);
1072: ZVAL_STRING( *current, buf, 1 );
1073: } else {
1074: add_index_string(*return_value, objIndex++, buf, 1);
1075: }
1076: } else {
1077: if (numVars && objIndex >= argCount) {
1078: break;
1079: } else if (numVars) {
1080: current = args[objIndex++];
1081: zval_dtor(*current);
1082: ZVAL_LONG(*current, value);
1083: } else {
1084: add_index_long(*return_value, objIndex++, value);
1085: }
1086: }
1087: }
1088: break;
1089:
1090: case 'f':
1091: /*
1092: * Scan a floating point number
1093: */
1094: buf[0] = '\0'; /* call me pedantic */
1095: if ((width == 0) || (width > sizeof(buf) - 1)) {
1096: width = sizeof(buf) - 1;
1097: }
1098: flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1099: for (end = buf; width > 0; width--) {
1100: switch (*string) {
1101: case '0': case '1': case '2': case '3':
1102: case '4': case '5': case '6': case '7':
1103: case '8': case '9':
1104: flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1105: goto addToFloat;
1106: case '+':
1107: case '-':
1108: if (flags & SCAN_SIGNOK) {
1109: flags &= ~SCAN_SIGNOK;
1110: goto addToFloat;
1111: }
1112: break;
1113: case '.':
1114: if (flags & SCAN_PTOK) {
1115: flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1116: goto addToFloat;
1117: }
1118: break;
1119: case 'e':
1120: case 'E':
1121: /*
1122: * An exponent is not allowed until there has
1123: * been at least one digit.
1124: */
1125: if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1126: flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1127: | SCAN_SIGNOK | SCAN_NODIGITS;
1128: goto addToFloat;
1129: }
1130: break;
1131: }
1132:
1133: /*
1134: * We got an illegal character so we are done accumulating.
1135: */
1136: break;
1137:
1138: addToFloat:
1139: /*
1140: * Add the character to the temporary buffer.
1141: */
1142: *end++ = *string++;
1143: if (*string == '\0') {
1144: break;
1145: }
1146: }
1147:
1148: /*
1149: * Check to see if we need to back up because we saw a
1150: * trailing 'e' or sign.
1151: */
1152: if (flags & SCAN_NODIGITS) {
1153: if (flags & SCAN_EXPOK) {
1154: /*
1155: * There were no digits at all so scanning has
1156: * failed and we are done.
1157: */
1158: if (*string == '\0') {
1159: underflow = 1;
1160: }
1161: goto done;
1162: }
1163:
1164: /*
1165: * We got a bad exponent ('e' and maybe a sign).
1166: */
1167: end--;
1168: string--;
1169: if (*end != 'e' && *end != 'E') {
1170: end--;
1171: string--;
1172: }
1173: }
1174:
1175: /*
1176: * Scan the value from the temporary buffer.
1177: */
1178: if (!(flags & SCAN_SUPPRESS)) {
1179: double dvalue;
1180: *end = '\0';
1181: dvalue = zend_strtod(buf, NULL);
1182: if (numVars && objIndex >= argCount) {
1183: break;
1184: } else if (numVars) {
1185: current = args[objIndex++];
1186: zval_dtor(*current);
1187: ZVAL_DOUBLE(*current, dvalue);
1188: } else {
1189: add_index_double( *return_value, objIndex++, dvalue );
1190: }
1191: }
1192: break;
1193: } /* switch (op) */
1194: nconversions++;
1195: } /* while (*format != '\0') */
1196:
1197: done:
1198: result = SCAN_SUCCESS;
1199:
1200: if (underflow && (0==nconversions)) {
1201: scan_set_error_return( numVars, return_value );
1202: result = SCAN_ERROR_EOF;
1203: } else if (numVars) {
1204: convert_to_long( *return_value );
1205: Z_LVAL_PP(return_value) = nconversions;
1206: } else if (nconversions < totalVars) {
1207: /* TODO: not all elements converted. we need to prune the list - cc */
1208: }
1209: return result;
1210: }
1211: /* }}} */
1212:
1213: /* the compiler choked when i tried to make this a macro */
1214: static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
1215: {
1216: if (numVars) {
1217: Z_TYPE_PP(return_value) = IS_LONG;
1218: Z_LVAL_PP(return_value) = SCAN_ERROR_EOF; /* EOF marker */
1219: } else {
1220: /* convert_to_null calls destructor */
1221: convert_to_null( *return_value );
1222: }
1223: }
1224: /* }}} */
1225:
1226: /*
1227: * Local variables:
1228: * tab-width: 4
1229: * c-basic-offset: 4
1230: * End:
1231: * vim600: sw=4 ts=4 fdm=marker
1232: * vim<600: sw=4 ts=4
1233: */