Annotation of embedaddon/php/ext/ereg/regex/split.c, revision 1.1.1.1
1.1 misho 1: #include <stdio.h>
2: #include <string.h>
3:
/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * The string is split in place:  the separator that ends each stored
 * field is overwritten with '\0', and fields[] receives pointers into
 * string.  Nothing is allocated.
 *
 * sep selects the splitting rule:
 *	""	white space:  leading blanks/tabs are skipped, any run of
 *		blanks/tabs is one separator, trailing white space does not
 *		produce a field
 *	"c"	every occurrence of c is a separator, so adjacent
 *		separators yield empty fields
 *	"ab..."	any run of characters from sep is one separator
 *
 * The return value is the number of fields found, which may exceed
 * nfields.  When it does, only nfields pointers are stored and the last
 * one points at the unsplit remainder of the string.  An empty string
 * (or, in white-space mode, an all-white one) yields 0.
 *
 * If nfields <= 0 the string is only counted:  fields[] is never written
 * and string is left unmodified.
 */

/* split_is_sep - is c one of the characters in seps?  ('\0' never is) */
static int
split_is_sep(char c, const char *seps)
{
	for (; *seps != '\0'; seps++)
		if (*seps == c)
			return(1);
	return(0);
}

int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while (*p == ' ' || *p == '\t')
			p++;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		/*
		 * The first field always exists here.  Each separator
		 * starts another one; it is stored (and the separator
		 * stomped) only while there is still room, so the last
		 * slot keeps the unsplit remainder and nfields <= 0
		 * degenerates to pure counting.
		 */
		fn = 1;
		if (nfields > 0)
			*fp++ = p;
		while ((c = *p++) != '\0') {
			if (c != sepc)
				continue;
			if (fn < nfields) {
				*(p-1) = '\0';
				*fp++ = p;
			}
			fn++;
		}
		return(fn);
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields <= 0) {
			/* no room at all -- just count, touching nothing */
			fn = 0;
			for (;;) {
				/* trailing white space is not a field */
				if (trimtrail && *p == '\0')
					return(fn);
				fn++;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0')
						return(fn);
				while (*p == sepc || *p == sepc2)
					p++;
			}
			/* not reached */
		}

		fn = nfields;	/* counts free slots down to zero */
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/* an empty last field is just trailing white space */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;	/* last slot keeps the remainder unsplit */
			*(p-1) = '\0';
			while (*p == sepc || *p == sepc2)
				p++;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			/* p is one past the terminator; back up over trailing seps */
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * The last "field" counted was only trailing
				 * white space.  If it was the sole overflow,
				 * the last slot is a real final field, so
				 * trim it too.
				 */
				if (fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* find the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			if (split_is_sep(c, sep))
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		/* swallow the rest of the separator run */
		while (split_is_sep(*p, sep))
			p++;
	}

	/* not reached */
}
139:
140: #ifdef TEST_SPLIT
141:
142:
/*
 * test program
 *	pgm			runs regression
 *	pgm sep			splits stdin lines by sep
 *	pgm str sep		splits str by sep
 *	pgm str sep n		splits str by sep n times
 *	pgm str sep n x		copies str n times without splitting
 *				(presumably a timing baseline -- confirm)
 */
int dosplit(char *string, char *seps);
int regress(void);

int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	/* the repeat modes copy argv[1] into buf; refuse what will not fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long (max %d chars)\n",
					argv[0], (int)sizeof(buf) - 1);
		return(1);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			/* split() modifies its input, so start from a fresh copy */
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * stomp the newline, if there is one -- an over-long
			 * line or an unterminated last line has none, and
			 * must not lose its final character
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	return(0);
}
181:
int print(int nf, int nfp, char *fields[]);

/*
 - dosplit - split one string by seps and print the result
 *
 * string is modified in place by split().  At most NF fields are stored;
 * the count handed to print() may be larger.  Always returns 0.
 */
int
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
	return(0);
}
193:
/*
 - print - show a field count and the stored fields on one line
 *
 * nf is the count returned by split(), nfp the number of slots actually
 * available in fields[]; only the first min(nf, nfp) entries are printed.
 * Each field is quoted and followed by ", " while more fields were
 * counted after it, so an overflowed list ends in a dangling ", ".
 * The line is always newline-terminated.  Always returns 0.
 */
int
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");
	/*
	 * The loop supplies the newline only when it printed the very last
	 * counted field; with nothing printed, or on overflow, finish the
	 * line here.
	 */
	if (bound <= 0 || bound < nf)
		printf("\n");
	return(0);
}
207:
#define	RNF	5		/* some table entries know this */

/*
 * Regression table:  splitting str by seps with RNF slots must return nf
 * and store the strings in fi[] (only the first min(nf, RNF) are compared).
 * A NULL str ends the table.
 *
 * Groups, by separator:  one character, two characters, three, four,
 * then (see note below) the fifth group, then white space ("").
 */
struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	/* two adjacent separators give the empty third field */
	{ "abc def  g ", " ",	5,	{ "abc", "def", "", "g", "" } },
	/* two leading separators give the two empty leading fields */
	{ "  a bcd",	" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f", " ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _",	0,	{ "" } },
	{ " ",		" _",	2,	{ "", "" } },
	{ "x",		" _",	1,	{ "x" } },
	{ "x y",	" _",	2,	{ "x", "y" } },
	{ "ab _ cd",	" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",	" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f", " _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _~",	0,	{ "" } },
	{ " ",		" _~",	2,	{ "", "" } },
	{ "x",		" _~",	1,	{ "x" } },
	{ "x y",	" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",	" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f", " _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		" _~-",	0,	{ "" } },
	{ " ",		" _~-",	2,	{ "", "" } },
	{ "x",		" _~-",	1,	{ "x" } },
	{ "x y",	" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",	" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f", " _~-", 6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/*
	 * NOTE(review): this group repeats the one-character separator " ".
	 * It was presumably written with a two-blank separator so that the
	 * two-separator code path sees duplicate characters, with the blank
	 * run collapsed in transcription -- confirm against upstream.  The
	 * entries are correct as they stand.
	 */
	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def g ",	" ",	4,	{ "abc", "def", "g", "" } },
	{ " a bcd",	" ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f", " ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	{ "",		"",	0,	{ "" } },
	{ " ",		"",	0,	{ "" } },
	{ "x",		"",	1,	{ "x" } },
	{ "xy",		"",	1,	{ "xy" } },
	{ "x y",	"",	2,	{ "x", "y" } },
	{ "abc def g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",	"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ", "",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f", "",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ", "",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,		NULL,	0,	{ NULL } },
};
276:
277: regress()
278: {
279: char buf[512];
280: register int n;
281: char *fields[RNF+1];
282: register int nf;
283: register int i;
284: register int printit;
285: register char *f;
286:
287: for (n = 0; tests[n].str != NULL; n++) {
288: (void) strcpy(buf, tests[n].str);
289: fields[RNF] = NULL;
290: nf = split(buf, fields, RNF, tests[n].seps);
291: printit = 0;
292: if (nf != tests[n].nf) {
293: printf("split `%s' by `%s' gave %d fields, not %d\n",
294: tests[n].str, tests[n].seps, nf, tests[n].nf);
295: printit = 1;
296: } else if (fields[RNF] != NULL) {
297: printf("split() went beyond array end\n");
298: printit = 1;
299: } else {
300: for (i = 0; i < nf && i < RNF; i++) {
301: f = fields[i];
302: if (f == NULL)
303: f = "(NULL)";
304: if (strcmp(f, tests[n].fi[i]) != 0) {
305: printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
306: tests[n].str, tests[n].seps,
307: i, fields[i], tests[n].fi[i]);
308: printit = 1;
309: }
310: }
311: }
312: if (printit)
313: print(nf, RNF, fields);
314: }
315: }
316: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>