Annotation of embedaddon/ntp/ntpd/ntp_scanner.c, revision 1.1.1.1
1.1 misho 1:
2: /* ntp_scanner.c
3: *
4: * The source code for a simple lexical analyzer.
5: *
6: * Written By: Sachin Kamboj
7: * University of Delaware
8: * Newark, DE 19711
9: * Copyright (c) 2006
10: */
11:
12: #ifdef HAVE_CONFIG_H
13: # include <config.h>
14: #endif
15:
16: #include <stdio.h>
17: #include <ctype.h>
18: #include <stdlib.h>
19: #include <errno.h>
20: #include <string.h>
21:
22: #include "ntp_config.h"
23: #include "ntpsim.h"
24: #include "ntp_scanner.h"
25: #include "ntp_parser.h"
26: #include "ntp_debug.h"
27:
28: /* ntp_keyword.h declares finite state machine and token text */
29: #include "ntp_keyword.h"
30:
31:
32:
33: /* SCANNER GLOBAL VARIABLES
34: * ------------------------
35: */
36:
/*
 * MAX_LEXEME is the size of yytext in bytes.  yylex() gives up on a
 * lexeme as soon as its length reaches COUNTOF(yytext), so the longest
 * lexeme it accepts is MAX_LEXEME - 1 characters plus the terminating
 * NUL.
 */
#define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */
char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */
/*
 * Defined elsewhere.  When nonzero, get_next_char()/push_back_char()
 * operate on the file behind ip_file; when zero they operate on
 * remote_config.buffer.
 */
extern int input_from_file;
40:
41:
42:
43:
44: /* CONSTANTS
45: * ---------
46: */
47:
48:
49: /* SCANNER GLOBAL VARIABLES
50: * ------------------------
51: */
/*
 * Characters that yylex() hands back as single-character tokens while it
 * is scanning for tokens (followedby == FOLLBY_TOKEN).  quote_if_needed()
 * also uses this set to decide whether a string has to be quoted.
 */
const char special_chars[] = "{}(),;|=";
53:
54:
55: /* FUNCTIONS
56: * ---------
57: */
58:
/* Forward declarations for functions defined later in this file. */
int get_next_char(void);	/* next input character, or EOF */
/* keyword state-machine lookup; returns 0 when lexeme is not a keyword */
static int is_keyword(char *lexeme, follby *pfollowedby);
61:
62:
63:
64: /*
65: * keyword() - Return the keyword associated with token T_ identifier.
66: * See also token_name() for the string-ized T_ identifier.
67: * Example: keyword(T_Server) returns "server"
68: * token_name(T_Server) returns "T_Server"
69: */
70: const char *
71: keyword(
72: int token
73: )
74: {
75: int i;
76: const char *text;
77:
78: i = token - LOWEST_KEYWORD_ID;
79:
80: if (i >= 0 && i < COUNTOF(keyword_text))
81: text = keyword_text[i];
82: else
83: text = NULL;
84:
85: return (text != NULL)
86: ? text
87: : "(keyword not found)";
88: }
89:
90:
91: /* FILE INTERFACE
92: * --------------
93: * We define a couple of wrapper functions around the standard C fgetc
94: * and ungetc functions in order to include positional bookkeeping
95: */
96:
97: struct FILE_INFO *
98: F_OPEN(
99: const char *path,
100: const char *mode
101: )
102: {
103: struct FILE_INFO *my_info;
104:
105: my_info = emalloc(sizeof *my_info);
106:
107: my_info->line_no = 1;
108: my_info->col_no = 0;
109: my_info->prev_line_col_no = 0;
110: my_info->prev_token_col_no = 0;
111: my_info->fname = path;
112:
113: my_info->fd = fopen(path, mode);
114: if (NULL == my_info->fd) {
115: free(my_info);
116: return NULL;
117: }
118: return my_info;
119: }
120:
121: int
122: FGETC(
123: struct FILE_INFO *stream
124: )
125: {
126: int ch = fgetc(stream->fd);
127:
128: ++stream->col_no;
129: if (ch == '\n') {
130: stream->prev_line_col_no = stream->col_no;
131: ++stream->line_no;
132: stream->col_no = 1;
133: }
134: return ch;
135: }
136:
137: /* BUGS: 1. Function will fail on more than one line of pushback
138: * 2. No error checking is done to see if ungetc fails
139: * SK: I don't think its worth fixing these bugs for our purposes ;-)
140: */
141: int
142: UNGETC(
143: int ch,
144: struct FILE_INFO *stream
145: )
146: {
147: if (ch == '\n') {
148: stream->col_no = stream->prev_line_col_no;
149: stream->prev_line_col_no = -1;
150: --stream->line_no;
151: }
152: --stream->col_no;
153: return ungetc(ch, stream->fd);
154: }
155:
156: int
157: FCLOSE(
158: struct FILE_INFO *stream
159: )
160: {
161: int ret_val = fclose(stream->fd);
162:
163: if (!ret_val)
164: free(stream);
165: return ret_val;
166: }
167:
168: /* STREAM INTERFACE
169: * ----------------
170: * Provide a wrapper for the stream functions so that the
171: * stream can either read from a file or from a character
172: * array.
173: * NOTE: This is not very efficient for reading from character
174: * arrays, but needed to allow remote configuration where the
175: * configuration command is provided through ntpq.
176: *
177: * The behavior of these two functions is determined by the
178: * input_from_file flag.
179: */
180:
181: int
182: get_next_char(
183: void
184: )
185: {
186: char ch;
187:
188: if (input_from_file)
189: return FGETC(ip_file);
190: else {
191: if (remote_config.buffer[remote_config.pos] == '\0')
192: return EOF;
193: else {
194: ip_file->col_no++;
195: ch = remote_config.buffer[remote_config.pos++];
196: if (ch == '\n') {
197: ip_file->prev_line_col_no = ip_file->col_no;
198: ++ip_file->line_no;
199: ip_file->col_no = 1;
200: }
201: return ch;
202: }
203: }
204: }
205:
206: void
207: push_back_char(
208: int ch
209: )
210: {
211: if (input_from_file)
212: UNGETC(ch, ip_file);
213: else {
214: if (ch == '\n') {
215: ip_file->col_no = ip_file->prev_line_col_no;
216: ip_file->prev_line_col_no = -1;
217: --ip_file->line_no;
218: }
219: --ip_file->col_no;
220:
221: remote_config.pos--;
222: }
223: }
224:
225:
226:
227: /* STATE MACHINES
228: * --------------
229: */
230:
231: /* Keywords */
232: static int
233: is_keyword(
234: char *lexeme,
235: follby *pfollowedby
236: )
237: {
238: follby fb;
239: int curr_s; /* current state index */
240: int token;
241: int i;
242:
243: curr_s = SCANNER_INIT_S;
244: token = 0;
245:
246: for (i = 0; lexeme[i]; i++) {
247: while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
248: curr_s = SS_OTHER_N(sst[curr_s]);
249:
250: if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
251: if ('\0' == lexeme[i + 1]
252: && FOLLBY_NON_ACCEPTING
253: != SS_FB(sst[curr_s])) {
254: fb = SS_FB(sst[curr_s]);
255: *pfollowedby = fb;
256: token = curr_s;
257: break;
258: }
259: curr_s = SS_MATCH_N(sst[curr_s]);
260: } else
261: break;
262: }
263:
264: return token;
265: }
266:
267:
/*
 * is_integer() - test whether a lexeme is a decimal integer.
 *
 * Accepted form: an optional leading '-' followed by one or more
 * decimal digits and nothing else.  A leading '+' is not accepted.
 * A bare "-" or an empty lexeme is not an integer.
 *
 * Returns 1 if the lexeme has that form, 0 otherwise.
 */
static int
is_integer(
	char *lexeme
	)
{
	const char *p = lexeme;

	/* Allow a leading minus sign */
	if ('-' == *p)
		p++;

	/* There has to be at least one digit */
	if ('\0' == *p)
		return 0;

	/*
	 * Check that all the remaining characters are digits.  The cast
	 * matters: handing isdigit() a negative char value is undefined.
	 */
	for (; *p != '\0'; p++) {
		if (!isdigit((unsigned char)*p))
			return 0;
	}
	return 1;
}
287:
288:
/*
 * is_double() - test whether a lexeme is a floating-point literal.
 *
 * Accepted form:
 *     [+|-] digits* '.' digits* [ (e|E) [+|-] digits+ ]
 * with at least one digit in the integer and fraction parts combined.
 * The decimal point is required, so plain integers are rejected and
 * left to is_integer().  When an exponent marker is present it must be
 * followed by at least one digit.
 *
 * Returns 1 if the lexeme has that form, 0 otherwise.
 */
static int
is_double(
	char *lexeme
	)
{
	/*
	 * Walk the text as unsigned char: handing the <ctype.h>
	 * functions a negative char value is undefined.
	 */
	const unsigned char *p = (const unsigned char *)lexeme;
	unsigned int mantissa_digits = 0;
	unsigned int exponent_digits = 0;

	/* Check for an optional '+' or '-' */
	if ('+' == *p || '-' == *p)
		p++;

	/* Read the integer part (isdigit() is false for the NUL) */
	for (; isdigit(*p); p++)
		mantissa_digits++;

	/* Check for the required decimal point */
	if ('.' != *p)
		return 0;
	p++;

	/* Check for any digits after the decimal point */
	for (; isdigit(*p); p++)
		mantissa_digits++;

	/*
	 * The integer part and the fraction part must not both be
	 * empty.
	 */
	if (0 == mantissa_digits)
		return 0;

	/* Check if we are done */
	if ('\0' == *p)
		return 1;

	/* There is still more input, it has to be an exponent */
	if ('e' != tolower(*p))
		return 0;
	p++;

	/* Read an optional sign */
	if ('+' == *p || '-' == *p)
		p++;

	/* Now read the exponent digits; at least one is required */
	for (; isdigit(*p); p++)
		exponent_digits++;
	if (0 == exponent_digits)
		return 0;

	/* Nothing may follow the exponent */
	return ('\0' == *p) ? 1 : 0;
}
349:
350:
351: /* is_special() - Test whether a character is a token */
352: static inline int
353: is_special(
354: int ch
355: )
356: {
357: return (int)strchr(special_chars, ch);
358: }
359:
360:
361: static int
362: is_EOC(
363: int ch
364: )
365: {
366: if ((old_config_style && (ch == '\n')) ||
367: (!old_config_style && (ch == ';')))
368: return 1;
369: return 0;
370: }
371:
372:
373: char *
374: quote_if_needed(char *str)
375: {
376: char *ret;
377: size_t len;
378: size_t octets;
379:
380: len = strlen(str);
381: octets = len + 2 + 1;
382: ret = emalloc(octets);
383: if ('"' != str[0]
384: && (strcspn(str, special_chars) < len
385: || strchr(str, ' ') != NULL)) {
386: snprintf(ret, octets, "\"%s\"", str);
387: } else
388: strncpy(ret, str, octets);
389:
390: return ret;
391: }
392:
393:
394: static int
395: create_string_token(
396: char *lexeme
397: )
398: {
399: char *pch;
400:
401: /*
402: * ignore end of line whitespace
403: */
404: pch = lexeme;
405: while (*pch && isspace(*pch))
406: pch++;
407:
408: if (!*pch) {
409: yylval.Integer = T_EOC;
410: return yylval.Integer;
411: }
412:
413: yylval.String = estrdup(lexeme);
414: return T_String;
415: }
416:
417:
418: /*
419: * yylex() - function that does the actual scanning.
420: * Bison expects this function to be called yylex and for it to take no
421: * input and return an int.
422: * Conceptually yylex "returns" yylval as well as the actual return
423: * value representing the token or type.
424: */
425: int
426: yylex(
427: void
428: )
429: {
430: int i, instring = 0;
431: int yylval_was_set = 0;
432: int token; /* The return value/the recognized token */
433: int ch;
434: static follby followedby = FOLLBY_TOKEN;
435:
436: do {
437: /* Ignore whitespace at the beginning */
438: while (EOF != (ch = get_next_char()) &&
439: isspace(ch) &&
440: !is_EOC(ch))
441: ; /* Null Statement */
442:
443: if (EOF == ch) {
444:
445: if (!input_from_file || !curr_include_level)
446: return 0;
447:
448: FCLOSE(fp[curr_include_level]);
449: ip_file = fp[--curr_include_level];
450: token = T_EOC;
451: goto normal_return;
452:
453: } else if (is_EOC(ch)) {
454:
455: /* end FOLLBY_STRINGS_TO_EOC effect */
456: followedby = FOLLBY_TOKEN;
457: token = T_EOC;
458: goto normal_return;
459:
460: } else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
461: /* special chars are their own token values */
462: token = ch;
463: /*
464: * '=' implies a single string following as in:
465: * setvar Owner = "The Boss" default
466: * This could alternatively be handled by
467: * removing '=' from special_chars and adding
468: * it to the keyword table.
469: */
470: if ('=' == ch)
471: followedby = FOLLBY_STRING;
472: yytext[0] = (char)ch;
473: yytext[1] = '\0';
474: goto normal_return;
475: } else
476: push_back_char(ch);
477:
478: /* save the position of start of the token */
479: ip_file->prev_token_line_no = ip_file->line_no;
480: ip_file->prev_token_col_no = ip_file->col_no;
481:
482: /* Read in the lexeme */
483: i = 0;
484: while (EOF != (ch = get_next_char())) {
485:
486: yytext[i] = (char)ch;
487:
488: /* Break on whitespace or a special character */
489: if (isspace(ch) || is_EOC(ch)
490: || '"' == ch
491: || (FOLLBY_TOKEN == followedby
492: && is_special(ch)))
493: break;
494:
495: /* Read the rest of the line on reading a start
496: of comment character */
497: if ('#' == ch) {
498: while (EOF != (ch = get_next_char())
499: && '\n' != ch)
500: ; /* Null Statement */
501: break;
502: }
503:
504: i++;
505: if (i >= COUNTOF(yytext))
506: goto lex_too_long;
507: }
508: /* Pick up all of the string inside between " marks, to
509: * end of line. If we make it to EOL without a
510: * terminating " assume it for them.
511: *
512: * XXX - HMS: I'm not sure we want to assume the closing "
513: */
514: if ('"' == ch) {
515: instring = 1;
516: while (EOF != (ch = get_next_char()) &&
517: ch != '"' && ch != '\n') {
518: yytext[i++] = (char)ch;
519: if (i >= COUNTOF(yytext))
520: goto lex_too_long;
521: }
522: /*
523: * yytext[i] will be pushed back as not part of
524: * this lexeme, but any closing quote should
525: * not be pushed back, so we read another char.
526: */
527: if ('"' == ch)
528: ch = get_next_char();
529: }
530: /* Pushback the last character read that is not a part
531: * of this lexeme.
532: * If the last character read was an EOF, pushback a
533: * newline character. This is to prevent a parse error
534: * when there is no newline at the end of a file.
535: */
536: if (EOF == ch)
537: push_back_char('\n');
538: else
539: push_back_char(ch);
540: yytext[i] = '\0';
541: } while (i == 0);
542:
543: /* Now return the desired token */
544:
545: /* First make sure that the parser is *not* expecting a string
546: * as the next token (based on the previous token that was
547: * returned) and that we haven't read a string.
548: */
549:
550: if (followedby == FOLLBY_TOKEN && !instring) {
551: token = is_keyword(yytext, &followedby);
552: if (token)
553: goto normal_return;
554: else if (is_integer(yytext)) {
555: yylval_was_set = 1;
556: errno = 0;
557: if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
558: && ((errno == EINVAL) || (errno == ERANGE))) {
559: msyslog(LOG_ERR,
560: "Integer cannot be represented: %s",
561: yytext);
562: exit(1);
563: } else {
564: token = T_Integer;
565: goto normal_return;
566: }
567: }
568: else if (is_double(yytext)) {
569: yylval_was_set = 1;
570: errno = 0;
571: if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
572: msyslog(LOG_ERR,
573: "Double too large to represent: %s",
574: yytext);
575: exit(1);
576: } else {
577: token = T_Double;
578: goto normal_return;
579: }
580: } else {
581: /* Default: Everything is a string */
582: yylval_was_set = 1;
583: token = create_string_token(yytext);
584: goto normal_return;
585: }
586: }
587:
588: /*
589: * Either followedby is not FOLLBY_TOKEN or this lexeme is part
590: * of a string. Hence, we need to return T_String.
591: *
592: * _Except_ we might have a -4 or -6 flag on a an association
593: * configuration line (server, peer, pool, etc.).
594: *
595: * This is a terrible hack, but the grammar is ambiguous so we
596: * don't have a choice. [SK]
597: *
598: * The ambiguity is in the keyword scanner, not ntp_parser.y.
599: * We do not require server addresses be quoted in ntp.conf,
600: * complicating the scanner's job. To avoid trying (and
601: * failing) to match an IP address or DNS name to a keyword,
602: * the association keywords use FOLLBY_STRING in the keyword
603: * table, which tells the scanner to force the next token to be
604: * a T_String, so it does not try to match a keyword but rather
605: * expects a string when -4/-6 modifiers to server, peer, etc.
606: * are encountered.
607: * restrict -4 and restrict -6 parsing works correctly without
608: * this hack, as restrict uses FOLLBY_TOKEN. [DH]
609: */
610: if ('-' == yytext[0]) {
611: if ('4' == yytext[1]) {
612: token = T_Ipv4_flag;
613: goto normal_return;
614: } else if ('6' == yytext[1]) {
615: token = T_Ipv6_flag;
616: goto normal_return;
617: }
618: }
619:
620: instring = 0;
621: if (FOLLBY_STRING == followedby)
622: followedby = FOLLBY_TOKEN;
623:
624: yylval_was_set = 1;
625: token = create_string_token(yytext);
626:
627: normal_return:
628: if (T_EOC == token)
629: DPRINTF(4,("\t<end of command>\n"));
630: else
631: DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
632: token_name(token)));
633:
634: if (!yylval_was_set)
635: yylval.Integer = token;
636:
637: return token;
638:
639: lex_too_long:
640: yytext[min(sizeof(yytext) - 1, 50)] = 0;
641: msyslog(LOG_ERR,
642: "configuration item on line %d longer than limit of %lu, began with '%s'",
643: ip_file->line_no, (u_long)(sizeof(yytext) - 1), yytext);
644:
645: /*
646: * If we hit the length limit reading the startup configuration
647: * file, abort.
648: */
649: if (input_from_file)
650: exit(sizeof(yytext) - 1);
651:
652: /*
653: * If it's runtime configuration via ntpq :config treat it as
654: * if the configuration text ended before the too-long lexeme,
655: * hostname, or string.
656: */
657: yylval.Integer = 0;
658: return 0;
659: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>