File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / ntp / ntpd / ntp_scanner.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue May 29 12:08:38 2012 UTC (12 years, 2 months ago) by misho
Branches: ntp, MAIN
CVS tags: v4_2_6p5p0, v4_2_6p5, HEAD
ntp 4.2.6p5

    1: 
    2: /* ntp_scanner.c
    3:  *
    4:  * The source code for a simple lexical analyzer. 
    5:  *
    6:  * Written By:	Sachin Kamboj
    7:  *		University of Delaware
    8:  *		Newark, DE 19711
    9:  * Copyright (c) 2006
   10:  */
   11: 
   12: #ifdef HAVE_CONFIG_H
   13: # include <config.h>
   14: #endif
   15: 
   16: #include <stdio.h>
   17: #include <ctype.h>
   18: #include <stdlib.h>
   19: #include <errno.h>
   20: #include <string.h>
   21: 
   22: #include "ntp_config.h"
   23: #include "ntpsim.h"
   24: #include "ntp_scanner.h"
   25: #include "ntp_parser.h"
   26: #include "ntp_debug.h"
   27: 
   28: /* ntp_keyword.h declares finite state machine and token text */
   29: #include "ntp_keyword.h"
   30: 
   31: 
   32: 
   33: /* SCANNER GLOBAL VARIABLES 
   34:  * ------------------------
   35:  */
   36: 
   37: #define MAX_LEXEME (1024 + 1)	/* Size of yytext: 1024 lexeme characters plus the terminating NUL */
   38: char yytext[MAX_LEXEME];	/* Text of the lexeme most recently scanned by yylex() */
   39: extern int input_from_file;	/* Nonzero: read from ip_file; zero: read from remote_config.buffer */
   40: 
   41: 
   42: 
   43: 
   44: /* CONSTANTS 
   45:  * ---------
   46:  */
   47: 
   48: 
   49: /* SCANNER GLOBAL VARIABLES 
   50:  * ------------------------
   51:  */
   52: const char special_chars[] = "{}(),;|=";	/* characters is_special() accepts; yylex() returns each as its own token value */
   53: 
   54: 
   55: /* FUNCTIONS
   56:  * ---------
   57:  */
   58: 
   59: int get_next_char(void);	/* next input character from file or remote buffer, or EOF */
   60: static int is_keyword(char *lexeme, follby *pfollowedby);	/* keyword token for lexeme, or 0 */
   61: 
   62: 
   63: 
   64: /*
   65:  * keyword() - Return the keyword associated with token T_ identifier.
   66:  *	       See also token_name() for the string-ized T_ identifier.
   67:  *	       Example: keyword(T_Server) returns "server"
   68:  *			token_name(T_Server) returns "T_Server"
   69:  */
   70: const char *
   71: keyword(
   72: 	int token
   73: 	)
   74: {
   75: 	int i;
   76: 	const char *text;
   77: 
   78: 	i = token - LOWEST_KEYWORD_ID;
   79: 
   80: 	if (i >= 0 && i < COUNTOF(keyword_text))
   81: 		text = keyword_text[i];
   82: 	else
   83: 		text = NULL;
   84: 
   85: 	return (text != NULL)
   86: 		   ? text
   87: 		   : "(keyword not found)";
   88: }
   89: 
   90: 
   91: /* FILE INTERFACE
   92:  * --------------
   93:  * We define a couple of wrapper functions around the standard C fgetc
   94:  * and ungetc functions in order to include positional bookkeeping
   95:  */
   96: 
   97: struct FILE_INFO *
   98: F_OPEN(
   99: 	const char *path,
  100: 	const char *mode
  101: 	)
  102: {
  103: 	struct FILE_INFO *my_info;
  104: 
  105: 	my_info = emalloc(sizeof *my_info);
  106: 
  107: 	my_info->line_no = 1;
  108: 	my_info->col_no = 0;
  109: 	my_info->prev_line_col_no = 0;
  110: 	my_info->prev_token_col_no = 0;
  111: 	my_info->fname = path;
  112: 
  113: 	my_info->fd = fopen(path, mode);
  114: 	if (NULL == my_info->fd) {
  115: 		free(my_info);
  116: 		return NULL;
  117: 	}
  118: 	return my_info;
  119: }
  120: 
  121: int
  122: FGETC(
  123: 	struct FILE_INFO *stream
  124: 	)
  125: {
  126: 	int ch = fgetc(stream->fd);
  127: 
  128: 	++stream->col_no;
  129: 	if (ch == '\n') {
  130: 		stream->prev_line_col_no = stream->col_no;
  131: 		++stream->line_no;
  132: 		stream->col_no = 1;
  133: 	}
  134: 	return ch;
  135: }
  136: 
  137: /* BUGS: 1. Function will fail on more than one line of pushback
  138:  *       2. No error checking is done to see if ungetc fails
  139:  * SK: I don't think its worth fixing these bugs for our purposes ;-)
  140:  */
  141: int
  142: UNGETC(
  143: 	int ch,
  144: 	struct FILE_INFO *stream
  145: 	)
  146: {
  147: 	if (ch == '\n') {
  148: 		stream->col_no = stream->prev_line_col_no;
  149: 		stream->prev_line_col_no = -1;
  150: 		--stream->line_no;
  151: 	}
  152: 	--stream->col_no;
  153: 	return ungetc(ch, stream->fd);
  154: }
  155: 
  156: int
  157: FCLOSE(
  158: 	struct FILE_INFO *stream
  159: 	)
  160: {
  161: 	int ret_val = fclose(stream->fd);
  162: 
  163: 	if (!ret_val)
  164: 		free(stream);
  165: 	return ret_val;
  166: }
  167: 
  168: /* STREAM INTERFACE 
  169:  * ----------------
  170:  * Provide a wrapper for the stream functions so that the
  171:  * stream can either read from a file or from a character
  172:  * array. 
  173:  * NOTE: This is not very efficient for reading from character
  174:  * arrays, but needed to allow remote configuration where the
  175:  * configuration command is provided through ntpq.
  176:  * 
  177:  * The behavior of these two functions is determined by the 
  178:  * input_from_file flag.
  179:  */
  180: 
  181: int
  182: get_next_char(
  183: 	void
  184: 	)
  185: {
  186: 	char ch;
  187: 
  188: 	if (input_from_file)
  189: 		return FGETC(ip_file);
  190: 	else {
  191: 		if (remote_config.buffer[remote_config.pos] == '\0') 
  192: 			return EOF;
  193: 		else {
  194: 			ip_file->col_no++;
  195: 			ch = remote_config.buffer[remote_config.pos++];
  196: 			if (ch == '\n') {
  197: 				ip_file->prev_line_col_no = ip_file->col_no;
  198: 				++ip_file->line_no;
  199: 				ip_file->col_no = 1;
  200: 			}
  201: 			return ch;
  202: 		}
  203: 	}
  204: }
  205: 
  206: void
  207: push_back_char(
  208: 	int ch
  209: 	)
  210: {
  211: 	if (input_from_file)
  212: 		UNGETC(ch, ip_file);
  213: 	else {
  214: 		if (ch == '\n') {
  215: 			ip_file->col_no = ip_file->prev_line_col_no;
  216: 			ip_file->prev_line_col_no = -1;
  217: 			--ip_file->line_no;
  218: 		}
  219: 		--ip_file->col_no;
  220: 
  221: 		remote_config.pos--;
  222: 	}
  223: }
  224: 
  225:  
  226: 
  227: /* STATE MACHINES 
  228:  * --------------
  229:  */
  230: 
  231: /* Keywords */
  232: static int
  233: is_keyword(
  234: 	char *lexeme,
  235: 	follby *pfollowedby
  236: 	)
  237: {
  238: 	follby fb;
  239: 	int curr_s;		/* current state index */
  240: 	int token;
  241: 	int i;
  242: 
  243: 	curr_s = SCANNER_INIT_S;
  244: 	token = 0;
  245: 
  246: 	for (i = 0; lexeme[i]; i++) {
  247: 		while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
  248: 			curr_s = SS_OTHER_N(sst[curr_s]);
  249: 
  250: 		if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
  251: 			if ('\0' == lexeme[i + 1]
  252: 			    && FOLLBY_NON_ACCEPTING 
  253: 			       != SS_FB(sst[curr_s])) {
  254: 				fb = SS_FB(sst[curr_s]);
  255: 				*pfollowedby = fb;
  256: 				token = curr_s;
  257: 				break;
  258: 			}
  259: 			curr_s = SS_MATCH_N(sst[curr_s]);
  260: 		} else
  261: 			break;
  262: 	}
  263: 
  264: 	return token;
  265: }
  266: 
  267: 
  /*
   * is_integer() - test whether a lexeme is a decimal integer.
   *
   * Accepts an optional leading '-' followed by one or more decimal
   * digits and nothing else.  A leading '+' is not accepted.  The empty
   * lexeme and a lone "-" are rejected: they contain no digits, and
   * strtol() would quietly turn them into 0.
   *
   * Returns 1 if the lexeme is an integer, 0 otherwise.
   */
  static int
  is_integer(
  	char *lexeme
  	)
  {
  	int i = 0;

  	/* Allow a leading minus sign */
  	if ('-' == lexeme[i])
  		++i;

  	/* At least one digit is required */
  	if ('\0' == lexeme[i])
  		return 0;

  	/*
  	 * Check that all the remaining characters are digits.  The cast
  	 * keeps bytes >= 0x80 from reaching isdigit() as negative
  	 * values, which the <ctype.h> functions do not accept.
  	 */
  	for (; '\0' != lexeme[i]; ++i) {
  		if (!isdigit((unsigned char)lexeme[i]))
  			return 0;
  	}
  	return 1;
  }
  287: 
  288: 
  /*
   * is_double() - test whether a lexeme is a floating point number.
   *
   * Accepted form:
   *	[+|-] digits* '.' digits* [ (e|E) [+|-] digits* ]
   * The decimal point is required and at least one digit must appear
   * before or after it.  Digits after the exponent marker are not
   * required, so "1.5e" is accepted.
   *
   * Characters are passed to the <ctype.h> functions as unsigned char;
   * handing them a negative plain char (any byte >= 0x80 where char is
   * signed) is undefined behavior.
   *
   * Returns 1 if the whole lexeme matches, 0 otherwise.
   */
  static int
  is_double(
  	char *lexeme
  	)
  {
  	unsigned int num_digits = 0;	/* digits seen in the mantissa */
  	unsigned int i = 0;

  	/* Check for an optional '+' or '-' */
  	if ('+' == lexeme[i] || '-' == lexeme[i])
  		i++;

  	/* Read the integer part */
  	for (; isdigit((unsigned char)lexeme[i]); i++)
  		num_digits++;

  	/* Check for the required decimal point */
  	if ('.' != lexeme[i])
  		return 0;
  	i++;

  	/* Check for any digits after the decimal point */
  	for (; isdigit((unsigned char)lexeme[i]); i++)
  		num_digits++;

  	/*
  	 * The integer part and the fraction part must not both be
  	 * empty.
  	 */
  	if (0 == num_digits)
  		return 0;

  	/* Check if we are done */
  	if ('\0' == lexeme[i])
  		return 1;

  	/* There is still more input, it has to be an exponent */
  	if ('e' != tolower((unsigned char)lexeme[i]))
  		return 0;
  	i++;

  	/* Read an optional sign */
  	if ('+' == lexeme[i] || '-' == lexeme[i])
  		i++;

  	/* Now read the exponent digits */
  	while (isdigit((unsigned char)lexeme[i]))
  		i++;

  	/* Anything left over means this is not a number */
  	return '\0' == lexeme[i];
  }
  349: 
  350: 
  351: /* is_special() - Test whether a character is a token */
  352: static inline int
  353: is_special(
  354: 	int ch
  355: 	)
  356: {
  357: 	return (int)strchr(special_chars, ch);
  358: }
  359: 
  360: 
  361: static int
  362: is_EOC(
  363: 	int ch
  364: 	)
  365: {
  366: 	if ((old_config_style && (ch == '\n')) ||
  367: 	    (!old_config_style && (ch == ';')))
  368: 		return 1;
  369: 	return 0;
  370: }
  371: 
  372: 
  373: char *
  374: quote_if_needed(char *str)
  375: {
  376: 	char *ret;
  377: 	size_t len;
  378: 	size_t octets;
  379: 
  380: 	len = strlen(str);
  381: 	octets = len + 2 + 1;
  382: 	ret = emalloc(octets);
  383: 	if ('"' != str[0] 
  384: 	    && (strcspn(str, special_chars) < len 
  385: 		|| strchr(str, ' ') != NULL)) {
  386: 		snprintf(ret, octets, "\"%s\"", str);
  387: 	} else
  388: 		strncpy(ret, str, octets);
  389: 
  390: 	return ret;
  391: }
  392: 
  393: 
  394: static int
  395: create_string_token(
  396: 	char *lexeme
  397: 	)
  398: {
  399: 	char *pch;
  400: 
  401: 	/*
  402: 	 * ignore end of line whitespace
  403: 	 */
  404: 	pch = lexeme;
  405: 	while (*pch && isspace(*pch))
  406: 		pch++;
  407: 
  408: 	if (!*pch) {
  409: 		yylval.Integer = T_EOC;
  410: 		return yylval.Integer;
  411: 	}
  412: 
  413: 	yylval.String = estrdup(lexeme);
  414: 	return T_String;
  415: }
  416: 
  417: 
  418: /*
  419:  * yylex() - function that does the actual scanning.
  420:  * Bison expects this function to be called yylex and for it to take no
  421:  * input and return an int.
  422:  * Conceptually yylex "returns" yylval as well as the actual return
  423:  * value representing the token or type.
  424:  */
  425: int
  426: yylex(
  427: 	void
  428: 	)
  429: {
  430: 	int i, instring = 0;
  431: 	int yylval_was_set = 0;
  432: 	int token;		/* The return value/the recognized token */
  433: 	int ch;
  434: 	static follby followedby = FOLLBY_TOKEN;
  435: 
  436: 	do {
  437: 		/* Ignore whitespace at the beginning */
  438: 		while (EOF != (ch = get_next_char()) &&
  439: 		       isspace(ch) &&
  440: 		       !is_EOC(ch))
  441: 			; /* Null Statement */
  442: 
  443: 		if (EOF == ch) {
  444: 
  445: 			if (!input_from_file || !curr_include_level) 
  446: 				return 0;
  447: 
  448: 			FCLOSE(fp[curr_include_level]);
  449: 			ip_file = fp[--curr_include_level];
  450: 			token = T_EOC;
  451: 			goto normal_return;
  452: 
  453: 		} else if (is_EOC(ch)) {
  454: 
  455: 			/* end FOLLBY_STRINGS_TO_EOC effect */
  456: 			followedby = FOLLBY_TOKEN;
  457: 			token = T_EOC;
  458: 			goto normal_return;
  459: 
  460: 		} else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
  461: 			/* special chars are their own token values */
  462: 			token = ch;
  463: 			/*
  464: 			 * '=' implies a single string following as in:
  465: 			 * setvar Owner = "The Boss" default
  466: 			 * This could alternatively be handled by
  467: 			 * removing '=' from special_chars and adding
  468: 			 * it to the keyword table.
  469: 			 */
  470: 			if ('=' == ch)
  471: 				followedby = FOLLBY_STRING;
  472: 			yytext[0] = (char)ch;
  473: 			yytext[1] = '\0';
  474: 			goto normal_return;
  475: 		} else
  476: 			push_back_char(ch);
  477: 
  478: 		/* save the position of start of the token */
  479: 		ip_file->prev_token_line_no = ip_file->line_no;
  480: 		ip_file->prev_token_col_no = ip_file->col_no;
  481: 
  482: 		/* Read in the lexeme */
  483: 		i = 0;
  484: 		while (EOF != (ch = get_next_char())) {
  485: 
  486: 			yytext[i] = (char)ch;
  487: 
  488: 			/* Break on whitespace or a special character */
  489: 			if (isspace(ch) || is_EOC(ch) 
  490: 			    || '"' == ch
  491: 			    || (FOLLBY_TOKEN == followedby
  492: 				&& is_special(ch)))
  493: 				break;
  494: 
  495: 			/* Read the rest of the line on reading a start
  496: 			   of comment character */
  497: 			if ('#' == ch) {
  498: 				while (EOF != (ch = get_next_char())
  499: 				       && '\n' != ch)
  500: 					; /* Null Statement */
  501: 				break;
  502: 			}
  503: 
  504: 			i++;
  505: 			if (i >= COUNTOF(yytext))
  506: 				goto lex_too_long;
  507: 		}
  508: 		/* Pick up all of the string inside between " marks, to
  509: 		 * end of line.  If we make it to EOL without a
  510: 		 * terminating " assume it for them.
  511: 		 *
  512: 		 * XXX - HMS: I'm not sure we want to assume the closing "
  513: 		 */
  514: 		if ('"' == ch) {
  515: 			instring = 1;
  516: 			while (EOF != (ch = get_next_char()) &&
  517: 			       ch != '"' && ch != '\n') {
  518: 				yytext[i++] = (char)ch;
  519: 				if (i >= COUNTOF(yytext))
  520: 					goto lex_too_long;
  521: 			}
  522: 			/*
  523: 			 * yytext[i] will be pushed back as not part of
  524: 			 * this lexeme, but any closing quote should
  525: 			 * not be pushed back, so we read another char.
  526: 			 */
  527: 			if ('"' == ch)
  528: 				ch = get_next_char();
  529: 		}
  530: 		/* Pushback the last character read that is not a part
  531: 		 * of this lexeme.
  532: 		 * If the last character read was an EOF, pushback a
  533: 		 * newline character. This is to prevent a parse error
  534: 		 * when there is no newline at the end of a file.
  535: 		 */
  536: 		if (EOF == ch)
  537: 			push_back_char('\n');
  538: 		else
  539: 			push_back_char(ch); 
  540: 		yytext[i] = '\0';
  541: 	} while (i == 0);
  542: 
  543: 	/* Now return the desired token */
  544: 	
  545: 	/* First make sure that the parser is *not* expecting a string
  546: 	 * as the next token (based on the previous token that was
  547: 	 * returned) and that we haven't read a string.
  548: 	 */
  549: 	
  550: 	if (followedby == FOLLBY_TOKEN && !instring) {
  551: 		token = is_keyword(yytext, &followedby);
  552: 		if (token)
  553: 			goto normal_return;
  554: 		else if (is_integer(yytext)) {
  555: 			yylval_was_set = 1;
  556: 			errno = 0;
  557: 			if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
  558: 			    && ((errno == EINVAL) || (errno == ERANGE))) {
  559: 				msyslog(LOG_ERR, 
  560: 					"Integer cannot be represented: %s",
  561: 					yytext);
  562: 				exit(1);
  563: 			} else {
  564: 				token = T_Integer;
  565: 				goto normal_return;
  566: 			}
  567: 		}
  568: 		else if (is_double(yytext)) {
  569: 			yylval_was_set = 1;
  570: 			errno = 0;
  571: 			if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
  572: 				msyslog(LOG_ERR,
  573: 					"Double too large to represent: %s",
  574: 					yytext);
  575: 				exit(1);
  576: 			} else {
  577: 				token = T_Double;
  578: 				goto normal_return;
  579: 			}
  580: 		} else {
  581: 			/* Default: Everything is a string */
  582: 			yylval_was_set = 1;
  583: 			token = create_string_token(yytext);
  584: 			goto normal_return;
  585: 		}
  586: 	}
  587: 
  588: 	/*
  589: 	 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
  590: 	 * of a string.  Hence, we need to return T_String.
  591: 	 * 
  592: 	 * _Except_ we might have a -4 or -6 flag on a an association
  593: 	 * configuration line (server, peer, pool, etc.).
  594: 	 *
  595: 	 * This is a terrible hack, but the grammar is ambiguous so we
  596: 	 * don't have a choice.  [SK]
  597: 	 *
  598: 	 * The ambiguity is in the keyword scanner, not ntp_parser.y.
  599: 	 * We do not require server addresses be quoted in ntp.conf,
  600: 	 * complicating the scanner's job.  To avoid trying (and
  601: 	 * failing) to match an IP address or DNS name to a keyword,
  602: 	 * the association keywords use FOLLBY_STRING in the keyword
  603: 	 * table, which tells the scanner to force the next token to be
  604: 	 * a T_String, so it does not try to match a keyword but rather
  605: 	 * expects a string when -4/-6 modifiers to server, peer, etc.
  606: 	 * are encountered.
  607: 	 * restrict -4 and restrict -6 parsing works correctly without
  608: 	 * this hack, as restrict uses FOLLBY_TOKEN.  [DH]
  609: 	 */
  610: 	if ('-' == yytext[0]) {
  611: 		if ('4' == yytext[1]) {
  612: 			token = T_Ipv4_flag;
  613: 			goto normal_return;
  614: 		} else if ('6' == yytext[1]) {
  615: 			token = T_Ipv6_flag;
  616: 			goto normal_return;
  617: 		}
  618: 	}
  619: 
  620: 	instring = 0;
  621: 	if (FOLLBY_STRING == followedby)
  622: 		followedby = FOLLBY_TOKEN;
  623: 
  624: 	yylval_was_set = 1;
  625: 	token = create_string_token(yytext);
  626: 
  627: normal_return:
  628: 	if (T_EOC == token)
  629: 		DPRINTF(4,("\t<end of command>\n"));
  630: 	else
  631: 		DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
  632: 			    token_name(token)));
  633: 
  634: 	if (!yylval_was_set)
  635: 		yylval.Integer = token;
  636: 
  637: 	return token;
  638: 
  639: lex_too_long:
  640: 	yytext[min(sizeof(yytext) - 1, 50)] = 0;
  641: 	msyslog(LOG_ERR, 
  642: 		"configuration item on line %d longer than limit of %lu, began with '%s'",
  643: 		ip_file->line_no, (u_long)(sizeof(yytext) - 1), yytext);
  644: 
  645: 	/*
  646: 	 * If we hit the length limit reading the startup configuration
  647: 	 * file, abort.
  648: 	 */
  649: 	if (input_from_file)
  650: 		exit(sizeof(yytext) - 1);
  651: 
  652: 	/*
  653: 	 * If it's runtime configuration via ntpq :config treat it as
  654: 	 * if the configuration text ended before the too-long lexeme,
  655: 	 * hostname, or string.
  656: 	 */
  657: 	yylval.Integer = 0;
  658: 	return 0;
  659: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>