1: /* Copyright (C) 1989, 2000 Aladdin Enterprises. All rights reserved. */
2:
3: /*$Id: ansi2knr.c,v 1.1.1.1 2012/05/29 12:08:38 misho Exp $*/
4: /* Convert ANSI C function definitions to K&R ("traditional C") syntax */
5:
6: /*
7: ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
8: WARRANTY. No author or distributor accepts responsibility to anyone for the
9: consequences of using it or for whether it serves any particular purpose or
10: works at all, unless he says so in writing. Refer to the GNU General Public
11: License (the "GPL") for full details.
12:
13: Everyone is granted permission to copy, modify and redistribute ansi2knr,
14: but only under the conditions described in the GPL. A copy of this license
15: is supposed to have been given to you along with ansi2knr so you can know
16: your rights and responsibilities. It should be in a file named COPYLEFT,
17: or, if there is no file named COPYLEFT, a file named COPYING. Among other
18: things, the copyright notice and this notice must be preserved on all
19: copies.
20:
21: We explicitly state here what we believe is already implied by the GPL: if
22: the ansi2knr program is distributed as a separate set of sources and a
23: separate executable file which are aggregated on a storage medium together
24: with another program, this in itself does not bring the other program under
25: the GPL, nor does the mere fact that such a program or the procedures for
26: constructing it invoke the ansi2knr executable bring any other part of the
27: program under the GPL.
28: */
29:
30: /*
31: * Usage:
32: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
33: * --filename provides the file name for the #line directive in the output,
34: * overriding input_file (if present).
35: * If no input_file is supplied, input is read from stdin.
36: * If no output_file is supplied, output goes to stdout.
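 * Problems with the command line, with opening or writing files, or with
 * memory allocation are reported on stderr; questionable constructs in the
 * input itself are not diagnosed.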
38: *
39: * ansi2knr recognizes function definitions by seeing a non-keyword
40: * identifier at the left margin, followed by a left parenthesis, with a
41: * right parenthesis as the last character on the line, and with a left
42: * brace as the first token on the following line (ignoring possible
43: * intervening comments and/or preprocessor directives), except that a line
44: * consisting of only
45: * identifier1(identifier2)
46: * will not be considered a function definition unless identifier2 is
47: * the word "void", and a line consisting of
48: * identifier1(identifier2, <<arbitrary>>)
49: * will not be considered a function definition.
50: * ansi2knr will recognize a multi-line header provided that no intervening
51: * line ends with a left or right brace or a semicolon. These algorithms
52: * ignore whitespace, comments, and preprocessor directives, except that
53: * the function name must be the first thing on the line. The following
54: * constructs will confuse it:
55: * - Any other construct that starts at the left margin and
56: * follows the above syntax (such as a macro or function call).
57: * - Some macros that tinker with the syntax of function headers.
58: */
59:
60: /*
61: * The original and principal author of ansi2knr is L. Peter Deutsch
62: * <ghost@aladdin.com>. Other authors are noted in the change history
63: * that follows (in reverse chronological order):
64:
65: lpd 2000-04-12 backs out Eggert's changes because of bugs:
66: - concatlits didn't declare the type of its bufend argument;
67: - concatlits didn't recognize when it was inside a comment;
	- scanstring could scan backward past the beginning of the string;
	- the check for \ + newline in scanstring was unnecessary.
70:
71: 2000-03-05 Paul Eggert <eggert@twinsun.com>
72:
73: Add support for concatenated string literals.
74: * ansi2knr.c (concatlits): New decl.
75: (main): Invoke concatlits to concatenate string literals.
76: (scanstring): Handle backslash-newline correctly. Work with
77: character constants. Fix bug when scanning backwards through
78: backslash-quote. Check for unterminated strings.
79: (convert1): Parse character constants, too.
80: (appendline, concatlits): New functions.
81: * ansi2knr.1: Document this.
82:
83: lpd 1999-08-17 added code to allow preprocessor directives
84: wherever comments are allowed
85: lpd 1999-04-12 added minor fixes from Pavel Roskin
86: <pavel_roskin@geocities.com> for clean compilation with
87: gcc -W -Wall
88: lpd 1999-03-22 added hack to recognize lines consisting of
89: identifier1(identifier2, xxx) as *not* being procedures
90: lpd 1999-02-03 made indentation of preprocessor commands consistent
91: lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
92: endless loop; quoted strings within an argument list
93: confused the parser
94: lpd 1999-01-24 added a check for write errors on the output,
95: suggested by Jim Meyering <meyering@ascend.com>
96: lpd 1998-11-09 added further hack to recognize identifier(void)
97: as being a procedure
98: lpd 1998-10-23 added hack to recognize lines consisting of
99: identifier1(identifier2) as *not* being procedures
100: lpd 1997-12-08 made input_file optional; only closes input and/or
101: output file if not stdin or stdout respectively; prints
102: usage message on stderr rather than stdout; adds
103: --filename switch (changes suggested by
104: <ceder@lysator.liu.se>)
105: lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
106: compilers that don't understand void, as suggested by
107: Tom Lane
108: lpd 1996-01-15 changed to require that the first non-comment token
109: on the line following a function header be a left brace,
110: to reduce sensitivity to macros, as suggested by Tom Lane
111: <tgl@sss.pgh.pa.us>
112: lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
113: undefined preprocessor symbols as 0; changed all #ifdefs
114: for configuration symbols to #ifs
115: lpd 1995-04-05 changed copyright notice to make it clear that
116: including ansi2knr in a program does not bring the entire
117: program under the GPL
118: lpd 1994-12-18 added conditionals for systems where ctype macros
119: don't handle 8-bit characters properly, suggested by
120: Francois Pinard <pinard@iro.umontreal.ca>;
121: removed --varargs switch (this is now the default)
122: lpd 1994-10-10 removed CONFIG_BROKETS conditional
123: lpd 1994-07-16 added some conditionals to help GNU `configure',
124: suggested by Francois Pinard <pinard@iro.umontreal.ca>;
125: properly erase prototype args in function parameters,
126: contributed by Jim Avera <jima@netcom.com>;
127: correct error in writeblanks (it shouldn't erase EOLs)
128: lpd 1989-xx-xx original version
129: */
130:
131: /* Most of the conditionals here are to make ansi2knr work with */
132: /* or without the GNU configure machinery. */
133:
134: #if HAVE_CONFIG_H
135: # include <config.h>
136: #endif
137:
138: #include <stdio.h>
139: #include <ctype.h>
140:
141: #if HAVE_CONFIG_H
142:
143: /*
144: For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
145: This will define HAVE_CONFIG_H and so, activate the following lines.
146: */
147:
148: # if STDC_HEADERS || HAVE_STRING_H
149: # include <string.h>
150: # else
151: # include <strings.h>
152: # endif
153:
154: #else /* not HAVE_CONFIG_H */
155:
156: /* Otherwise do it the hard way */
157:
158: # ifdef BSD
159: # include <strings.h>
160: # else
161: # ifdef VMS
162: extern int strlen(), strncmp();
163: # else
164: # include <string.h>
165: # endif
166: # endif
167:
168: #endif /* not HAVE_CONFIG_H */
169:
170: #if STDC_HEADERS
171: # include <stdlib.h>
172: #else
173: /*
174: malloc and free should be declared in stdlib.h,
175: but if you've got a K&R compiler, they probably aren't.
176: */
177: # ifdef MSDOS
178: # include <malloc.h>
179: # else
180: # ifdef VMS
181: extern char *malloc();
182: extern void free();
183: # else
184: extern char *malloc();
185: extern int free();
186: # endif
187: # endif
188:
189: #endif
190:
191: /* Define NULL (for *very* old compilers). */
192: #ifndef NULL
193: # define NULL (0)
194: #endif
195:
/*
 * The ctype macros don't always handle 8-bit characters correctly.
 * Compensate for this here:
 *  - where isascii() is available and the headers are not known to be
 *    ANSI, is_ascii() screens out non-ASCII codes before the ctype macro
 *    is consulted;
 *  - the value handed to the ctype macro is always converted to
 *    unsigned char, because passing a negative value (a plain char with
 *    the high bit set, on machines where char is signed) is undefined.
 */
#ifdef isascii
#  undef HAVE_ISASCII		/* just in case */
#  define HAVE_ISASCII 1
#endif
#if STDC_HEADERS || !HAVE_ISASCII
#  define is_ascii(c) 1
#else
#  define is_ascii(c) isascii(c)
#endif

#define is_space(c) (is_ascii(c) && isspace((unsigned char)(c)))
#define is_alpha(c) (is_ascii(c) && isalpha((unsigned char)(c)))
#define is_alnum(c) (is_ascii(c) && isalnum((unsigned char)(c)))

/* Scanning macros: characters that may continue / begin an identifier. */
#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
218:
/*
 * Forward references.
 * These are old-style declarations without parameter lists, matching the
 * K&R-style definitions used further down in this file.
 */
char *ppdirforward();	/* skip forward over preprocessor directive lines */
char *ppdirbackward();	/* skip backward over preprocessor directive lines */
char *skipspace();	/* skip whitespace and comments, in either direction */
char *scanstring();	/* scan over a quoted string, in either direction */
int writeblanks();	/* blank out a range of text, keeping end-of-lines */
int test1();		/* decide whether a buffer holds a function definition */
int convert1();		/* rewrite a definition or header in K&R syntax */
227:
228: /* The main program */
229: int
230: main(argc, argv)
231: int argc;
232: char *argv[];
233: { FILE *in = stdin;
234: FILE *out = stdout;
235: char *filename = 0;
236: char *program_name = argv[0];
237: char *output_name = 0;
238: #define bufsize 5000 /* arbitrary size */
239: char *buf;
240: char *line;
241: char *more;
242: char *usage =
243: "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
244: /*
245: * In previous versions, ansi2knr recognized a --varargs switch.
246: * If this switch was supplied, ansi2knr would attempt to convert
247: * a ... argument to va_alist and va_dcl; if this switch was not
248: * supplied, ansi2knr would simply drop any such arguments.
249: * Now, ansi2knr always does this conversion, and we only
250: * check for this switch for backward compatibility.
251: */
252: int convert_varargs = 1;
253: int output_error;
254:
255: while ( argc > 1 && argv[1][0] == '-' ) {
256: if ( !strcmp(argv[1], "--varargs") ) {
257: convert_varargs = 1;
258: argc--;
259: argv++;
260: continue;
261: }
262: if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
263: filename = argv[2];
264: argc -= 2;
265: argv += 2;
266: continue;
267: }
268: fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
269: argv[1]);
270: fprintf(stderr, usage);
271: exit(1);
272: }
273: switch ( argc )
274: {
275: default:
276: fprintf(stderr, usage);
277: exit(0);
278: case 3:
279: output_name = argv[2];
280: out = fopen(output_name, "w");
281: if ( out == NULL ) {
282: fprintf(stderr, "%s: Cannot open output file %s\n",
283: program_name, output_name);
284: exit(1);
285: }
286: /* falls through */
287: case 2:
288: in = fopen(argv[1], "r");
289: if ( in == NULL ) {
290: fprintf(stderr, "%s: Cannot open input file %s\n",
291: program_name, argv[1]);
292: exit(1);
293: }
294: if ( filename == 0 )
295: filename = argv[1];
296: /* falls through */
297: case 1:
298: break;
299: }
300: if ( filename )
301: fprintf(out, "#line 1 \"%s\"\n", filename);
302: buf = malloc(bufsize);
303: if ( buf == NULL )
304: {
305: fprintf(stderr, "Unable to allocate read buffer!\n");
306: exit(1);
307: }
308: line = buf;
309: while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
310: {
311: test: line += strlen(line);
312: switch ( test1(buf) )
313: {
314: case 2: /* a function header */
315: convert1(buf, out, 1, convert_varargs);
316: break;
317: case 1: /* a function */
318: /* Check for a { at the start of the next line. */
319: more = ++line;
320: f: if ( line >= buf + (bufsize - 1) ) /* overflow check */
321: goto wl;
322: if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
323: goto wl;
324: switch ( *skipspace(ppdirforward(more), 1) )
325: {
326: case '{':
327: /* Definitely a function header. */
328: convert1(buf, out, 0, convert_varargs);
329: fputs(more, out);
330: break;
331: case 0:
332: /* The next line was blank or a comment: */
333: /* keep scanning for a non-comment. */
334: line += strlen(line);
335: goto f;
336: default:
337: /* buf isn't a function header, but */
338: /* more might be. */
339: fputs(buf, out);
340: strcpy(buf, more);
341: line = buf;
342: goto test;
343: }
344: break;
345: case -1: /* maybe the start of a function */
346: if ( line != buf + (bufsize - 1) ) /* overflow check */
347: continue;
348: /* falls through */
349: default: /* not a function */
350: wl: fputs(buf, out);
351: break;
352: }
353: line = buf;
354: }
355: if ( line != buf )
356: fputs(buf, out);
357: free(buf);
358: if ( output_name ) {
359: output_error = ferror(out);
360: output_error |= fclose(out);
361: } else { /* out == stdout */
362: fflush(out);
363: output_error = ferror(out);
364: }
365: if ( output_error ) {
366: fprintf(stderr, "%s: error writing to %s\n", program_name,
367: (output_name ? output_name : "stdout"));
368: exit(1);
369: }
370: if ( in != stdin )
371: fclose(in);
372: return 0;
373: }
374:
375: /*
376: * Skip forward or backward over one or more preprocessor directives.
377: */
/*
 * Forward variant: while p points at a '#', advance past that line,
 * including its terminator (CR, LF, or CR LF).  Returns the first position
 * that does not start with '#', or the position of the terminating NUL if
 * the string ends inside a directive.
 */
char *
ppdirforward(p)
    char *p;
{
    while ( *p == '#' ) {
        /* Find the end of this directive line. */
        while ( *p != '\r' && *p != '\n' ) {
            if ( *p == 0 )
                return p;
            p++;
        }
        /* A CR LF pair counts as a single line terminator. */
        if ( p[0] == '\r' && p[1] == '\n' )
            p++;
        p++;		/* step over the terminator itself */
    }
    return p;
}
/*
 * Backward variant.  p points at the last character of the text to examine
 * (normally the terminator of its final line) and limit at the first.
 * As long as the line ending at p starts with '#', p is moved back to the
 * terminator of the line before it.  Returns the resulting p; returns
 * limit if every line back to limit is a directive, and returns early if
 * a NUL is met while scanning.  Never looks at or returns a position
 * before limit.
 */
char *
ppdirbackward(p, limit)
    char *p;
    char *limit;
{
    char *np = p;

    for (;; p = --np) {
        /* Step from the LF of a CR LF pair onto the CR. */
        if (np > limit && *np == '\n' && np[-1] == '\r')
            --np;
        /* Walk back to the first character of this line. */
        for (; np > limit && np[-1] != '\r' && np[-1] != '\n'; --np)
            if (np[-1] == 0)
                return np;
        if (*np != '#')
            return p;		/* an ordinary line: done */
        if (np <= limit)
            return limit;	/* nothing precedes this directive */
    }
}
408:
/*
 * Skip over whitespace and comments, moving through the text in the
 * direction given by dir.  Returns a pointer to the first character that
 * is neither whitespace nor part of a comment.  If a NUL is reached while
 * inside a comment (e.g. the comment continues on another line), the
 * pointer to that NUL is returned.
 *
 * The delimiter tests look at the current character and its neighbour in
 * the direction of travel, so a forward scan recognizes a comment by its
 * opening slash-star and a backward scan by its closing star-slash.
 */
char *
skipspace(p, dir)
    char *p;
    int dir;			/* 1 for forward, -1 for backward */
{
    for ( ; ; ) {
        /* Run over whitespace. */
        for ( ; is_space(*p); p += dir )
            ;
        /* Anything other than a comment delimiter ends the scan. */
        if ( *p != '/' || p[dir] != '*' )
            return p;
        /* Step over the delimiter and look for the matching one. */
        p += 2 * dir;
        for ( ; *p != '*' || p[dir] != '/'; p += dir )
            if ( *p == 0 )
                return p;	/* multi-line comment?? */
        p += 2 * dir;
    }
}
433:
/*
 * Scan over a quoted string, in either direction.
 *
 * p points at one of the string's double quotes: the opening one when
 * dir is 1, the closing one when dir is -1.  Returns a pointer just beyond
 * the matching quote in the direction of travel.  If a NUL is reached
 * first (unterminated string), a pointer to that NUL is returned instead
 * of running on through memory.
 *
 * A quote is escaped when it is preceded, in text order, by an odd number
 * of backslashes.  That is independent of the scanning direction, so the
 * backslashes are always counted toward lower addresses.  (When scanning
 * backward this reads the character in front of the opening quote, so the
 * string must not begin at the very start of the buffer.)
 */
char *
scanstring(p, dir)
    char *p;
    int dir;
{
    for (p += dir; *p != 0; p += dir) {
        char *q;

        if (*p != '"')
            continue;
        /* Count the backslashes immediately preceding this quote. */
        for (q = p; q[-1] == '\\'; q--)
            ;
        if (((p - q) & 1) == 0)
            return p + dir;	/* unescaped: the matching quote */
    }
    return p;
}
444:
/*
 * Replace the characters in [start, end) with spaces, leaving carriage
 * returns and line feeds in place so that line structure is preserved.
 * An empty or reversed range changes nothing.  Always returns 0.
 */
int
writeblanks(start, end)
    char *start;
    char *end;
{
    while ( start < end ) {
        switch ( *start ) {
        case '\r':
        case '\n':
            break;		/* keep end-of-line characters */
        default:
            *start = ' ';
        }
        start++;
    }
    return 0;
}
459:
/*
 * Decide whether the text in buf is a function definition.
 * buf may hold several lines and may end with a newline; it must start
 * with the candidate function name.
 * Result:
 *	 0 - definitely not a function definition;
 *	 1 - definitely a function definition;
 *	-1 - possibly the beginning of a function definition:
 *	     the caller should append another line and ask again.
 * The value 2 (function prototype) is reserved but never produced: text
 * ending in a semicolon is reported as 0.  Prototypes are not converted
 * because Ghostscript's declaration-generating macros look too much like
 * prototypes, and confuse the algorithms.
 */
int
test1(buf)
    char *buf;
{   char *scan = buf;
    char *last;			/* last significant character of buf */
    char *name_end;		/* just past the leading identifier */
    int verdict;

    if ( !isidfirstchar(*scan) )
        return 0;		/* no name at left margin */

    /* Classify by the last character that isn't whitespace, */
    /* a comment, or part of a trailing preprocessor directive. */
    last = skipspace(ppdirbackward(buf + strlen(buf) - 1, buf), -1);
    if ( *last == '{' || *last == '}' )
        return 0;		/* not a function */
    if ( *last == ';' )
        verdict = 0 /*2*/;
    else if ( *last == ')' )
        verdict = 1;
    else
        verdict = -1;

    /* The name must be followed by a non-empty parenthesized list. */
    for ( ; isidchar(*scan); scan++ )
        ;
    name_end = scan;
    scan = skipspace(scan, 1);
    if ( *scan != '(' )
        return 0;		/* not a function */
    scan = skipspace(scan + 1, 1);
    if ( *scan == ')' )
        return 0;		/* no parameters */

    /* Check that the apparent function name isn't a keyword. */
    /* We only need to check for keywords that could be followed */
    /* by a left parenthesis (which, unfortunately, is most of them). */
    {   static char *words[] =
           {   "asm", "auto", "case", "char", "const", "double",
               "extern", "float", "for", "if", "int", "long",
               "register", "return", "short", "signed", "sizeof",
               "static", "switch", "typedef", "unsigned",
               "void", "volatile", "while", 0
           };
        unsigned name_len = name_end - buf;
        int i;

        for ( i = 0; words[i] != 0; i++ )
            if ( strlen(words[i]) == name_len &&
                 !strncmp(words[i], buf, name_len)
               )
                return 0;	/* name is a keyword */
    }

    /*
     * Reject identifier1(identifier2) unless identifier2 is "void",
     * and reject identifier1(identifier2, xxxx) outright.
     */
    {   char *arg = scan;
        int arg_len;

        while ( isidchar(*scan) )
            scan++;
        arg_len = scan - arg;
        scan = skipspace(scan, 1);
        if ( *scan == ',' )
            return 0;		/* not a function */
        if ( *scan == ')' &&
             !(arg_len == 4 && !strncmp(arg, "void", 4))
           )
            return 0;		/* not a function */
    }

    /*
     * If the last significant character was a ), we need to count
     * parentheses, because it might be part of a formal parameter
     * that is a procedure.
     */
    if ( verdict > 0 ) {
        int depth = 0;

        for ( scan = skipspace(buf, 1); *scan;
              scan = skipspace(scan + 1, 1)
            ) {
            if ( *scan == '(' )
                depth++;
            else if ( *scan == ')' )
                depth--;
        }
        if ( depth > 0 )
            verdict = -1;	/* still inside the parameter list */
    }
    return verdict;
}
553:
/*
 * Convert a recognized function definition or header to K&R syntax and
 * write the result to out.
 *
 * buf holds the text starting at the left margin and running through the
 * closing parenthesis of the parameter list (plus whatever follows it).
 * buf is modified in place: separators are overwritten with ';', nested
 * prototype parameters and a lone "void" are blanked out, and a NUL is
 * stored at the position of a "..." parameter.
 *
 * If header is non-zero only a declaration is produced: the text up to and
 * including the '(' followed by ");" and the end-of-line characters that
 * occurred after the '('.  Otherwise the output is the text up to '(',
 * the parameter names separated by ", ", ") ", and then the original
 * parameter declarations, each now terminated by ';'.
 *
 * Returns 0 normally, or -1 (after copying buf through unchanged) if the
 * break table cannot be allocated.
 */
int
convert1(buf, out, header, convert_varargs)
    char *buf;
    FILE *out;
    int header;			/* Boolean */
    int convert_varargs;	/* Boolean */
{	char *endfn;
	char *p;
	/*
	 * The breaks table holds two pointers per argument: where the
	 * argument's text begins, and where the name it declares begins
	 * (or the literal "va_alist" for a ... argument).  One more
	 * pointer after the last pair marks the position just past the
	 * closing parenthesis of the argument list.
	 */
	char **breaks;
	unsigned num_breaks = 2;	/* for testing */
	char **btop;
	char **bp;
	char **ap;
	char *vararg = 0;	/* start of "..." in buf, if one was seen */

	/* Pre-ANSI implementations don't agree on whether strchr */
	/* is called strchr or index, so we open-code it here. */
	/* Afterwards endfn points just past the first '('. */
	for ( endfn = buf; *(endfn++) != '('; )
	  ;
	/* The parse restarts here with a larger table if it fills up. */
top:	p = endfn;
	breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
	if ( breaks == NULL )
	   {	/* Couldn't allocate break table, give up */
		fprintf(stderr, "Unable to allocate break table!\n");
		fputs(buf, out);
		return -1;
	   }
	/* Stop two slots short of the end so that a pair plus the */
	/* final end-of-list pointer always fit. */
	btop = breaks + num_breaks * 2 - 2;
	bp = breaks;
	/* Parse the argument list */
	do
	   {	int level = 0;		/* parenthesis nesting depth */
		char *lp = NULL;	/* first '(' at depth 0 */
		char *rp = NULL;	/* most recent ')' that closed a nested '(' */
		char *end = NULL;	/* the ',' or ')' ending this argument */

		if ( bp >= btop )
		   {	/* Filled up break table. */
			/* Allocate a bigger one and start over. */
			free((char *)breaks);
			num_breaks <<= 1;
			goto top;
		   }
		*bp++ = p;
		/* Find the end of the argument */
		for ( ; end == NULL; p++ )
		   {	switch(*p)
			   {
			   case ',':
				if ( !level ) end = p;
				break;
			   case '(':
				if ( !level ) lp = p;
				level++;
				break;
			   case ')':
				if ( --level < 0 ) end = p;
				else rp = p;
				break;
			   case '/':
				/* Jump over a comment; the -1 offsets */
				/* the loop's p++. */
				if (p[1] == '*')
				    p = skipspace(p, 1) - 1;
				break;
			   case '"':
				/* Jump over a string literal likewise. */
			       p = scanstring(p, 1) - 1;
			       break;
			   default:
				;
			   }
		   }
		/* Erase any embedded prototype parameters. */
		if ( lp && rp )
		  writeblanks(lp + 1, rp);
		p--;			/* back up over terminator */
		/* Find the name being declared. */
		/* This is complicated because of procedure and */
		/* array modifiers. */
		for ( ; ; )
		   {	p = skipspace(p - 1, -1);
			switch ( *p )
			   {
			   case ']':	/* skip array dimension(s) */
			   case ')':	/* skip procedure args OR name */
			   {	int level = 1;
				/* Scan backward to the matching opener. */
				while ( level )
				 switch ( *--p )
				   {
				   case ']': case ')':
				       level++;
				       break;
				   case '[': case '(':
				       level--;
				       break;
				   case '/':
				       /* end of a comment: skip it backward */
				       if (p > buf && p[-1] == '*')
					   p = skipspace(p, -1) + 1;
				       break;
				   case '"':
				       /* end of a string: skip it backward */
				       p = scanstring(p, -1) + 1;
				       break;
				   default: ;
				   }
			   }
				/* A group that opens with "(*" holds the */
				/* name itself, as in (*name)(...). */
				if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
				   {	/* We found the name being declared */
					while ( !isidfirstchar(*p) )
					  p = skipspace(p, 1) + 1;
					goto found;
				   }
				break;
			   default:
				goto found;
			   }
		   }
		/* Here p points into the declared name, or at the last */
		/* '.' of a "..." argument. */
found:		if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
		  {	if ( convert_varargs )
			  {	*bp++ = "va_alist";
				vararg = p-2;
			  }
			else
			  {	/* Drop the argument: blank out the "..." */
				/* and, unless it is the sole argument, */
				/* the character before it as well. */
				p++;
				if ( bp == breaks + 1 )	/* sole argument */
				  writeblanks(breaks[0], p);
				else
				  writeblanks(bp[-1] - 1, p);
				bp--;
			  }
		   }
		else
		   {	/* Back up to the first character of the name. */
			while ( isidchar(*p) ) p--;
			*bp++ = p+1;
		   }
		p = end;
	   }
	while ( *p++ == ',' );
	/* Final entry: the position just past the closing parenthesis. */
	*bp = p;
	/* Make a special check for 'void' arglist */
	if ( bp == breaks+2 )
	   {	p = skipspace(breaks[0], 1);
		if ( !strncmp(p, "void", 4) )
		   {	p = skipspace(p+4, 1);
			/* Only "void" lies between the parentheses? */
			if ( p == breaks[2] - 1 )
			   {	bp = breaks;	/* yup, pretend arglist is empty */
				/* Blank out "void" and the ')' too. */
				writeblanks(breaks[0], p + 1);
			   }
		   }
	   }
	/* Put out the function name and left parenthesis. */
	p = buf;
	while ( p != endfn ) putc(*p, out), p++;
	/* Put out the declaration. */
	if ( header )
	  {	fputs(");", out);
		/* Keep the output's line count equal to the input's. */
		for ( p = breaks[0]; *p; p++ )
		  if ( *p == '\r' || *p == '\n' )
		    putc(*p, out);
	  }
	else
	  {	/* The odd-numbered entries are the parameter names. */
		for ( ap = breaks+1; ap < bp; ap += 2 )
		  {	p = *ap;
			while ( isidchar(*p) )
			  putc(*p, out), p++;
			if ( ap < bp - 1 )
			  fputs(", ", out);
		  }
		fputs(")  ", out);
		/* Put out the argument declarations */
		/* Turn the ',' or ')' that ended each argument into ';'. */
		for ( ap = breaks+2; ap <= bp; ap += 2 )
		  (*ap)[-1] = ';';
		if ( vararg != 0 )
		  {	/* Cut the text at the "..." and put va_dcl */
			/* in its place. */
			*vararg = 0;
			fputs(breaks[0], out);		/* any prior args */
			fputs("va_dcl", out);		/* the final arg */
			fputs(bp[0], out);
		  }
		else
		  fputs(breaks[0], out);
	  }
	free((char *)breaks);
	return 0;
}
/* FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org> */