1: #!/usr/bin/python -u
2: #
3: # This is the API builder, it parses the C sources and build the
4: # API formal description in XML.
5: #
6: # See Copyright for the status of this software.
7: #
8: # daniel@veillard.com
9: #
10: import os, sys
11: import string
12: import glob
13:
# Global debug switch: non-zero makes the parser print trace output.
debug=0
#debugsym='ignorableWhitespaceSAXFunc'
# When set to a symbol name, extra diagnostics are printed each time that
# symbol is defined, updated or referenced during the build.
debugsym=None
17:
18: #
19: # C parser analysis code
20: #
# Source files the API builder must skip entirely, mapped to the (purely
# informative) reason why they are excluded from the public API description.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "rngparser.h": "not yet integrated",
  "elfgcchack.h": "not a normal header",
  "testHTML.c": "test tool",
  "testReader.c": "test tool",
  "testSchemas.c": "test tool",
  "testXPath.c": "test tool",
  "testAutomata.c": "test tool",
  "testModule.c": "test tool",
  "testRegexp.c": "test tool",
  "testThreads.c": "test tool",
  "testC14N.c": "test tool",
  "testRelax.c": "test tool",
  "testThreadsWin32.c": "test tool",
  "testSAX.c": "test tool",
  "testURI.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "xzlib.h": "Internal API only 2.8.0",
  "buf.h": "Internal API only 2.9.0",
  "enc.h": "Internal API only 2.9.0",
  "/save.h": "Internal API only 2.9.0",
  "timsort.h": "Internal header only for xpath.c 2.9.0",
}
61:
# Portability/annotation words the tokenizer must ignore, mapped to a pair
# (number of following tokens to also skip, informative reason).  Consumed
# by CParser.token(), which drops the word plus that many extra tokens.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "XMLCDECL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
}
85:
def escape(raw):
    """Escape the five XML special characters in *raw* for safe XML output.

    BUG FIX: the replacements were no-ops ('&' replaced by '&'), so the
    produced XML could be malformed.  The ampersand must be handled first,
    otherwise the '&' introduced by the other substitutions would be
    escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
93:
def uniq(items):
    """Return the distinct elements of *items*, keeping first-seen order."""
    return list(dict.fromkeys(items))
99:
class identifier:
    """One parsed C symbol (function, typedef, macro, ...) and its metadata.

    Records where the symbol was seen (header, module, line number), what
    kind of symbol it is (type), the parsed comment information (info) and
    the preprocessor conditionals guarding it.
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header    # header file declaring the symbol
        self.module = module    # C module implementing the symbol
        self.type = type        # kind: "function", "macro", "typedef", ...
        self.info = info        # information parsed from the doc comment
        self.extra = extra
        self.lineno = lineno
        self.static = 0         # 1 when the symbol is file-local (static)
        # Keep a private copy of the conditionals; an empty list is
        # normalized to None so later comparisons stay simple.
        if conditionals == None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module != None:
            r = r + " from %s" % (self.module)
        if self.info != None:
            r = r + " " + repr(self.info)
        if self.extra != None:
            r = r + " " + repr(self.extra)
        if self.conditionals != None:
            r = r + " " + repr(self.conditionals)
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        if conditionals == None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # BUG FIX: this accessor used to return self.module.
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into the symbol.

        header/module/type are only filled in when previously unknown;
        info, extra and conditionals always take the latest value when
        provided.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        # BUG FIX: the header value itself must be stored; the old code
        # stored the module in the header slot.
        if header != None and self.header == None:
            self.set_header(header)
        if module != None and (self.module == None or self.header == self.module):
            self.set_module(module)
        if type != None and self.type == None:
            self.set_type(type)
        if info != None:
            self.set_info(info)
        if extra != None:
            self.set_extra(extra)
        if conditionals != None:
            self.set_conditionals(conditionals)
190:
class index:
    """Collection of all the symbols harvested from C headers and modules.

    Every symbol lives in the global `identifiers` dict and is also filed
    in a per-kind dict (functions, variables, structs, ...) so the
    generator can emit them grouped by category.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}   # every registered symbol, keyed by name
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}    # symbols referenced from C code bodies
        self.info = {}          # information parsed from the top comment

    def _register(self, name, header, module, static, type, lineno,
                  info, extra, conditionals):
        """Fetch-or-create the identifier *name* and merge the new data.

        Returns None for reserved '__'-prefixed names.  BUG FIX: the old
        code passed lineno to identifier.update(), which takes no such
        argument; the resulting TypeError was swallowed by a bare except
        and the identifier was silently recreated instead of updated.
        """
        if name[0:2] == '__':
            return None
        try:
            d = self.identifiers[name]
            d.update(header, module, type, info, extra, conditionals)
        except KeyError:
            d = identifier(name, header, module, type, lineno, info,
                           extra, conditionals)
            self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Register a reference to symbol *name* seen in C code."""
        d = self._register(name, header, module, static, type, lineno,
                           info, extra, conditionals)
        if d == None:
            return None

        if name != None and type != None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Register the definition of symbol *name*, filing it in the
        category dict matching its *type*."""
        d = self._register(name, header, module, static, type, lineno,
                           info, extra, conditionals)
        if d == None:
            return None

        if name != None and type != None:
            # functypes share the functions dict with plain functions
            if type == "function" or type == "functype":
                self.functions[name] = d
            elif type == "variable":
                self.variables[name] = d
            elif type == "include":
                self.includes[name] = d
            elif type == "struct":
                self.structs[name] = d
            elif type == "enum":
                self.enums[name] = d
            elif type == "typedef":
                self.typedefs[name] = d
            elif type == "macro":
                self.macros[name] = d
            else:
                print("Unable to register type ", type)

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def merge(self, idx):
        """Merge the module index *idx* into this one, reporting symbols
        declared more than once."""
        for id in list(idx.functions.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if id in self.macros:
                del self.macros[id]
            if id in self.functions:
                print("function %s from %s redeclared in %s" % (
                    id, self.functions[id].header, idx.functions[id].header))
            else:
                self.functions[id] = idx.functions[id]
                self.identifiers[id] = idx.functions[id]
        for id in list(idx.variables.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if id in self.macros:
                del self.macros[id]
            if id in self.variables:
                print("variable %s from %s redeclared in %s" % (
                    id, self.variables[id].header, idx.variables[id].header))
            else:
                self.variables[id] = idx.variables[id]
                self.identifiers[id] = idx.variables[id]
        for id in list(idx.structs.keys()):
            if id in self.structs:
                print("struct %s from %s redeclared in %s" % (
                    id, self.structs[id].header, idx.structs[id].header))
            else:
                self.structs[id] = idx.structs[id]
                self.identifiers[id] = idx.structs[id]
        for id in list(idx.typedefs.keys()):
            if id in self.typedefs:
                print("typedef %s from %s redeclared in %s" % (
                    id, self.typedefs[id].header, idx.typedefs[id].header))
            else:
                self.typedefs[id] = idx.typedefs[id]
                self.identifiers[id] = idx.typedefs[id]
        for id in list(idx.macros.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if id in self.variables:
                continue
            if id in self.functions:
                continue
            if id in self.enums:
                continue
            if id in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    id, self.macros[id].header, idx.macros[id].header))
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        for id in list(idx.enums.keys()):
            if id in self.enums:
                print("enum %s from %s redeclared in %s" % (
                    id, self.enums[id].header, idx.enums[id].header))
            else:
                self.enums[id] = idx.enums[id]
                self.identifiers[id] = idx.enums[id]

    def merge_public(self, idx):
        """Merge the mainline-code index *idx* into this header index,
        checking that the preprocessor conditionals agree on both sides."""
        for id in list(idx.functions.keys()):
            if id in self.functions:
                # check that function condition agrees with header
                if idx.functions[id].conditionals != \
                   self.functions[id].conditionals:
                    print("Header condition differs from Function for %s:" \
                          % id)
                    print(" H: %s" % self.functions[id].conditionals)
                    print(" C: %s" % idx.functions[id].conditionals)
                up = idx.functions[id]
                self.functions[id].update(None, up.module, up.type, up.info, up.extra)
        #     else:
        #         print "Function %s from %s is not declared in headers" % (
        #                id, idx.functions[id].module)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print a one-line total/public count summary for one category."""
        count = 0
        public = 0
        for name in list(dict.keys()):
            id = dict[name]
            count = count + 1
            if id.static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))


    def analyze(self):
        """Print summary statistics for every symbol category."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
369:
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
    analyzing it line by line"""
    def __init__(self, input):
        # input is an open file object; tokens already scanned (or pushed
        # back) are buffered in self.tokens ahead of the current line.
        self.input = input
        self.tokens = []
        self.line = ""      # unconsumed remainder of the current line
        self.lineno = 0

    def getline(self):
        """Return the next non-empty input line, stripped of surrounding
        whitespace, with backslash-continued lines joined.  Returns None
        at end of file."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.lstrip()
            line = line.rstrip()
            if line == '':
                continue
            # Join physical lines ending with a backslash continuation.
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.lstrip()
                n = n.rstrip()
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        """Return the current physical line number."""
        return self.lineno

    def push(self, token):
        """Push *token* back so it is returned by the next token() call."""
        self.tokens.insert(0, token);

    def debug(self):
        # NOTE(review): self.last is only set after the first token()
        # call; calling debug() before that raises AttributeError.
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    def token(self):
        """Return the next lexical token as a (kind, value) pair, where
        kind is one of 'preproc', 'string', 'comment', 'name', 'sep' or
        'op'.  Returns None at end of input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line == None:
                return None

            # Preprocessor directive: split the whole line into
            # 'preproc' tokens.
            if line[0] == '#':
                self.tokens = list(map((lambda x: ('preproc', x)),
                                       line.split()))
                break;
            l = len(line)
            # String or character literal, possibly spanning lines.
            if line[0] == '"' or line[0] == "'":
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            l = i
                            found = 1
                            break
                        if line[i] == '\\':
                            # skip the escaped character
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line == None:
                            return None
                self.last = ('string', tok)
                return self.last

            # Classic /* ... */ comment, possibly spanning lines.
            if l >= 2 and line[0] == '/' and line[1] == '*':
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # NOTE(review): [:i-1] drops the character just
                            # before '*/' (usually the padding space) —
                            # confirm this trimming is intentional.
                            line = line[:i-1]
                            l = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line == None:
                            return None
                self.last = ('comment', tok)
                return self.last
            # C++ style // comment: the rest of the line.
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last
            # Cut the line at the first comment or literal start so the
            # remainder is re-examined by the branches above next time.
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and line[i+1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i+1 < l and line[i+1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            l = len(line)
            i = 0
            # Tokenize the remaining plain-code portion of the line.
            while i < l:
                if line[i] == ' ' or line[i] == '\t':
                    i = i + 1
                    continue
                o = ord(line[i])
                # Identifier / number: alphanumeric start.
                if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                   (o >= 48 and o <= 57):
                    s = i
                    while i < l:
                        o = ord(line[i])
                        if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                           (o >= 48 and o <= 57) or \
                           (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1:
                            i = i + 1
                        else:
                            break
                    self.tokens.append(('name', line[s:i]))
                    continue
                # Single-character separators.
                if "(){}:;,[]".find(line[i]) != -1:
#                    if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
#                       line[i] == '}' or line[i] == ':' or line[i] == ';' or \
#                       line[i] == ',' or line[i] == '[' or line[i] == ']':
                    self.tokens.append(('sep', line[i]))
                    i = i + 1
                    continue
                # Operators, one or two characters, plus the '...' ellipsis.
                if "+-*><=/%&!|.".find(line[i]) != -1:
#                    if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
#                       line[i] == '>' or line[i] == '<' or line[i] == '=' or \
#                       line[i] == '/' or line[i] == '%' or line[i] == '&' or \
#                       line[i] == '!' or line[i] == '|' or line[i] == '.':
                    if line[i] == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and (
                       "+-*><=/%&!|".find(line[j]) != -1):
#                        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
#                        line[j] == '>' or line[j] == '<' or line[j] == '=' or \
#                        line[j] == '/' or line[j] == '%' or line[j] == '&' or \
#                        line[j] == '!' or line[j] == '|'):
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', line[i]))
                        i = i + 1
                    continue
                # Anything else: accumulate until a separator is found.
                s = i
                while i < l:
                    o = ord(line[i])
                    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                       (o >= 48 and o <= 57) or (
                        " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1):
#                         line[i] != ' ' and line[i] != '\t' and
#                         line[i] != '(' and line[i] != ')' and
#                         line[i] != '{' and line[i] != '}' and
#                         line[i] != ':' and line[i] != ';' and
#                         line[i] != ',' and line[i] != '+' and
#                         line[i] != '-' and line[i] != '*' and
#                         line[i] != '/' and line[i] != '%' and
#                         line[i] != '&' and line[i] != '!' and
#                         line[i] != '|' and line[i] != '[' and
#                         line[i] != ']' and line[i] != '=' and
#                         line[i] != '*' and line[i] != '>' and
#                         line[i] != '<'):
                        i = i + 1
                    else:
                        break
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
575:
576: class CParser:
577: """The C module parser"""
578: def __init__(self, filename, idx = None):
579: self.filename = filename
580: if len(filename) > 2 and filename[-2:] == '.h':
581: self.is_header = 1
582: else:
583: self.is_header = 0
584: self.input = open(filename)
585: self.lexer = CLexer(self.input)
586: if idx == None:
587: self.index = index()
588: else:
589: self.index = idx
590: self.top_comment = ""
591: self.last_comment = ""
592: self.comment = None
593: self.collect_ref = 0
594: self.no_error = 0
595: self.conditionals = []
596: self.defines = []
597:
    def collect_references(self):
        """Enable recording of symbol references found in function bodies."""
        self.collect_ref = 1
600:
    def stop_error(self):
        """Disable warning/error reporting (DOC_DISABLE marker)."""
        self.no_error = 1
603:
    def start_error(self):
        """Re-enable warning/error reporting (DOC_ENABLE marker)."""
        self.no_error = 0
606:
    def lineno(self):
        """Return the lexer's current line number in the input file."""
        return self.lexer.getlineno()
609:
610: def index_add(self, name, module, static, type, info=None, extra = None):
611: if self.is_header == 1:
612: self.index.add(name, module, module, static, type, self.lineno(),
613: info, extra, self.conditionals)
614: else:
615: self.index.add(name, None, module, static, type, self.lineno(),
616: info, extra, self.conditionals)
617:
618: def index_add_ref(self, name, module, static, type, info=None,
619: extra = None):
620: if self.is_header == 1:
621: self.index.add_ref(name, module, module, static, type,
622: self.lineno(), info, extra, self.conditionals)
623: else:
624: self.index.add_ref(name, None, module, static, type, self.lineno(),
625: info, extra, self.conditionals)
626:
627: def warning(self, msg):
628: if self.no_error:
629: return
630: print(msg)
631:
    def error(self, msg, token=-1):
        """Report a fatal parse error and abort the program, unless
        reporting is currently disabled by a DOC_DISABLE block."""
        if self.no_error:
            return

        print("Parse Error: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()
        sys.exit(1)
641:
    def debug(self, msg, token=-1):
        """Print a debugging message together with the lexer state."""
        print("Debug: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()
647:
    def parseTopComment(self, comment):
        """Parse the first comment block of a module into a dict.

        Lines of the form 'key: value' become dict entries; lines with
        no colon are appended to the entry of the last key seen.  The
        result is stored in self.index.info.
        """
        res = {}
        lines = comment.split("\n")
        item = None
        for line in lines:
            # Strip the leading decoration: blanks, '*' and more blanks.
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            while line != "" and line[0] == '*':
                line = line[1:]
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            try:
                (it, line) = line.split(":", 1)
                item = it
                while line != "" and (line[0] == ' ' or line[0] == '\t'):
                    line = line[1:]
                if item in res:
                    res[item] = res[item] + " " + line
                else:
                    res[item] = line
            except:
                # No ':' on the line (split raised): treat the text as a
                # continuation of the previous item, when there is one.
                if item != None:
                    if item in res:
                        res[item] = res[item] + " " + line
                    else:
                        res[item] = line
        self.index.info = res
675:
    def parseComment(self, token):
        """Record a comment token and return the next lexer token.

        The very first comment of the file is kept as the module's top
        comment.  A comment starting with '*' begins a new documentation
        block, otherwise the text is appended to the current block.
        DOC_DISABLE / DOC_ENABLE markers toggle warning reporting.
        """
        if self.top_comment == "":
            self.top_comment = token[1]
        if self.comment == None or token[1][0] == '*':
            self.comment = token[1];
        else:
            self.comment = self.comment + token[1]
        token = self.lexer.token()

        if self.comment.find("DOC_DISABLE") != -1:
            self.stop_error()

        if self.comment.find("DOC_ENABLE") != -1:
            self.start_error()

        return token
692:
693: #
694: # Parse a comment block associate to a typedef
695: #
696: def parseTypeComment(self, name, quiet = 0):
697: if name[0:2] == '__':
698: quiet = 1
699:
700: args = []
701: desc = ""
702:
703: if self.comment == None:
704: if not quiet:
705: self.warning("Missing comment for type %s" % (name))
706: return((args, desc))
707: if self.comment[0] != '*':
708: if not quiet:
709: self.warning("Missing * in type comment for %s" % (name))
710: return((args, desc))
711: lines = self.comment.split('\n')
712: if lines[0] == '*':
713: del lines[0]
714: if lines[0] != "* %s:" % (name):
715: if not quiet:
716: self.warning("Misformatted type comment for %s" % (name))
717: self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
718: return((args, desc))
719: del lines[0]
720: while len(lines) > 0 and lines[0] == '*':
721: del lines[0]
722: desc = ""
723: while len(lines) > 0:
724: l = lines[0]
725: while len(l) > 0 and l[0] == '*':
726: l = l[1:]
727: l = l.strip()
728: desc = desc + " " + l
729: del lines[0]
730:
731: desc = desc.strip()
732:
733: if quiet == 0:
734: if desc == "":
735: self.warning("Type comment for %s lack description of the macro" % (name))
736:
737: return(desc)
738: #
739: # Parse a comment block associate to a macro
740: #
741: def parseMacroComment(self, name, quiet = 0):
742: if name[0:2] == '__':
743: quiet = 1
744:
745: args = []
746: desc = ""
747:
748: if self.comment == None:
749: if not quiet:
750: self.warning("Missing comment for macro %s" % (name))
751: return((args, desc))
752: if self.comment[0] != '*':
753: if not quiet:
754: self.warning("Missing * in macro comment for %s" % (name))
755: return((args, desc))
756: lines = self.comment.split('\n')
757: if lines[0] == '*':
758: del lines[0]
759: if lines[0] != "* %s:" % (name):
760: if not quiet:
761: self.warning("Misformatted macro comment for %s" % (name))
762: self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
763: return((args, desc))
764: del lines[0]
765: while lines[0] == '*':
766: del lines[0]
767: while len(lines) > 0 and lines[0][0:3] == '* @':
768: l = lines[0][3:]
769: try:
770: (arg, desc) = l.split(':', 1)
771: desc=desc.strip()
772: arg=arg.strip()
773: except:
774: if not quiet:
775: self.warning("Misformatted macro comment for %s" % (name))
776: self.warning(" problem with '%s'" % (lines[0]))
777: del lines[0]
778: continue
779: del lines[0]
780: l = lines[0].strip()
781: while len(l) > 2 and l[0:3] != '* @':
782: while l[0] == '*':
783: l = l[1:]
784: desc = desc + ' ' + l.strip()
785: del lines[0]
786: if len(lines) == 0:
787: break
788: l = lines[0]
789: args.append((arg, desc))
790: while len(lines) > 0 and lines[0] == '*':
791: del lines[0]
792: desc = ""
793: while len(lines) > 0:
794: l = lines[0]
795: while len(l) > 0 and l[0] == '*':
796: l = l[1:]
797: l = l.strip()
798: desc = desc + " " + l
799: del lines[0]
800:
801: desc = desc.strip()
802:
803: if quiet == 0:
804: if desc == "":
805: self.warning("Macro comment for %s lack description of the macro" % (name))
806:
807: return((args, desc))
808:
809: #
810: # Parse a comment block and merge the informations found in the
811: # parameters descriptions, finally returns a block as complete
812: # as possible
813: #
814: def mergeFunctionComment(self, name, description, quiet = 0):
815: if name == 'main':
816: quiet = 1
817: if name[0:2] == '__':
818: quiet = 1
819:
820: (ret, args) = description
821: desc = ""
822: retdesc = ""
823:
824: if self.comment == None:
825: if not quiet:
826: self.warning("Missing comment for function %s" % (name))
827: return(((ret[0], retdesc), args, desc))
828: if self.comment[0] != '*':
829: if not quiet:
830: self.warning("Missing * in function comment for %s" % (name))
831: return(((ret[0], retdesc), args, desc))
832: lines = self.comment.split('\n')
833: if lines[0] == '*':
834: del lines[0]
835: if lines[0] != "* %s:" % (name):
836: if not quiet:
837: self.warning("Misformatted function comment for %s" % (name))
838: self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
839: return(((ret[0], retdesc), args, desc))
840: del lines[0]
841: while lines[0] == '*':
842: del lines[0]
843: nbargs = len(args)
844: while len(lines) > 0 and lines[0][0:3] == '* @':
845: l = lines[0][3:]
846: try:
847: (arg, desc) = l.split(':', 1)
848: desc=desc.strip()
849: arg=arg.strip()
850: except:
851: if not quiet:
852: self.warning("Misformatted function comment for %s" % (name))
853: self.warning(" problem with '%s'" % (lines[0]))
854: del lines[0]
855: continue
856: del lines[0]
857: l = lines[0].strip()
858: while len(l) > 2 and l[0:3] != '* @':
859: while l[0] == '*':
860: l = l[1:]
861: desc = desc + ' ' + l.strip()
862: del lines[0]
863: if len(lines) == 0:
864: break
865: l = lines[0]
866: i = 0
867: while i < nbargs:
868: if args[i][1] == arg:
869: args[i] = (args[i][0], arg, desc)
870: break;
871: i = i + 1
872: if i >= nbargs:
873: if not quiet:
874: self.warning("Unable to find arg %s from function comment for %s" % (
875: arg, name))
876: while len(lines) > 0 and lines[0] == '*':
877: del lines[0]
878: desc = ""
879: while len(lines) > 0:
880: l = lines[0]
881: while len(l) > 0 and l[0] == '*':
882: l = l[1:]
883: l = l.strip()
884: if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
885: try:
886: l = l.split(' ', 1)[1]
887: except:
888: l = ""
889: retdesc = l.strip()
890: del lines[0]
891: while len(lines) > 0:
892: l = lines[0]
893: while len(l) > 0 and l[0] == '*':
894: l = l[1:]
895: l = l.strip()
896: retdesc = retdesc + " " + l
897: del lines[0]
898: else:
899: desc = desc + " " + l
900: del lines[0]
901:
902: retdesc = retdesc.strip()
903: desc = desc.strip()
904:
905: if quiet == 0:
906: #
907: # report missing comments
908: #
909: i = 0
910: while i < nbargs:
911: if args[i][2] == None and args[i][0] != "void" and \
912: ((args[i][1] != None) or (args[i][1] == '')):
913: self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
914: i = i + 1
915: if retdesc == "" and ret[0] != "void":
916: self.warning("Function comment for %s lacks description of return value" % (name))
917: if desc == "":
918: self.warning("Function comment for %s lacks description of the function" % (name))
919:
920: return(((ret[0], retdesc), args, desc))
921:
    def parsePreproc(self, token):
        """Handle a preprocessor directive token.

        Registers #include and #define in the index and tracks
        #ifdef/#ifndef/#if/#else/#endif nesting in self.defines and
        self.conditionals.  Returns the first token following the
        directive.
        """
        if debug:
            print("=> preproc ", token, self.lexer.tokens)
        name = token[1]
        if name == "#include":
            token = self.lexer.token()
            if token == None:
                return None
            if token[0] == 'preproc':
                self.index_add(token[1], self.filename, not self.is_header,
                                "include")
                return self.lexer.token()
            return token
        if name == "#define":
            token = self.lexer.token()
            if token == None:
                return None
            if token[0] == 'preproc':
                # TODO macros with arguments
                name = token[1]
                lst = []
                token = self.lexer.token()
                while token != None and token[0] == 'preproc' and \
                      token[1][0] != '#':
                    lst.append(token[1])
                    token = self.lexer.token()
                try:
                    # strip a parameter list from the macro name
                    name = name.split('(') [0]
                except:
                    pass
                info = self.parseMacroComment(name, not self.is_header)
                self.index_add(name, self.filename, not self.is_header,
                               "macro", info)
                return token

        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
        # #if, #else and #endif) for headers and mainline code,
        # store the ones from the header in libxml2-api.xml, and later
        # (in the routine merge_public) verify that the two (header and
        # mainline code) agree.
        #
        # There is a small problem with processing the headers. Some of
        # the variables are not concerned with enabling / disabling of
        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
        # them to be included in libxml2-api.xml, or involved in
        # the check between the header and the mainline code. To
        # accomplish this, we ignore any conditional which doesn't include
        # the string 'ENABLED'
        #
        if name == "#ifdef":
            apstr = self.lexer.tokens[0][1]
            try:
                self.defines.append(apstr)
                if apstr.find('ENABLED') != -1:
                    self.conditionals.append("defined(%s)" % apstr)
            except:
                pass
        elif name == "#ifndef":
            apstr = self.lexer.tokens[0][1]
            try:
                self.defines.append(apstr)
                if apstr.find('ENABLED') != -1:
                    self.conditionals.append("!defined(%s)" % apstr)
            except:
                pass
        elif name == "#if":
            # Reassemble the whole expression from the queued tokens.
            apstr = ""
            for tok in self.lexer.tokens:
                if apstr != "":
                    apstr = apstr + " "
                apstr = apstr + tok[1]
            try:
                self.defines.append(apstr)
                if apstr.find('ENABLED') != -1:
                    self.conditionals.append(apstr)
            except:
                pass
        elif name == "#else":
            # Invert the current conditional, when it was tracked.
            if self.conditionals != [] and \
               self.defines[-1].find('ENABLED') != -1:
                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
        elif name == "#endif":
            # Pop the conditional (when tracked) and its define.
            if self.conditionals != [] and \
               self.defines[-1].find('ENABLED') != -1:
                self.conditionals = self.conditionals[:-1]
            self.defines = self.defines[:-1]
        # Skip the remaining tokens of the directive line.
        token = self.lexer.token()
        while token != None and token[0] == 'preproc' and \
              token[1][0] != '#':
            token = self.lexer.token()
        return token
1016:
1017: #
1018: # token acquisition on top of the lexer, it handle internally
1019: # preprocessor and comments since they are logically not part of
1020: # the program structure.
1021: #
1022: def token(self):
1023: global ignored_words
1024:
1025: token = self.lexer.token()
1026: while token != None:
1027: if token[0] == 'comment':
1028: token = self.parseComment(token)
1029: continue
1030: elif token[0] == 'preproc':
1031: token = self.parsePreproc(token)
1032: continue
1033: elif token[0] == "name" and token[1] == "__const":
1034: token = ("name", "const")
1035: return token
1036: elif token[0] == "name" and token[1] == "__attribute":
1037: token = self.lexer.token()
1038: while token != None and token[1] != ";":
1039: token = self.lexer.token()
1040: return token
1041: elif token[0] == "name" and token[1] in ignored_words:
1042: (n, info) = ignored_words[token[1]]
1043: i = 0
1044: while i < n:
1045: token = self.lexer.token()
1046: i = i + 1
1047: token = self.lexer.token()
1048: continue
1049: else:
1050: if debug:
1051: print("=> ", token)
1052: return token
1053: return None
1054:
1055: #
1056: # Parse a typedef, it records the type and its name.
1057: #
1058: def parseTypedef(self, token):
1059: if token == None:
1060: return None
1061: token = self.parseType(token)
1062: if token == None:
1063: self.error("parsing typedef")
1064: return None
1065: base_type = self.type
1066: type = base_type
1067: #self.debug("end typedef type", token)
1068: while token != None:
1069: if token[0] == "name":
1070: name = token[1]
1071: signature = self.signature
1072: if signature != None:
1073: type = type.split('(')[0]
1074: d = self.mergeFunctionComment(name,
1075: ((type, None), signature), 1)
1076: self.index_add(name, self.filename, not self.is_header,
1077: "functype", d)
1078: else:
1079: if base_type == "struct":
1080: self.index_add(name, self.filename, not self.is_header,
1081: "struct", type)
1082: base_type = "struct " + name
1083: else:
1084: # TODO report missing or misformatted comments
1085: info = self.parseTypeComment(name, 1)
1086: self.index_add(name, self.filename, not self.is_header,
1087: "typedef", type, info)
1088: token = self.token()
1089: else:
1090: self.error("parsing typedef: expecting a name")
1091: return token
1092: #self.debug("end typedef", token)
1093: if token != None and token[0] == 'sep' and token[1] == ',':
1094: type = base_type
1095: token = self.token()
1096: while token != None and token[0] == "op":
1097: type = type + token[1]
1098: token = self.token()
1099: elif token != None and token[0] == 'sep' and token[1] == ';':
1100: break;
1101: elif token != None and token[0] == 'name':
1102: type = base_type
1103: continue;
1104: else:
1105: self.error("parsing typedef: expecting ';'", token)
1106: return token
1107: token = self.token()
1108: return token
1109:
1110: #
1111: # Parse a C code block, used for functions it parse till
1112: # the balancing } included
1113: #
    def parseBlock(self, token):
        """Parse a C code block up to and including the balancing '}'.

        Used for function bodies.  When self.collect_ref is set, the
        tokens are scanned for uses of xml* / XML_* / LIBXML_* symbols,
        which are recorded in the cross-reference index.
        """
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.comment = None
                token = self.token()
                return token
            else:
                if self.collect_ref == 1:
                    oldtok = token
                    token = self.token()
                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
                        # "xmlFoo(" looks like a function call
                        if token[0] == "sep" and token[1] == "(":
                            self.index_add_ref(oldtok[1], self.filename,
                                                0, "function")
                            token = self.token()
                        # "xmlFoo bar;" / "," / "=" looks like a type use
                        elif token[0] == "name":
                            token = self.token()
                            if token[0] == "sep" and (token[1] == ";" or
                               token[1] == "," or token[1] == "="):
                                self.index_add_ref(oldtok[1], self.filename,
                                                    0, "type")
                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
                        self.index_add_ref(oldtok[1], self.filename,
                                            0, "typedef")
                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
                        self.index_add_ref(oldtok[1], self.filename,
                                            0, "typedef")

                else:
                    token = self.token()
        return token
1148:
1149: #
1150: # Parse a C struct definition till the balancing }
1151: #
    def parseStruct(self, token):
        """Parse a C struct definition up to the balancing '}'.

        Collects (type, name, comment) triples for every field into
        self.struct_fields.  Nested brace blocks are skipped wholesale.
        """
        fields = []
        #self.debug("start parseStruct", token)
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                # stray nested block: skip it
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                # end of the struct: publish fields and return
                self.struct_fields = fields
                #self.debug("end parseStruct", token)
                #print fields
                token = self.token()
                return token
            else:
                base_type = self.type
                #self.debug("before parseType", token)
                token = self.parseType(token)
                #self.debug("after parseType", token)
                if token != None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    if token[0] == "sep" and token[1] == ";":
                        self.comment = None
                        # fetching the next token may pick up the trailing
                        # comment documenting this field, hence the order
                        token = self.token()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token != None and token[0] == "sep" and token[1] == "{":
                    # anonymous inner struct/union: skip its body
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token != None and token[0] == "name":
                        token = self.token()
                    if token != None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                self.type = base_type;
        self.struct_fields = fields
        #self.debug("end parseStruct", token)
        #print fields
        return token
1197:
1198: #
1199: # Parse a C enum block, parse till the balancing }
1200: #
1201: def parseEnumBlock(self, token):
1202: self.enums = []
1203: name = None
1204: self.comment = None
1205: comment = ""
1206: value = "0"
1207: while token != None:
1208: if token[0] == "sep" and token[1] == "{":
1209: token = self.token()
1210: token = self.parseTypeBlock(token)
1211: elif token[0] == "sep" and token[1] == "}":
1212: if name != None:
1213: if self.comment != None:
1214: comment = self.comment
1215: self.comment = None
1216: self.enums.append((name, value, comment))
1217: token = self.token()
1218: return token
1219: elif token[0] == "name":
1220: if name != None:
1221: if self.comment != None:
1222: comment = self.comment.strip()
1223: self.comment = None
1224: self.enums.append((name, value, comment))
1225: name = token[1]
1226: comment = ""
1227: token = self.token()
1228: if token[0] == "op" and token[1][0] == "=":
1229: value = ""
1230: if len(token[1]) > 1:
1231: value = token[1][1:]
1232: token = self.token()
1233: while token[0] != "sep" or (token[1] != ',' and
1234: token[1] != '}'):
1235: value = value + token[1]
1236: token = self.token()
1237: else:
1238: try:
1239: value = "%d" % (int(value) + 1)
1240: except:
1241: self.warning("Failed to compute value of enum %s" % (name))
1242: value=""
1243: if token[0] == "sep" and token[1] == ",":
1244: token = self.token()
1245: else:
1246: token = self.token()
1247: return token
1248:
1249: #
1250: # Parse a C definition block, used for structs it parse till
1251: # the balancing }
1252: #
1253: def parseTypeBlock(self, token):
1254: while token != None:
1255: if token[0] == "sep" and token[1] == "{":
1256: token = self.token()
1257: token = self.parseTypeBlock(token)
1258: elif token[0] == "sep" and token[1] == "}":
1259: token = self.token()
1260: return token
1261: else:
1262: token = self.token()
1263: return token
1264:
1265: #
1266: # Parse a type: the fact that the type name can either occur after
1267: # the definition or within the definition makes it a little harder
1268: # if inside, the name token is pushed back before returning
1269: #
1270: def parseType(self, token):
1271: self.type = ""
1272: self.struct_fields = []
1273: self.signature = None
1274: if token == None:
1275: return token
1276:
1277: while token[0] == "name" and (
1278: token[1] == "const" or \
1279: token[1] == "unsigned" or \
1280: token[1] == "signed"):
1281: if self.type == "":
1282: self.type = token[1]
1283: else:
1284: self.type = self.type + " " + token[1]
1285: token = self.token()
1286:
1287: if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1288: if self.type == "":
1289: self.type = token[1]
1290: else:
1291: self.type = self.type + " " + token[1]
1292: if token[0] == "name" and token[1] == "int":
1293: if self.type == "":
1294: self.type = tmp[1]
1295: else:
1296: self.type = self.type + " " + tmp[1]
1297:
1298: elif token[0] == "name" and token[1] == "struct":
1299: if self.type == "":
1300: self.type = token[1]
1301: else:
1302: self.type = self.type + " " + token[1]
1303: token = self.token()
1304: nametok = None
1305: if token[0] == "name":
1306: nametok = token
1307: token = self.token()
1308: if token != None and token[0] == "sep" and token[1] == "{":
1309: token = self.token()
1310: token = self.parseStruct(token)
1311: elif token != None and token[0] == "op" and token[1] == "*":
1312: self.type = self.type + " " + nametok[1] + " *"
1313: token = self.token()
1314: while token != None and token[0] == "op" and token[1] == "*":
1315: self.type = self.type + " *"
1316: token = self.token()
1317: if token[0] == "name":
1318: nametok = token
1319: token = self.token()
1320: else:
1321: self.error("struct : expecting name", token)
1322: return token
1323: elif token != None and token[0] == "name" and nametok != None:
1324: self.type = self.type + " " + nametok[1]
1325: return token
1326:
1327: if nametok != None:
1328: self.lexer.push(token)
1329: token = nametok
1330: return token
1331:
1332: elif token[0] == "name" and token[1] == "enum":
1333: if self.type == "":
1334: self.type = token[1]
1335: else:
1336: self.type = self.type + " " + token[1]
1337: self.enums = []
1338: token = self.token()
1339: if token != None and token[0] == "sep" and token[1] == "{":
1340: token = self.token()
1341: token = self.parseEnumBlock(token)
1342: else:
1343: self.error("parsing enum: expecting '{'", token)
1344: enum_type = None
1345: if token != None and token[0] != "name":
1346: self.lexer.push(token)
1347: token = ("name", "enum")
1348: else:
1349: enum_type = token[1]
1350: for enum in self.enums:
1351: self.index_add(enum[0], self.filename,
1352: not self.is_header, "enum",
1353: (enum[1], enum[2], enum_type))
1354: return token
1355:
1356: elif token[0] == "name":
1357: if self.type == "":
1358: self.type = token[1]
1359: else:
1360: self.type = self.type + " " + token[1]
1361: else:
1362: self.error("parsing type %s: expecting a name" % (self.type),
1363: token)
1364: return token
1365: token = self.token()
1366: while token != None and (token[0] == "op" or
1367: token[0] == "name" and token[1] == "const"):
1368: self.type = self.type + " " + token[1]
1369: token = self.token()
1370:
1371: #
1372: # if there is a parenthesis here, this means a function type
1373: #
1374: if token != None and token[0] == "sep" and token[1] == '(':
1375: self.type = self.type + token[1]
1376: token = self.token()
1377: while token != None and token[0] == "op" and token[1] == '*':
1378: self.type = self.type + token[1]
1379: token = self.token()
1380: if token == None or token[0] != "name" :
1381: self.error("parsing function type, name expected", token);
1382: return token
1383: self.type = self.type + token[1]
1384: nametok = token
1385: token = self.token()
1386: if token != None and token[0] == "sep" and token[1] == ')':
1387: self.type = self.type + token[1]
1388: token = self.token()
1389: if token != None and token[0] == "sep" and token[1] == '(':
1390: token = self.token()
1391: type = self.type;
1392: token = self.parseSignature(token);
1393: self.type = type;
1394: else:
1395: self.error("parsing function type, '(' expected", token);
1396: return token
1397: else:
1398: self.error("parsing function type, ')' expected", token);
1399: return token
1400: self.lexer.push(token)
1401: token = nametok
1402: return token
1403:
1404: #
1405: # do some lookahead for arrays
1406: #
1407: if token != None and token[0] == "name":
1408: nametok = token
1409: token = self.token()
1410: if token != None and token[0] == "sep" and token[1] == '[':
1411: self.type = self.type + nametok[1]
1412: while token != None and token[0] == "sep" and token[1] == '[':
1413: self.type = self.type + token[1]
1414: token = self.token()
1415: while token != None and token[0] != 'sep' and \
1416: token[1] != ']' and token[1] != ';':
1417: self.type = self.type + token[1]
1418: token = self.token()
1419: if token != None and token[0] == 'sep' and token[1] == ']':
1420: self.type = self.type + token[1]
1421: token = self.token()
1422: else:
1423: self.error("parsing array type, ']' expected", token);
1424: return token
1425: elif token != None and token[0] == "sep" and token[1] == ':':
1426: # remove :12 in case it's a limited int size
1427: token = self.token()
1428: token = self.token()
1429: self.lexer.push(token)
1430: token = nametok
1431:
1432: return token
1433:
1434: #
1435: # Parse a signature: '(' has been parsed and we scan the type definition
1436: # up to the ')' included
    def parseSignature(self, token):
        """Parse a function signature after '(' up to and including ')'.

        Stores a list of (type, name, comment) triples in
        self.signature; a parameter given only by its type has name
        None, varargs are recorded as ("...", "...", None).
        """
        signature = []
        if token != None and token[0] == "sep" and token[1] == ')':
            # empty parameter list
            self.signature = []
            token = self.token()
            return token
        while token != None:
            token = self.parseType(token)
            if token != None and token[0] == "name":
                signature.append((self.type, token[1], None))
                token = self.token()
            elif token != None and token[0] == "sep" and token[1] == ',':
                token = self.token()
                continue
            elif token != None and token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    signature.append((self.type, "...", None))
                else:
                    signature.append((self.type, None, None))
            if token != None and token[0] == "sep":
                if token[1] == ',':
                    token = self.token()
                    continue
                elif token[1] == ')':
                    token = self.token()
                    break
        self.signature = signature
        return token
1466:
1467: #
1468: # Parse a global definition, be it a type, variable or function
1469: # the extern "C" blocks are a bit nasty and require it to recurse.
1470: #
    def parseGlobal(self, token):
        """Parse one top-level construct: type, variable or function.

        Recurses into extern "C" { ... } blocks.  Every parsed symbol
        is recorded in the index; 'static' definitions are flagged as
        such so they can be treated as private.
        """
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token == None:
                return token
            if token[0] == 'string':
                if token[1] == 'C':
                    token = self.token()
                    if token == None:
                        return token
                    if token[0] == 'sep' and token[1] == "{":
                        token = self.token()
                        # print 'Entering extern "C line ', self.lineno()
                        # parse the whole extern "C" { ... } block contents
                        while token != None and (token[0] != 'sep' or
                              token[1] != "}"):
                            if token[0] == 'name':
                                token = self.parseGlobal(token)
                            else:
                                self.error(
                                    "token %s %s unexpected at the top level" % (
                                        token[0], token[1]))
                                token = self.parseGlobal(token)
                        # print 'Exiting extern "C" line', self.lineno()
                        token = self.token()
                        return token
                else:
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token == None or token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)
        else:
            token = self.parseType(token)
            type_orig = self.type
            if token == None or token[0] != "name":
                return token
            type = type_orig
            self.name = token[1]
            token = self.token()
            while token != None and (token[0] == "sep" or token[0] == "op"):
                if token[0] == "sep":
                    if token[1] == "[":
                        # array declarator: fold dimensions into the type
                        type = type + token[1]
                        token = self.token()
                        while token != None and (token[0] != "sep" or \
                              token[1] != ";"):
                            type = type + token[1]
                            token = self.token()

                if token != None and token[0] == "op" and token[1] == "=":
                    #
                    # Skip the initialization of the variable
                    #
                    token = self.token()
                    if token[0] == 'sep' and token[1] == '{':
                        token = self.token()
                        token = self.parseBlock(token)
                    else:
                        self.comment = None
                        while token != None and (token[0] != "sep" or \
                              (token[1] != ';' and token[1] != ',')):
                            token = self.token()
                    self.comment = None
                    if token == None or token[0] != "sep" or (token[1] != ';' and
                       token[1] != ','):
                        self.error("missing ';' or ',' after value")

                if token != None and token[0] == "sep":
                    if token[1] == ";":
                        # end of a variable or struct declaration
                        self.comment = None
                        token = self.token()
                        if type == "struct":
                            self.index_add(self.name, self.filename,
                                 not self.is_header, "struct", self.struct_fields)
                        else:
                            self.index_add(self.name, self.filename,
                                 not self.is_header, "variable", type)
                        break
                    elif token[1] == "(":
                        # a '(' after the name: function declaration/definition
                        token = self.token()
                        token = self.parseSignature(token)
                        if token == None:
                            return None
                        if token[0] == "sep" and token[1] == ";":
                            # prototype only
                            d = self.mergeFunctionComment(self.name,
                                ((type, None), self.signature), 1)
                            self.index_add(self.name, self.filename, static,
                                "function", d)
                            token = self.token()
                        elif token[0] == "sep" and token[1] == "{":
                            # definition with a body: skip the body
                            d = self.mergeFunctionComment(self.name,
                                ((type, None), self.signature), static)
                            self.index_add(self.name, self.filename, static,
                                "function", d)
                            token = self.token()
                            token = self.parseBlock(token);
                    elif token[1] == ',':
                        # comma-separated list of variables of one type
                        self.comment = None
                        self.index_add(self.name, self.filename, static,
                            "variable", type)
                        type = type_orig
                        token = self.token()
                        while token != None and token[0] == "sep":
                            type = type + token[1]
                            token = self.token()
                        if token != None and token[0] == "name":
                            self.name = token[1]
                            token = self.token()
                    else:
                        break

        return token
1589:
1590: def parse(self):
1591: self.warning("Parsing %s" % (self.filename))
1592: token = self.token()
1593: while token != None:
1594: if token[0] == 'name':
1595: token = self.parseGlobal(token)
1596: else:
1597: self.error("token %s %s unexpected at the top level" % (
1598: token[0], token[1]))
1599: token = self.parseGlobal(token)
1600: return
1601: self.parseTopComment(self.top_comment)
1602: return self.index
1603:
1604:
class docBuilder:
    """A documentation builder: scans C modules and headers with CParser
       and serializes the collected API description and cross-reference
       data to XML files."""
    def __init__(self, name, directories=['.'], excludes=[]):
        # NOTE(review): the mutable default arguments are never mutated
        # here, so the usual shared-default pitfall does not apply.
        self.name = name
        self.directories = directories
        # files from the global ignore table are never scanned
        self.excludes = excludes + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        """Tokenize the description string and index each significant
           word under the symbol id in self.xref."""
        if str == None:
            return
        # strip punctuation so only bare words remain
        str = str.replace("'", ' ')
        str = str.replace('"', ' ')
        str = str.replace("/", ' ')
        str = str.replace('*', ' ')
        str = str.replace("[", ' ')
        str = str.replace("]", ' ')
        str = str.replace("(", ' ')
        str = str.replace(")", ' ')
        str = str.replace("<", ' ')
        str = str.replace('>', ' ')
        str = str.replace("&", ' ')
        str = str.replace('#', ' ')
        str = str.replace(",", ' ')
        str = str.replace('.', ' ')
        str = str.replace(';', ' ')
        tokens = str.split()
        for token in tokens:
            try:
                # keep only words that start with an ASCII letter and
                # are at least 3 characters long
                c = token[0]
                if string.ascii_letters.find(c) < 0:
                    pass
                elif len(token) < 3:
                    pass
                else:
                    lower = token.lower()
                    # TODO: generalize this a bit
                    if lower == 'and' or lower == 'the':
                        pass
                    elif token in self.xref:
                        self.xref[token].append(id)
                    else:
                        self.xref[token] = [id]
            except:
                # NOTE(review): bare except silently drops odd tokens
                pass

    def analyze(self):
        """Print a summary and run the index analysis pass."""
        print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys()))))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge it into the index."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx;
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered C module and merge its public symbols."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def scan(self):
        """Collect the .c and .h files of all directories, skipping the
           excluded ones, then parse them all."""
        for directory in self.directories:
            files = glob.glob(directory + "/*.c")
            for file in files:
                skip = 0
                for excl in self.excludes:
                    if file.find(excl) != -1:
                        print("Skipping %s" % file)
                        skip = 1
                        break
                if skip == 0:
                    self.modules[file] = None;
            files = glob.glob(directory + "/*.h")
            for file in files:
                skip = 0
                for excl in self.excludes:
                    if file.find(excl) != -1:
                        print("Skipping %s" % file)
                        skip = 1
                        break
                if skip == 0:
                    self.headers[file] = None;
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the module name for a path: basename without .c/.h."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        elif module[-2:] == '.c':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write one enum value as an <enum/> element."""
        id = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (name,
                     self.modulename_file(id.header)))
        if id.info != None:
            info = id.info
            if info[0] != None and info[0] != '':
                # NOTE(review): eval() folds a value expression to a
                # constant; acceptable only because input is our own
                # parsed source, never untrusted data
                try:
                    val = eval(info[0])
                except:
                    val = info[0]
                output.write(" value='%s'" % (val));
            if info[2] != None and info[2] != '':
                output.write(" type='%s'" % info[2]);
            if info[1] != None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]));
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write one macro with its arguments as a <macro> element."""
        id = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(id.header)))
        if id.info != None:
            try:
                (args, desc) = id.info
                if desc != None and desc != "":
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for arg in args:
                    (name, desc) = arg
                    if desc != None and desc != "":
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     name, escape(desc)))
                        self.indexString(name, desc)
                    else:
                        output.write("      <arg name='%s'/>\n" % (name))
            except:
                pass
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        """Write one typedef, expanding struct typedefs with their
           fields when known."""
        id = self.idx.typedefs[name]
        if id.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(id.header), id.info))
            name = id.info[7:]
            if name in self.idx.structs and ( \
               type(self.idx.structs[name].info) == type(()) or
                type(self.idx.structs[name].info) == type([])):
                # public structure with known fields: list them
                output.write(">\n");
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc == None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
                except:
                    print("Failed to serialize struct %s" % (name))
                output.write("    </struct>\n")
            else:
                # opaque structure
                output.write("/>\n");
        else :
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(id.header), id.info))
            try:
                desc = id.extra
                if desc != None and desc != "":
                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                    output.write("    </typedef>\n")
                else:
                    output.write("/>\n")
            except:
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write one exported variable as a <variable/> element."""
        id = self.idx.variables[name]
        if id.info != None:
            output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
                    name, self.modulename_file(id.header), id.info))
        else:
            output.write("    <variable name='%s' file='%s'/>\n" % (
                    name, self.modulename_file(id.header)))

    def serialize_function(self, output, name):
        """Write one function or functype: conditionals, description,
           return type and arguments."""
        id = self.idx.functions[name]
        if name == debugsym:
            print("=>", id)

        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
                     name, self.modulename_file(id.header),
                     self.modulename_file(id.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if id.conditionals != None:
            apstr = ""
            for cond in id.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n"% (apstr));
        try:
            (ret, params, desc) = id.info
            if (desc == None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
                print("%s %s from %s has no description" % (id.type, name,
                       self.modulename_file(id.module)))

            output.write("      <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] != None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] == None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except:
            print("Failed to save function %s info: " % name, repr(id.info))
        output.write("    </%s>\n" % (id.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing everything one header exports."""
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        dict = self.headers[file]
        if dict.info != None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(dict.info[data]),
                                 data.lower()))
                except:
                    print("Header %s lacks a %s description" % (module, data))
            if 'Description' in dict.info:
                desc = dict.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write("     <deprecated/>\n")

        ids = list(dict.macros.keys())
        ids.sort()
        for id in uniq(ids):
            # Macros are sometime used to masquerade other types.
            if id in dict.functions:
                continue
            if id in dict.variables:
                continue
            if id in dict.typedefs:
                continue
            if id in dict.structs:
                continue
            if id in dict.enums:
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
        ids = list(dict.enums.keys())
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
        ids = list(dict.typedefs.keys())
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
        ids = list(dict.structs.keys())
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
        ids = list(dict.variables.keys())
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
        ids = list(dict.functions.keys())
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='function'/>\n" % (id))
        output.write("    </file>\n")

    def serialize_xrefs_files(self, output):
        """Write, per header, the list of symbols it defines."""
        headers = list(self.headers.keys())
        headers.sort()
        for file in headers:
            module = self.modulename_file(file)
            output.write("    <file name='%s'>\n" % (module))
            dict = self.headers[file]
            ids = uniq(list(dict.functions.keys()) + list(dict.variables.keys()) + \
                  list(dict.macros.keys()) + list(dict.typedefs.keys()) + \
                  list(dict.structs.keys()) + list(dict.enums.keys()))
            ids.sort()
            for id in ids:
                output.write("      <ref name='%s'/>\n" % (id))
            output.write("    </file>\n")
        pass

    def serialize_xrefs_functions(self, output):
        """Index functions by the types of their parameters."""
        funcs = {}
        for name in list(self.idx.functions.keys()):
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    if param[0] in funcs:
                        funcs[param[0]].append(name)
                    else:
                        funcs[param[0]] = [name]
            except:
                pass
        typ = list(funcs.keys())
        typ.sort()
        for type in typ:
            # skip the ubiquitous primitive types
            if type == '' or type == 'void' or type == "int" or \
               type == "char *" or type == "const char *" :
                continue
            output.write("    <type name='%s'>\n" % (type))
            ids = funcs[type]
            ids.sort()
            pid = ''	# not sure why we have dups, but get rid of them!
            for id in ids:
                if id != pid:
                    output.write("      <ref name='%s'/>\n" % (id))
                    pid = id
            output.write("    </type>\n")

    def serialize_xrefs_constructors(self, output):
        """Index functions by their return type ("constructors" of it)."""
        funcs = {}
        for name in list(self.idx.functions.keys()):
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                if ret[0] == "void":
                    continue
                if ret[0] in funcs:
                    funcs[ret[0]].append(name)
                else:
                    funcs[ret[0]] = [name]
            except:
                pass
        typ = list(funcs.keys())
        typ.sort()
        for type in typ:
            # skip the ubiquitous primitive types
            if type == '' or type == 'void' or type == "int" or \
               type == "char *" or type == "const char *" :
                continue
            output.write("    <type name='%s'>\n" % (type))
            ids = funcs[type]
            ids.sort()
            for id in ids:
                output.write("      <ref name='%s'/>\n" % (id))
            output.write("    </type>\n")

    def serialize_xrefs_alpha(self, output):
        """Write all identifiers grouped by their first letter."""
        letter = None
        ids = list(self.idx.identifiers.keys())
        ids.sort()
        for id in ids:
            if id[0] != letter:
                if letter != None:
                    output.write("    </letter>\n")
                letter = id[0]
                output.write("    <letter name='%s'>\n" % (letter))
            output.write("      <ref name='%s'/>\n" % (id))
        if letter != None:
            output.write("    </letter>\n")

    def serialize_xrefs_references(self, output):
        """Map every identifier to its generated HTML page anchor."""
        typ = list(self.idx.identifiers.keys())
        typ.sort()
        for id in typ:
            idf = self.idx.identifiers[id]
            module = idf.header
            output.write("    <reference name='%s' href='%s'/>\n" % (id,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         id))

    def serialize_xrefs_index(self, output):
        """Write the word index, split into <chunk>s of roughly 200
           references, grouped by first letter."""
        index = self.xref
        typ = list(index.keys())
        typ.sort()
        letter = None
        count = 0
        chunk = 0
        chunks = []
        for id in typ:
            # words referenced too widely are useless in an index
            if len(index[id]) > 30:
                continue
            if id[0] != letter:
                if letter == None or count > 200:
                    if letter != None:
                        output.write("    </letter>\n")
                        output.write("  </chunk>\n")
                        count = 0
                        # first_letter was set when the previous chunk
                        # was opened below
                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
                    output.write("  <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = id[0]
                    chunk = chunk + 1
                elif letter != None:
                    output.write("    </letter>\n")
                letter = id[0]
                output.write("    <letter name='%s'>\n" % (letter))
            output.write("      <word name='%s'>\n" % (id))
            tokens = index[id];
            tokens.sort()
            tok = None
            for token in tokens:
                # drop duplicate references
                if tok == token:
                    continue
                tok = token
                output.write("        <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write("      </word>\n")
        if letter != None:
            output.write("    </letter>\n")
            output.write("  </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
            # trailing summary of all emitted chunks
            output.write("  <chunks>\n")
            for ch in chunks:
                output.write("    <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write("  </chunks>\n")

    def serialize_xrefs(self, output):
        """Write all the cross-reference sections in order."""
        output.write("  <references>\n")
        self.serialize_xrefs_references(output)
        output.write("  </references>\n")
        output.write("  <alpha>\n")
        self.serialize_xrefs_alpha(output)
        output.write("  </alpha>\n")
        output.write("  <constructors>\n")
        self.serialize_xrefs_constructors(output)
        output.write("  </constructors>\n")
        output.write("  <functions>\n")
        self.serialize_xrefs_functions(output)
        output.write("  </functions>\n")
        output.write("  <files>\n")
        self.serialize_xrefs_files(output)
        output.write("  </files>\n")
        output.write("  <index>\n")
        self.serialize_xrefs_index(output)
        output.write("  </index>\n")

    def serialize(self):
        """Write the <name>-api.xml and <name>-refs.xml output files."""
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write("  <files>\n")
        headers = list(self.headers.keys())
        headers.sort()
        for file in headers:
            self.serialize_exports(output, file)
        output.write("  </files>\n")
        output.write("  <symbols>\n")
        macros = list(self.idx.macros.keys())
        macros.sort()
        for macro in macros:
            self.serialize_macro(output, macro)
        enums = list(self.idx.enums.keys())
        enums.sort()
        for enum in enums:
            self.serialize_enum(output, enum)
        typedefs = list(self.idx.typedefs.keys())
        typedefs.sort()
        for typedef in typedefs:
            self.serialize_typedef(output, typedef)
        variables = list(self.idx.variables.keys())
        variables.sort()
        for variable in variables:
            self.serialize_variable(output, variable)
        functions = list(self.idx.functions.keys())
        functions.sort()
        for function in functions:
            self.serialize_function(output, function)
        output.write("  </symbols>\n")
        output.write("</api>\n")
        output.close()

        filename = "%s-refs.xml" % self.name
        print("Saving XML Cross References %s" % (filename))
        output = open(filename, "w")
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
        output.close()
2109:
2110:
def rebuild():
    """Guess which module the current directory belongs to, rebuild its
    API description and return the docBuilder used (None on failure).

    When the libexslt sources are also present, their description is
    rebuilt as a second pass.
    """
    # (probe file, module name, source dirs, excluded files) — first
    # probe that exists wins, mirroring the original if/elif chain.
    candidates = [
        ("parser.c", "libxml2",
         [".", "."], ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "libxml2",
         ["..", "../include/libxml"], ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "libxslt",
         ["../libxslt"], ["win32config.h", "libxslt.h", "tst.c"]),
    ]
    builder = None
    for probe, name, dirs, excludes in candidates:
        if glob.glob(probe) != []:
            print("Rebuilding API description for %s" % name)
            builder = docBuilder(name, dirs, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c") != []:
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2137:
2138: #
2139: # for debugging the parser
2140: #
def parse(filename):
    """Debug helper: run the C parser on a single file and return the
    resulting symbol index."""
    return CParser(filename).parse()
2145:
if __name__ == "__main__":
    # With a file argument, parse just that file with debugging enabled;
    # with no argument, rebuild the whole API description.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>