Annotation of embedaddon/libxml2/doc/apibuild.py, revision 1.1.1.1
1: #!/usr/bin/python -u
2: #
3: # This is the API builder: it parses the C sources and builds the
4: # formal API description in XML.
5: #
6: # See Copyright for the status of this software.
7: #
8: # daniel@veillard.com
9: #
10: import os, sys
11: import string
12: import glob
13:
14: debug=0
15: #debugsym='ignorableWhitespaceSAXFunc'
16: debugsym=None
17:
18: #
19: # C parser analysis code
20: #
21: ignored_files = {
22: "trio": "too many non standard macros",
23: "trio.c": "too many non standard macros",
24: "trionan.c": "too many non standard macros",
25: "triostr.c": "too many non standard macros",
26: "acconfig.h": "generated portability layer",
27: "config.h": "generated portability layer",
28: "libxml.h": "internal only",
29: "testOOM.c": "out of memory tester",
30: "testOOMlib.h": "out of memory tester",
31: "testOOMlib.c": "out of memory tester",
32: "rngparser.c": "not yet integrated",
33: "rngparser.h": "not yet integrated",
34: "elfgcchack.h": "not a normal header",
35: "testHTML.c": "test tool",
36: "testReader.c": "test tool",
37: "testSchemas.c": "test tool",
38: "testXPath.c": "test tool",
39: "testAutomata.c": "test tool",
40: "testModule.c": "test tool",
41: "testRegexp.c": "test tool",
42: "testThreads.c": "test tool",
43: "testC14N.c": "test tool",
44: "testRelax.c": "test tool",
45: "testThreadsWin32.c": "test tool",
46: "testSAX.c": "test tool",
47: "testURI.c": "test tool",
48: "testapi.c": "generated regression tests",
49: "runtest.c": "regression tests program",
50: "runsuite.c": "regression tests program",
51: "tst.c": "not part of the library",
52: "test.c": "not part of the library",
53: "testdso.c": "test for dynamid shared libraries",
54: "testrecurse.c": "test for entities recursions",
55: }
56:
57: ignored_words = {
58: "WINAPI": (0, "Windows keyword"),
59: "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
60: "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
61: "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
62: "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
63: "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
64: "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
65: "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
66: "XMLCALL": (0, "Special macro for win32 calls"),
67: "XSLTCALL": (0, "Special macro for win32 calls"),
68: "XMLCDECL": (0, "Special macro for win32 calls"),
69: "EXSLTCALL": (0, "Special macro for win32 calls"),
70: "__declspec": (3, "Windows keyword"),
71: "__stdcall": (0, "Windows keyword"),
72: "ATTRIBUTE_UNUSED": (0, "macro keyword"),
73: "LIBEXSLT_PUBLIC": (0, "macro keyword"),
74: "X_IN_Y": (5, "macro function builder"),
75: "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
76: "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
77: "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
78: "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
79: }
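# Added note (not in the original script): each ignored_words entry maps a
# token to (number of following tokens to swallow, reason).  The token()
# method of CParser below drops that many extra tokens, so for instance
# "ATTRIBUTE_PRINTF" with a count of 5 also discards the '(', '1', ',', '2'
# and ')' tokens of an ATTRIBUTE_PRINTF(1,2) annotation.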
80:
81: def escape(raw):
82: raw = string.replace(raw, '&', '&amp;')
83: raw = string.replace(raw, '<', '&lt;')
84: raw = string.replace(raw, '>', '&gt;')
85: raw = string.replace(raw, "'", '&apos;')
86: raw = string.replace(raw, '"', '&quot;')
87: return raw
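# Added illustration (assuming the XML entity escaping above):
#   escape("value < 10 & x") == "value &lt; 10 &amp; x"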
88:
89: def uniq(items):
90: d = {}
91: for item in items:
92: d[item]=1
93: return d.keys()
94:
95: class identifier:
96: def __init__(self, name, header=None, module=None, type=None, lineno = 0,
97: info=None, extra=None, conditionals = None):
98: self.name = name
99: self.header = header
100: self.module = module
101: self.type = type
102: self.info = info
103: self.extra = extra
104: self.lineno = lineno
105: self.static = 0
106: if conditionals == None or len(conditionals) == 0:
107: self.conditionals = None
108: else:
109: self.conditionals = conditionals[:]
110: if self.name == debugsym:
111: print "=> define %s : %s" % (debugsym, (module, type, info,
112: extra, conditionals))
113:
114: def __repr__(self):
115: r = "%s %s:" % (self.type, self.name)
116: if self.static:
117: r = r + " static"
118: if self.module != None:
119: r = r + " from %s" % (self.module)
120: if self.info != None:
121: r = r + " " + `self.info`
122: if self.extra != None:
123: r = r + " " + `self.extra`
124: if self.conditionals != None:
125: r = r + " " + `self.conditionals`
126: return r
127:
128:
129: def set_header(self, header):
130: self.header = header
131: def set_module(self, module):
132: self.module = module
133: def set_type(self, type):
134: self.type = type
135: def set_info(self, info):
136: self.info = info
137: def set_extra(self, extra):
138: self.extra = extra
139: def set_lineno(self, lineno):
140: self.lineno = lineno
141: def set_static(self, static):
142: self.static = static
143: def set_conditionals(self, conditionals):
144: if conditionals == None or len(conditionals) == 0:
145: self.conditionals = None
146: else:
147: self.conditionals = conditionals[:]
148:
149: def get_name(self):
150: return self.name
151: def get_header(self):
152: return self.header
153: def get_module(self):
154: return self.module
155: def get_type(self):
156: return self.type
157: def get_info(self):
158: return self.info
159: def get_lineno(self):
160: return self.lineno
161: def get_extra(self):
162: return self.extra
163: def get_static(self):
164: return self.static
165: def get_conditionals(self):
166: return self.conditionals
167:
168: def update(self, header, module, type = None, info = None, extra=None,
169: conditionals=None):
170: if self.name == debugsym:
171: print "=> update %s : %s" % (debugsym, (module, type, info,
172: extra, conditionals))
173: if header != None and self.header == None:
174: self.set_header(header)
175: if module != None and (self.module == None or self.header == self.module):
176: self.set_module(module)
177: if type != None and self.type == None:
178: self.set_type(type)
179: if info != None:
180: self.set_info(info)
181: if extra != None:
182: self.set_extra(extra)
183: if conditionals != None:
184: self.set_conditionals(conditionals)
185:
186: class index:
187: def __init__(self, name = "noname"):
188: self.name = name
189: self.identifiers = {}
190: self.functions = {}
191: self.variables = {}
192: self.includes = {}
193: self.structs = {}
194: self.enums = {}
195: self.typedefs = {}
196: self.macros = {}
197: self.references = {}
198: self.info = {}
199:
200: def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
201: if name[0:2] == '__':
202: return None
203: d = None
204: try:
205: d = self.identifiers[name]
206: d.update(header, module, type, info, extra, conditionals)
207: except:
208: d = identifier(name, header, module, type, lineno, info, extra, conditionals)
209: self.identifiers[name] = d
210:
211: if d != None and static == 1:
212: d.set_static(1)
213:
214: if d != None and name != None and type != None:
215: self.references[name] = d
216:
217: if name == debugsym:
218: print "New ref: %s" % (d)
219:
220: return d
221:
222: def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
223: if name[0:2] == '__':
224: return None
225: d = None
226: try:
227: d = self.identifiers[name]
228: d.update(header, module, type, info, extra, conditionals)
229: except:
230: d = identifier(name, header, module, type, lineno, info, extra, conditionals)
231: self.identifiers[name] = d
232:
233: if d != None and static == 1:
234: d.set_static(1)
235:
236: if d != None and name != None and type != None:
237: if type == "function":
238: self.functions[name] = d
239: elif type == "functype":
240: self.functions[name] = d
241: elif type == "variable":
242: self.variables[name] = d
243: elif type == "include":
244: self.includes[name] = d
245: elif type == "struct":
246: self.structs[name] = d
247: elif type == "enum":
248: self.enums[name] = d
249: elif type == "typedef":
250: self.typedefs[name] = d
251: elif type == "macro":
252: self.macros[name] = d
253: else:
254: print "Unable to register type ", type
255:
256: if name == debugsym:
257: print "New symbol: %s" % (d)
258:
259: return d
260:
261: def merge(self, idx):
262: for id in idx.functions.keys():
263: #
264: # a macro might be used to override function or variable
265: # definitions
266: #
267: if self.macros.has_key(id):
268: del self.macros[id]
269: if self.functions.has_key(id):
270: print "function %s from %s redeclared in %s" % (
271: id, self.functions[id].header, idx.functions[id].header)
272: else:
273: self.functions[id] = idx.functions[id]
274: self.identifiers[id] = idx.functions[id]
275: for id in idx.variables.keys():
276: #
277: # a macro might be used to override function or variable
278: # definitions
279: #
280: if self.macros.has_key(id):
281: del self.macros[id]
282: if self.variables.has_key(id):
283: print "variable %s from %s redeclared in %s" % (
284: id, self.variables[id].header, idx.variables[id].header)
285: else:
286: self.variables[id] = idx.variables[id]
287: self.identifiers[id] = idx.variables[id]
288: for id in idx.structs.keys():
289: if self.structs.has_key(id):
290: print "struct %s from %s redeclared in %s" % (
291: id, self.structs[id].header, idx.structs[id].header)
292: else:
293: self.structs[id] = idx.structs[id]
294: self.identifiers[id] = idx.structs[id]
295: for id in idx.typedefs.keys():
296: if self.typedefs.has_key(id):
297: print "typedef %s from %s redeclared in %s" % (
298: id, self.typedefs[id].header, idx.typedefs[id].header)
299: else:
300: self.typedefs[id] = idx.typedefs[id]
301: self.identifiers[id] = idx.typedefs[id]
302: for id in idx.macros.keys():
303: #
304: # a macro might be used to override function or variable
305: # definitions
306: #
307: if self.variables.has_key(id):
308: continue
309: if self.functions.has_key(id):
310: continue
311: if self.enums.has_key(id):
312: continue
313: if self.macros.has_key(id):
314: print "macro %s from %s redeclared in %s" % (
315: id, self.macros[id].header, idx.macros[id].header)
316: else:
317: self.macros[id] = idx.macros[id]
318: self.identifiers[id] = idx.macros[id]
319: for id in idx.enums.keys():
320: if self.enums.has_key(id):
321: print "enum %s from %s redeclared in %s" % (
322: id, self.enums[id].header, idx.enums[id].header)
323: else:
324: self.enums[id] = idx.enums[id]
325: self.identifiers[id] = idx.enums[id]
326:
327: def merge_public(self, idx):
328: for id in idx.functions.keys():
329: if self.functions.has_key(id):
330: # check that function condition agrees with header
331: if idx.functions[id].conditionals != \
332: self.functions[id].conditionals:
333: print "Header condition differs from Function for %s:" \
334: % id
335: print " H: %s" % self.functions[id].conditionals
336: print " C: %s" % idx.functions[id].conditionals
337: up = idx.functions[id]
338: self.functions[id].update(None, up.module, up.type, up.info, up.extra)
339: # else:
340: # print "Function %s from %s is not declared in headers" % (
341: # id, idx.functions[id].module)
342: # TODO: do the same for variables.
343:
344: def analyze_dict(self, type, dict):
345: count = 0
346: public = 0
347: for name in dict.keys():
348: id = dict[name]
349: count = count + 1
350: if id.static == 0:
351: public = public + 1
352: if count != public:
353: print " %d %s , %d public" % (count, type, public)
354: elif count != 0:
355: print " %d public %s" % (count, type)
356:
357:
358: def analyze(self):
359: self.analyze_dict("functions", self.functions)
360: self.analyze_dict("variables", self.variables)
361: self.analyze_dict("structs", self.structs)
362: self.analyze_dict("typedefs", self.typedefs)
363: self.analyze_dict("macros", self.macros)
364:
365: class CLexer:
366: """A lexer for the C language, tokenize the input by reading and
367: analyzing it line by line"""
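# Added note (illustrative, not part of the original file): for an input line
#     xmlChar *name;
# successive token() calls yield roughly
#     ('name', 'xmlChar'), ('op', '*'), ('name', 'name'), ('sep', ';')
# while each word of a '#...' line becomes a 'preproc' token and a
# /* ... */ block is returned as a single 'comment' token.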
368: def __init__(self, input):
369: self.input = input
370: self.tokens = []
371: self.line = ""
372: self.lineno = 0
373:
374: def getline(self):
375: line = ''
376: while line == '':
377: line = self.input.readline()
378: if not line:
379: return None
380: self.lineno = self.lineno + 1
381: line = string.lstrip(line)
382: line = string.rstrip(line)
383: if line == '':
384: continue
385: while line[-1] == '\\':
386: line = line[:-1]
387: n = self.input.readline()
388: self.lineno = self.lineno + 1
389: n = string.lstrip(n)
390: n = string.rstrip(n)
391: if not n:
392: break
393: else:
394: line = line + n
395: return line
396:
397: def getlineno(self):
398: return self.lineno
399:
400: def push(self, token):
401: self.tokens.insert(0, token);
402:
403: def debug(self):
404: print "Last token: ", self.last
405: print "Token queue: ", self.tokens
406: print "Line %d end: " % (self.lineno), self.line
407:
408: def token(self):
409: while self.tokens == []:
410: if self.line == "":
411: line = self.getline()
412: else:
413: line = self.line
414: self.line = ""
415: if line == None:
416: return None
417:
418: if line[0] == '#':
419: self.tokens = map((lambda x: ('preproc', x)),
420: string.split(line))
421: break;
422: l = len(line)
423: if line[0] == '"' or line[0] == "'":
424: end = line[0]
425: line = line[1:]
426: found = 0
427: tok = ""
428: while found == 0:
429: i = 0
430: l = len(line)
431: while i < l:
432: if line[i] == end:
433: self.line = line[i+1:]
434: line = line[:i]
435: l = i
436: found = 1
437: break
438: if line[i] == '\\':
439: i = i + 1
440: i = i + 1
441: tok = tok + line
442: if found == 0:
443: line = self.getline()
444: if line == None:
445: return None
446: self.last = ('string', tok)
447: return self.last
448:
449: if l >= 2 and line[0] == '/' and line[1] == '*':
450: line = line[2:]
451: found = 0
452: tok = ""
453: while found == 0:
454: i = 0
455: l = len(line)
456: while i < l:
457: if line[i] == '*' and i+1 < l and line[i+1] == '/':
458: self.line = line[i+2:]
459: line = line[:i-1]
460: l = i
461: found = 1
462: break
463: i = i + 1
464: if tok != "":
465: tok = tok + "\n"
466: tok = tok + line
467: if found == 0:
468: line = self.getline()
469: if line == None:
470: return None
471: self.last = ('comment', tok)
472: return self.last
473: if l >= 2 and line[0] == '/' and line[1] == '/':
474: line = line[2:]
475: self.last = ('comment', line)
476: return self.last
477: i = 0
478: while i < l:
479: if line[i] == '/' and i+1 < l and line[i+1] == '/':
480: self.line = line[i:]
481: line = line[:i]
482: break
483: if line[i] == '/' and i+1 < l and line[i+1] == '*':
484: self.line = line[i:]
485: line = line[:i]
486: break
487: if line[i] == '"' or line[i] == "'":
488: self.line = line[i:]
489: line = line[:i]
490: break
491: i = i + 1
492: l = len(line)
493: i = 0
494: while i < l:
495: if line[i] == ' ' or line[i] == '\t':
496: i = i + 1
497: continue
498: o = ord(line[i])
499: if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
500: (o >= 48 and o <= 57):
501: s = i
502: while i < l:
503: o = ord(line[i])
504: if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
505: (o >= 48 and o <= 57) or string.find(
506: " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
507: i = i + 1
508: else:
509: break
510: self.tokens.append(('name', line[s:i]))
511: continue
512: if string.find("(){}:;,[]", line[i]) != -1:
513: # if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
514: # line[i] == '}' or line[i] == ':' or line[i] == ';' or \
515: # line[i] == ',' or line[i] == '[' or line[i] == ']':
516: self.tokens.append(('sep', line[i]))
517: i = i + 1
518: continue
519: if string.find("+-*><=/%&!|.", line[i]) != -1:
520: # if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
521: # line[i] == '>' or line[i] == '<' or line[i] == '=' or \
522: # line[i] == '/' or line[i] == '%' or line[i] == '&' or \
523: # line[i] == '!' or line[i] == '|' or line[i] == '.':
524: if line[i] == '.' and i + 2 < l and \
525: line[i+1] == '.' and line[i+2] == '.':
526: self.tokens.append(('name', '...'))
527: i = i + 3
528: continue
529:
530: j = i + 1
531: if j < l and (
532: string.find("+-*><=/%&!|", line[j]) != -1):
533: # line[j] == '+' or line[j] == '-' or line[j] == '*' or \
534: # line[j] == '>' or line[j] == '<' or line[j] == '=' or \
535: # line[j] == '/' or line[j] == '%' or line[j] == '&' or \
536: # line[j] == '!' or line[j] == '|'):
537: self.tokens.append(('op', line[i:j+1]))
538: i = j + 1
539: else:
540: self.tokens.append(('op', line[i]))
541: i = i + 1
542: continue
543: s = i
544: while i < l:
545: o = ord(line[i])
546: if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
547: (o >= 48 and o <= 57) or (
548: string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
549: # line[i] != ' ' and line[i] != '\t' and
550: # line[i] != '(' and line[i] != ')' and
551: # line[i] != '{' and line[i] != '}' and
552: # line[i] != ':' and line[i] != ';' and
553: # line[i] != ',' and line[i] != '+' and
554: # line[i] != '-' and line[i] != '*' and
555: # line[i] != '/' and line[i] != '%' and
556: # line[i] != '&' and line[i] != '!' and
557: # line[i] != '|' and line[i] != '[' and
558: # line[i] != ']' and line[i] != '=' and
559: # line[i] != '*' and line[i] != '>' and
560: # line[i] != '<'):
561: i = i + 1
562: else:
563: break
564: self.tokens.append(('name', line[s:i]))
565:
566: tok = self.tokens[0]
567: self.tokens = self.tokens[1:]
568: self.last = tok
569: return tok
570:
571: class CParser:
572: """The C module parser"""
573: def __init__(self, filename, idx = None):
574: self.filename = filename
575: if len(filename) > 2 and filename[-2:] == '.h':
576: self.is_header = 1
577: else:
578: self.is_header = 0
579: self.input = open(filename)
580: self.lexer = CLexer(self.input)
581: if idx == None:
582: self.index = index()
583: else:
584: self.index = idx
585: self.top_comment = ""
586: self.last_comment = ""
587: self.comment = None
588: self.collect_ref = 0
589: self.no_error = 0
590: self.conditionals = []
591: self.defines = []
592:
593: def collect_references(self):
594: self.collect_ref = 1
595:
596: def stop_error(self):
597: self.no_error = 1
598:
599: def start_error(self):
600: self.no_error = 0
601:
602: def lineno(self):
603: return self.lexer.getlineno()
604:
605: def index_add(self, name, module, static, type, info=None, extra = None):
606: if self.is_header == 1:
607: self.index.add(name, module, module, static, type, self.lineno(),
608: info, extra, self.conditionals)
609: else:
610: self.index.add(name, None, module, static, type, self.lineno(),
611: info, extra, self.conditionals)
612:
613: def index_add_ref(self, name, module, static, type, info=None,
614: extra = None):
615: if self.is_header == 1:
616: self.index.add_ref(name, module, module, static, type,
617: self.lineno(), info, extra, self.conditionals)
618: else:
619: self.index.add_ref(name, None, module, static, type, self.lineno(),
620: info, extra, self.conditionals)
621:
622: def warning(self, msg):
623: if self.no_error:
624: return
625: print msg
626:
627: def error(self, msg, token=-1):
628: if self.no_error:
629: return
630:
631: print "Parse Error: " + msg
632: if token != -1:
633: print "Got token ", token
634: self.lexer.debug()
635: sys.exit(1)
636:
637: def debug(self, msg, token=-1):
638: print "Debug: " + msg
639: if token != -1:
640: print "Got token ", token
641: self.lexer.debug()
642:
643: def parseTopComment(self, comment):
644: res = {}
645: lines = string.split(comment, "\n")
646: item = None
647: for line in lines:
648: while line != "" and (line[0] == ' ' or line[0] == '\t'):
649: line = line[1:]
650: while line != "" and line[0] == '*':
651: line = line[1:]
652: while line != "" and (line[0] == ' ' or line[0] == '\t'):
653: line = line[1:]
654: try:
655: (it, line) = string.split(line, ":", 1)
656: item = it
657: while line != "" and (line[0] == ' ' or line[0] == '\t'):
658: line = line[1:]
659: if res.has_key(item):
660: res[item] = res[item] + " " + line
661: else:
662: res[item] = line
663: except:
664: if item != None:
665: if res.has_key(item):
666: res[item] = res[item] + " " + line
667: else:
668: res[item] = line
669: self.index.info = res
670:
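# Added note (illustrative): parseTopComment() turns the leading block comment
# of a header, e.g.
#     * Summary: interface for the XML parser
#     * Description: main interface of the parser
#     * Author: ...
# into a dictionary like {'Summary': ..., 'Description': ..., 'Author': ...};
# serialize_exports() later looks up exactly those keys.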
671: def parseComment(self, token):
672: if self.top_comment == "":
673: self.top_comment = token[1]
674: if self.comment == None or token[1][0] == '*':
675: self.comment = token[1];
676: else:
677: self.comment = self.comment + token[1]
678: token = self.lexer.token()
679:
680: if string.find(self.comment, "DOC_DISABLE") != -1:
681: self.stop_error()
682:
683: if string.find(self.comment, "DOC_ENABLE") != -1:
684: self.start_error()
685:
686: return token
687:
688: #
689: # Parse a comment block associated with a typedef
690: #
691: def parseTypeComment(self, name, quiet = 0):
692: if name[0:2] == '__':
693: quiet = 1
694:
695: args = []
696: desc = ""
697:
698: if self.comment == None:
699: if not quiet:
700: self.warning("Missing comment for type %s" % (name))
701: return((args, desc))
702: if self.comment[0] != '*':
703: if not quiet:
704: self.warning("Missing * in type comment for %s" % (name))
705: return((args, desc))
706: lines = string.split(self.comment, '\n')
707: if lines[0] == '*':
708: del lines[0]
709: if lines[0] != "* %s:" % (name):
710: if not quiet:
711: self.warning("Misformatted type comment for %s" % (name))
712: self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
713: return((args, desc))
714: del lines[0]
715: while len(lines) > 0 and lines[0] == '*':
716: del lines[0]
717: desc = ""
718: while len(lines) > 0:
719: l = lines[0]
720: while len(l) > 0 and l[0] == '*':
721: l = l[1:]
722: l = string.strip(l)
723: desc = desc + " " + l
724: del lines[0]
725:
726: desc = string.strip(desc)
727:
728: if quiet == 0:
729: if desc == "":
730: self.warning("Type comment for %s lack description of the macro" % (name))
731:
732: return(desc)
733: #
734: # Parse a comment block associated with a macro
735: #
736: def parseMacroComment(self, name, quiet = 0):
737: if name[0:2] == '__':
738: quiet = 1
739:
740: args = []
741: desc = ""
742:
743: if self.comment == None:
744: if not quiet:
745: self.warning("Missing comment for macro %s" % (name))
746: return((args, desc))
747: if self.comment[0] != '*':
748: if not quiet:
749: self.warning("Missing * in macro comment for %s" % (name))
750: return((args, desc))
751: lines = string.split(self.comment, '\n')
752: if lines[0] == '*':
753: del lines[0]
754: if lines[0] != "* %s:" % (name):
755: if not quiet:
756: self.warning("Misformatted macro comment for %s" % (name))
757: self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
758: return((args, desc))
759: del lines[0]
760: while lines[0] == '*':
761: del lines[0]
762: while len(lines) > 0 and lines[0][0:3] == '* @':
763: l = lines[0][3:]
764: try:
765: (arg, desc) = string.split(l, ':', 1)
766: desc=string.strip(desc)
767: arg=string.strip(arg)
768: except:
769: if not quiet:
770: self.warning("Misformatted macro comment for %s" % (name))
771: self.warning(" problem with '%s'" % (lines[0]))
772: del lines[0]
773: continue
774: del lines[0]
775: l = string.strip(lines[0])
776: while len(l) > 2 and l[0:3] != '* @':
777: while l[0] == '*':
778: l = l[1:]
779: desc = desc + ' ' + string.strip(l)
780: del lines[0]
781: if len(lines) == 0:
782: break
783: l = lines[0]
784: args.append((arg, desc))
785: while len(lines) > 0 and lines[0] == '*':
786: del lines[0]
787: desc = ""
788: while len(lines) > 0:
789: l = lines[0]
790: while len(l) > 0 and l[0] == '*':
791: l = l[1:]
792: l = string.strip(l)
793: desc = desc + " " + l
794: del lines[0]
795:
796: desc = string.strip(desc)
797:
798: if quiet == 0:
799: if desc == "":
800: self.warning("Macro comment for %s lack description of the macro" % (name))
801:
802: return((args, desc))
803:
804: #
805: # Parse a comment block and merge the information found in the
806: # parameter descriptions; finally return a block as complete
807: # as possible
808: #
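# Added note (illustrative): the doc comments this expects follow the usual
# libxml2 layout, roughly
#     * xmlFoo:
#     * @doc:  the document
#     *
#     * short description of the function
#     *
#     * Returns 0 in case of success
# and the merged result has the shape
#     ((return_type, return_description),
#      [(arg_type, arg_name, arg_description), ...],
#      function_description)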
809: def mergeFunctionComment(self, name, description, quiet = 0):
810: if name == 'main':
811: quiet = 1
812: if name[0:2] == '__':
813: quiet = 1
814:
815: (ret, args) = description
816: desc = ""
817: retdesc = ""
818:
819: if self.comment == None:
820: if not quiet:
821: self.warning("Missing comment for function %s" % (name))
822: return(((ret[0], retdesc), args, desc))
823: if self.comment[0] != '*':
824: if not quiet:
825: self.warning("Missing * in function comment for %s" % (name))
826: return(((ret[0], retdesc), args, desc))
827: lines = string.split(self.comment, '\n')
828: if lines[0] == '*':
829: del lines[0]
830: if lines[0] != "* %s:" % (name):
831: if not quiet:
832: self.warning("Misformatted function comment for %s" % (name))
833: self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
834: return(((ret[0], retdesc), args, desc))
835: del lines[0]
836: while lines[0] == '*':
837: del lines[0]
838: nbargs = len(args)
839: while len(lines) > 0 and lines[0][0:3] == '* @':
840: l = lines[0][3:]
841: try:
842: (arg, desc) = string.split(l, ':', 1)
843: desc=string.strip(desc)
844: arg=string.strip(arg)
845: except:
846: if not quiet:
847: self.warning("Misformatted function comment for %s" % (name))
848: self.warning(" problem with '%s'" % (lines[0]))
849: del lines[0]
850: continue
851: del lines[0]
852: l = string.strip(lines[0])
853: while len(l) > 2 and l[0:3] != '* @':
854: while l[0] == '*':
855: l = l[1:]
856: desc = desc + ' ' + string.strip(l)
857: del lines[0]
858: if len(lines) == 0:
859: break
860: l = lines[0]
861: i = 0
862: while i < nbargs:
863: if args[i][1] == arg:
864: args[i] = (args[i][0], arg, desc)
865: break;
866: i = i + 1
867: if i >= nbargs:
868: if not quiet:
869: self.warning("Unable to find arg %s from function comment for %s" % (
870: arg, name))
871: while len(lines) > 0 and lines[0] == '*':
872: del lines[0]
873: desc = ""
874: while len(lines) > 0:
875: l = lines[0]
876: while len(l) > 0 and l[0] == '*':
877: l = l[1:]
878: l = string.strip(l)
879: if len(l) >= 6 and (l[0:6] == "return" or l[0:6] == "Return"):
880: try:
881: l = string.split(l, ' ', 1)[1]
882: except:
883: l = ""
884: retdesc = string.strip(l)
885: del lines[0]
886: while len(lines) > 0:
887: l = lines[0]
888: while len(l) > 0 and l[0] == '*':
889: l = l[1:]
890: l = string.strip(l)
891: retdesc = retdesc + " " + l
892: del lines[0]
893: else:
894: desc = desc + " " + l
895: del lines[0]
896:
897: retdesc = string.strip(retdesc)
898: desc = string.strip(desc)
899:
900: if quiet == 0:
901: #
902: # report missing comments
903: #
904: i = 0
905: while i < nbargs:
906: if args[i][2] == None and args[i][0] != "void" and \
907: ((args[i][1] != None) or (args[i][1] == '')):
908: self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
909: i = i + 1
910: if retdesc == "" and ret[0] != "void":
911: self.warning("Function comment for %s lacks description of return value" % (name))
912: if desc == "":
913: self.warning("Function comment for %s lacks description of the function" % (name))
914:
915: return(((ret[0], retdesc), args, desc))
916:
917: def parsePreproc(self, token):
918: if debug:
919: print "=> preproc ", token, self.lexer.tokens
920: name = token[1]
921: if name == "#include":
922: token = self.lexer.token()
923: if token == None:
924: return None
925: if token[0] == 'preproc':
926: self.index_add(token[1], self.filename, not self.is_header,
927: "include")
928: return self.lexer.token()
929: return token
930: if name == "#define":
931: token = self.lexer.token()
932: if token == None:
933: return None
934: if token[0] == 'preproc':
935: # TODO macros with arguments
936: name = token[1]
937: lst = []
938: token = self.lexer.token()
939: while token != None and token[0] == 'preproc' and \
940: token[1][0] != '#':
941: lst.append(token[1])
942: token = self.lexer.token()
943: try:
944: name = string.split(name, '(') [0]
945: except:
946: pass
947: info = self.parseMacroComment(name, not self.is_header)
948: self.index_add(name, self.filename, not self.is_header,
949: "macro", info)
950: return token
951:
952: #
953: # Processing of conditionals modified by Bill 1/1/05
954: #
955: # We process conditionals (i.e. tokens from #ifdef, #ifndef,
956: # #if, #else and #endif) for headers and mainline code,
957: # store the ones from the header in libxml2-api.xml, and later
958: # (in the routine merge_public) verify that the two (header and
959: # mainline code) agree.
960: #
961: # There is a small problem with processing the headers. Some of
962: # the variables are not concerned with enabling / disabling of
963: # library functions (e.g. '__XML_PARSER_H__'), and we don't want
964: # them to be included in libxml2-api.xml, or involved in
965: # the check between the header and the mainline code. To
966: # accomplish this, we ignore any conditional which doesn't include
967: # the string 'ENABLED'
968: #
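# Added example (derived from the code below): "#ifdef LIBXML_XPATH_ENABLED"
# is recorded as the conditional "defined(LIBXML_XPATH_ENABLED)" and
# "#ifndef LIBXML_XPATH_ENABLED" as "!defined(LIBXML_XPATH_ENABLED)", while
# guards such as "__XML_PARSER_H__" are skipped because they do not contain
# the string 'ENABLED'.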
969: if name == "#ifdef":
970: apstr = self.lexer.tokens[0][1]
971: try:
972: self.defines.append(apstr)
973: if string.find(apstr, 'ENABLED') != -1:
974: self.conditionals.append("defined(%s)" % apstr)
975: except:
976: pass
977: elif name == "#ifndef":
978: apstr = self.lexer.tokens[0][1]
979: try:
980: self.defines.append(apstr)
981: if string.find(apstr, 'ENABLED') != -1:
982: self.conditionals.append("!defined(%s)" % apstr)
983: except:
984: pass
985: elif name == "#if":
986: apstr = ""
987: for tok in self.lexer.tokens:
988: if apstr != "":
989: apstr = apstr + " "
990: apstr = apstr + tok[1]
991: try:
992: self.defines.append(apstr)
993: if string.find(apstr, 'ENABLED') != -1:
994: self.conditionals.append(apstr)
995: except:
996: pass
997: elif name == "#else":
998: if self.conditionals != [] and \
999: string.find(self.defines[-1], 'ENABLED') != -1:
1000: self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
1001: elif name == "#endif":
1002: if self.conditionals != [] and \
1003: string.find(self.defines[-1], 'ENABLED') != -1:
1004: self.conditionals = self.conditionals[:-1]
1005: self.defines = self.defines[:-1]
1006: token = self.lexer.token()
1007: while token != None and token[0] == 'preproc' and \
1008: token[1][0] != '#':
1009: token = self.lexer.token()
1010: return token
1011:
1012: #
1013: # token acquisition on top of the lexer; it handles preprocessor
1014: # directives and comments internally since they are logically not part of
1015: # the program structure.
1016: #
1017: def token(self):
1018: global ignored_words
1019:
1020: token = self.lexer.token()
1021: while token != None:
1022: if token[0] == 'comment':
1023: token = self.parseComment(token)
1024: continue
1025: elif token[0] == 'preproc':
1026: token = self.parsePreproc(token)
1027: continue
1028: elif token[0] == "name" and token[1] == "__const":
1029: token = ("name", "const")
1030: return token
1031: elif token[0] == "name" and token[1] == "__attribute":
1032: token = self.lexer.token()
1033: while token != None and token[1] != ";":
1034: token = self.lexer.token()
1035: return token
1036: elif token[0] == "name" and ignored_words.has_key(token[1]):
1037: (n, info) = ignored_words[token[1]]
1038: i = 0
1039: while i < n:
1040: token = self.lexer.token()
1041: i = i + 1
1042: token = self.lexer.token()
1043: continue
1044: else:
1045: if debug:
1046: print "=> ", token
1047: return token
1048: return None
1049:
1050: #
1051: # Parse a typedef, it records the type and its name.
1052: #
1053: def parseTypedef(self, token):
1054: if token == None:
1055: return None
1056: token = self.parseType(token)
1057: if token == None:
1058: self.error("parsing typedef")
1059: return None
1060: base_type = self.type
1061: type = base_type
1062: #self.debug("end typedef type", token)
1063: while token != None:
1064: if token[0] == "name":
1065: name = token[1]
1066: signature = self.signature
1067: if signature != None:
1068: type = string.split(type, '(')[0]
1069: d = self.mergeFunctionComment(name,
1070: ((type, None), signature), 1)
1071: self.index_add(name, self.filename, not self.is_header,
1072: "functype", d)
1073: else:
1074: if base_type == "struct":
1075: self.index_add(name, self.filename, not self.is_header,
1076: "struct", type)
1077: base_type = "struct " + name
1078: else:
1079: # TODO report missing or misformatted comments
1080: info = self.parseTypeComment(name, 1)
1081: self.index_add(name, self.filename, not self.is_header,
1082: "typedef", type, info)
1083: token = self.token()
1084: else:
1085: self.error("parsing typedef: expecting a name")
1086: return token
1087: #self.debug("end typedef", token)
1088: if token != None and token[0] == 'sep' and token[1] == ',':
1089: type = base_type
1090: token = self.token()
1091: while token != None and token[0] == "op":
1092: type = type + token[1]
1093: token = self.token()
1094: elif token != None and token[0] == 'sep' and token[1] == ';':
1095: break;
1096: elif token != None and token[0] == 'name':
1097: type = base_type
1098: continue;
1099: else:
1100: self.error("parsing typedef: expecting ';'", token)
1101: return token
1102: token = self.token()
1103: return token
1104:
1105: #
1106: # Parse a C code block, used for function bodies; it parses up to
1107: # and including the balancing }
1108: #
1109: def parseBlock(self, token):
1110: while token != None:
1111: if token[0] == "sep" and token[1] == "{":
1112: token = self.token()
1113: token = self.parseBlock(token)
1114: elif token[0] == "sep" and token[1] == "}":
1115: self.comment = None
1116: token = self.token()
1117: return token
1118: else:
1119: if self.collect_ref == 1:
1120: oldtok = token
1121: token = self.token()
1122: if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1123: if token[0] == "sep" and token[1] == "(":
1124: self.index_add_ref(oldtok[1], self.filename,
1125: 0, "function")
1126: token = self.token()
1127: elif token[0] == "name":
1128: token = self.token()
1129: if token[0] == "sep" and (token[1] == ";" or
1130: token[1] == "," or token[1] == "="):
1131: self.index_add_ref(oldtok[1], self.filename,
1132: 0, "type")
1133: elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1134: self.index_add_ref(oldtok[1], self.filename,
1135: 0, "typedef")
1136: elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1137: self.index_add_ref(oldtok[1], self.filename,
1138: 0, "typedef")
1139:
1140: else:
1141: token = self.token()
1142: return token
1143:
1144: #
1145: # Parse a C struct definition till the balancing }
1146: #
1147: def parseStruct(self, token):
1148: fields = []
1149: #self.debug("start parseStruct", token)
1150: while token != None:
1151: if token[0] == "sep" and token[1] == "{":
1152: token = self.token()
1153: token = self.parseTypeBlock(token)
1154: elif token[0] == "sep" and token[1] == "}":
1155: self.struct_fields = fields
1156: #self.debug("end parseStruct", token)
1157: #print fields
1158: token = self.token()
1159: return token
1160: else:
1161: base_type = self.type
1162: #self.debug("before parseType", token)
1163: token = self.parseType(token)
1164: #self.debug("after parseType", token)
1165: if token != None and token[0] == "name":
1166: fname = token[1]
1167: token = self.token()
1168: if token[0] == "sep" and token[1] == ";":
1169: self.comment = None
1170: token = self.token()
1171: fields.append((self.type, fname, self.comment))
1172: self.comment = None
1173: else:
1174: self.error("parseStruct: expecting ;", token)
1175: elif token != None and token[0] == "sep" and token[1] == "{":
1176: token = self.token()
1177: token = self.parseTypeBlock(token)
1178: if token != None and token[0] == "name":
1179: token = self.token()
1180: if token != None and token[0] == "sep" and token[1] == ";":
1181: token = self.token()
1182: else:
1183: self.error("parseStruct: expecting ;", token)
1184: else:
1185: self.error("parseStruct: name", token)
1186: token = self.token()
1187: self.type = base_type;
1188: self.struct_fields = fields
1189: #self.debug("end parseStruct", token)
1190: #print fields
1191: return token
1192:
1193: #
1194: # Parse a C enum block, parse till the balancing }
1195: #
1196: def parseEnumBlock(self, token):
1197: self.enums = []
1198: name = None
1199: self.comment = None
1200: comment = ""
1201: value = "0"
1202: while token != None:
1203: if token[0] == "sep" and token[1] == "{":
1204: token = self.token()
1205: token = self.parseTypeBlock(token)
1206: elif token[0] == "sep" and token[1] == "}":
1207: if name != None:
1208: if self.comment != None:
1209: comment = self.comment
1210: self.comment = None
1211: self.enums.append((name, value, comment))
1212: token = self.token()
1213: return token
1214: elif token[0] == "name":
1215: if name != None:
1216: if self.comment != None:
1217: comment = string.strip(self.comment)
1218: self.comment = None
1219: self.enums.append((name, value, comment))
1220: name = token[1]
1221: comment = ""
1222: token = self.token()
1223: if token[0] == "op" and token[1][0] == "=":
1224: value = ""
1225: if len(token[1]) > 1:
1226: value = token[1][1:]
1227: token = self.token()
1228: while token[0] != "sep" or (token[1] != ',' and
1229: token[1] != '}'):
1230: value = value + token[1]
1231: token = self.token()
1232: else:
1233: try:
1234: value = "%d" % (int(value) + 1)
1235: except:
1236: self.warning("Failed to compute value of enum %s" % (name))
1237: value=""
1238: if token[0] == "sep" and token[1] == ",":
1239: token = self.token()
1240: else:
1241: token = self.token()
1242: return token
1243:
1244: #
1245: # Parse a C definition block, used for structs; it parses up to
1246: # the balancing }
1247: #
1248: def parseTypeBlock(self, token):
1249: while token != None:
1250: if token[0] == "sep" and token[1] == "{":
1251: token = self.token()
1252: token = self.parseTypeBlock(token)
1253: elif token[0] == "sep" and token[1] == "}":
1254: token = self.token()
1255: return token
1256: else:
1257: token = self.token()
1258: return token
1259:
1260: #
1261: # Parse a type: the fact that the type name can either occur after
1262: # the definition or within the definition makes it a little harder;
1263: # if the name is inside, its token is pushed back before returning
1264: #
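# Added example (illustrative): for a fragment such as
#     const xmlChar * name
# parseType() leaves self.type == "const xmlChar *" and returns the
# ('name', 'name') token; for function pointer types it also fills in
# self.signature.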
1265: def parseType(self, token):
1266: self.type = ""
1267: self.struct_fields = []
1268: self.signature = None
1269: if token == None:
1270: return token
1271:
1272: while token[0] == "name" and (
1273: token[1] == "const" or \
1274: token[1] == "unsigned" or \
1275: token[1] == "signed"):
1276: if self.type == "":
1277: self.type = token[1]
1278: else:
1279: self.type = self.type + " " + token[1]
1280: token = self.token()
1281:
1282: if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1283: if self.type == "":
1284: self.type = token[1]
1285: else:
1286: self.type = self.type + " " + token[1]
1287: if token[0] == "name" and token[1] == "int":
1288: if self.type == "":
1289: self.type = tmp[1]
1290: else:
1291: self.type = self.type + " " + tmp[1]
1292:
1293: elif token[0] == "name" and token[1] == "struct":
1294: if self.type == "":
1295: self.type = token[1]
1296: else:
1297: self.type = self.type + " " + token[1]
1298: token = self.token()
1299: nametok = None
1300: if token[0] == "name":
1301: nametok = token
1302: token = self.token()
1303: if token != None and token[0] == "sep" and token[1] == "{":
1304: token = self.token()
1305: token = self.parseStruct(token)
1306: elif token != None and token[0] == "op" and token[1] == "*":
1307: self.type = self.type + " " + nametok[1] + " *"
1308: token = self.token()
1309: while token != None and token[0] == "op" and token[1] == "*":
1310: self.type = self.type + " *"
1311: token = self.token()
1312: if token[0] == "name":
1313: nametok = token
1314: token = self.token()
1315: else:
1316: self.error("struct : expecting name", token)
1317: return token
1318: elif token != None and token[0] == "name" and nametok != None:
1319: self.type = self.type + " " + nametok[1]
1320: return token
1321:
1322: if nametok != None:
1323: self.lexer.push(token)
1324: token = nametok
1325: return token
1326:
1327: elif token[0] == "name" and token[1] == "enum":
1328: if self.type == "":
1329: self.type = token[1]
1330: else:
1331: self.type = self.type + " " + token[1]
1332: self.enums = []
1333: token = self.token()
1334: if token != None and token[0] == "sep" and token[1] == "{":
1335: token = self.token()
1336: token = self.parseEnumBlock(token)
1337: else:
1338: self.error("parsing enum: expecting '{'", token)
1339: enum_type = None
1340: if token != None and token[0] != "name":
1341: self.lexer.push(token)
1342: token = ("name", "enum")
1343: else:
1344: enum_type = token[1]
1345: for enum in self.enums:
1346: self.index_add(enum[0], self.filename,
1347: not self.is_header, "enum",
1348: (enum[1], enum[2], enum_type))
1349: return token
1350:
1351: elif token[0] == "name":
1352: if self.type == "":
1353: self.type = token[1]
1354: else:
1355: self.type = self.type + " " + token[1]
1356: else:
1357: self.error("parsing type %s: expecting a name" % (self.type),
1358: token)
1359: return token
1360: token = self.token()
1361: while token != None and (token[0] == "op" or
1362: token[0] == "name" and token[1] == "const"):
1363: self.type = self.type + " " + token[1]
1364: token = self.token()
1365:
1366: #
1367: # if there is a parenthesis here, this means a function type
1368: #
1369: if token != None and token[0] == "sep" and token[1] == '(':
1370: self.type = self.type + token[1]
1371: token = self.token()
1372: while token != None and token[0] == "op" and token[1] == '*':
1373: self.type = self.type + token[1]
1374: token = self.token()
1375: if token == None or token[0] != "name" :
1376: self.error("parsing function type, name expected", token);
1377: return token
1378: self.type = self.type + token[1]
1379: nametok = token
1380: token = self.token()
1381: if token != None and token[0] == "sep" and token[1] == ')':
1382: self.type = self.type + token[1]
1383: token = self.token()
1384: if token != None and token[0] == "sep" and token[1] == '(':
1385: token = self.token()
1386: type = self.type;
1387: token = self.parseSignature(token);
1388: self.type = type;
1389: else:
1390: self.error("parsing function type, '(' expected", token);
1391: return token
1392: else:
1393: self.error("parsing function type, ')' expected", token);
1394: return token
1395: self.lexer.push(token)
1396: token = nametok
1397: return token
1398:
1399: #
1400: # do some lookahead for arrays
1401: #
1402: if token != None and token[0] == "name":
1403: nametok = token
1404: token = self.token()
1405: if token != None and token[0] == "sep" and token[1] == '[':
1406: self.type = self.type + nametok[1]
1407: while token != None and token[0] == "sep" and token[1] == '[':
1408: self.type = self.type + token[1]
1409: token = self.token()
1410: while token != None and token[0] != 'sep' and \
1411: token[1] != ']' and token[1] != ';':
1412: self.type = self.type + token[1]
1413: token = self.token()
1414: if token != None and token[0] == 'sep' and token[1] == ']':
1415: self.type = self.type + token[1]
1416: token = self.token()
1417: else:
1418: self.error("parsing array type, ']' expected", token);
1419: return token
1420: elif token != None and token[0] == "sep" and token[1] == ':':
1421: # remove :12 in case it's a limited int size
1422: token = self.token()
1423: token = self.token()
1424: self.lexer.push(token)
1425: token = nametok
1426:
1427: return token
1428:
1429: #
1430: # Parse a signature: '(' has been parsed and we scan the type definition
1431: # up to and including the ')'
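# Added example (illustrative): for "(xmlDocPtr doc, int options)" the
# resulting self.signature is roughly
#     [('xmlDocPtr', 'doc', None), ('int', 'options', None)]
# where the None slots are later filled from the doc comment by
# mergeFunctionComment().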
1432: def parseSignature(self, token):
1433: signature = []
1434: if token != None and token[0] == "sep" and token[1] == ')':
1435: self.signature = []
1436: token = self.token()
1437: return token
1438: while token != None:
1439: token = self.parseType(token)
1440: if token != None and token[0] == "name":
1441: signature.append((self.type, token[1], None))
1442: token = self.token()
1443: elif token != None and token[0] == "sep" and token[1] == ',':
1444: token = self.token()
1445: continue
1446: elif token != None and token[0] == "sep" and token[1] == ')':
1447: # only the type was provided
1448: if self.type == "...":
1449: signature.append((self.type, "...", None))
1450: else:
1451: signature.append((self.type, None, None))
1452: if token != None and token[0] == "sep":
1453: if token[1] == ',':
1454: token = self.token()
1455: continue
1456: elif token[1] == ')':
1457: token = self.token()
1458: break
1459: self.signature = signature
1460: return token
1461:
1462: #
1463: # Parse a global definition, be it a type, variable or function.
1464: # The extern "C" blocks are a bit nasty and require recursion.
1465: #
1466: def parseGlobal(self, token):
1467: static = 0
1468: if token[1] == 'extern':
1469: token = self.token()
1470: if token == None:
1471: return token
1472: if token[0] == 'string':
1473: if token[1] == 'C':
1474: token = self.token()
1475: if token == None:
1476: return token
1477: if token[0] == 'sep' and token[1] == "{":
1478: token = self.token()
1479: # print 'Entering extern "C line ', self.lineno()
1480: while token != None and (token[0] != 'sep' or
1481: token[1] != "}"):
1482: if token[0] == 'name':
1483: token = self.parseGlobal(token)
1484: else:
1485: self.error(
1486: "token %s %s unexpected at the top level" % (
1487: token[0], token[1]))
1488: token = self.parseGlobal(token)
1489: # print 'Exiting extern "C" line', self.lineno()
1490: token = self.token()
1491: return token
1492: else:
1493: return token
1494: elif token[1] == 'static':
1495: static = 1
1496: token = self.token()
1497: if token == None or token[0] != 'name':
1498: return token
1499:
1500: if token[1] == 'typedef':
1501: token = self.token()
1502: return self.parseTypedef(token)
1503: else:
1504: token = self.parseType(token)
1505: type_orig = self.type
1506: if token == None or token[0] != "name":
1507: return token
1508: type = type_orig
1509: self.name = token[1]
1510: token = self.token()
1511: while token != None and (token[0] == "sep" or token[0] == "op"):
1512: if token[0] == "sep":
1513: if token[1] == "[":
1514: type = type + token[1]
1515: token = self.token()
1516: while token != None and (token[0] != "sep" or \
1517: token[1] != ";"):
1518: type = type + token[1]
1519: token = self.token()
1520:
1521: if token != None and token[0] == "op" and token[1] == "=":
1522: #
1523: # Skip the initialization of the variable
1524: #
1525: token = self.token()
1526: if token[0] == 'sep' and token[1] == '{':
1527: token = self.token()
1528: token = self.parseBlock(token)
1529: else:
1530: self.comment = None
1531: while token != None and (token[0] != "sep" or \
1532: (token[1] != ';' and token[1] != ',')):
1533: token = self.token()
1534: self.comment = None
1535: if token == None or token[0] != "sep" or (token[1] != ';' and
1536: token[1] != ','):
1537: self.error("missing ';' or ',' after value")
1538:
1539: if token != None and token[0] == "sep":
1540: if token[1] == ";":
1541: self.comment = None
1542: token = self.token()
1543: if type == "struct":
1544: self.index_add(self.name, self.filename,
1545: not self.is_header, "struct", self.struct_fields)
1546: else:
1547: self.index_add(self.name, self.filename,
1548: not self.is_header, "variable", type)
1549: break
1550: elif token[1] == "(":
1551: token = self.token()
1552: token = self.parseSignature(token)
1553: if token == None:
1554: return None
1555: if token[0] == "sep" and token[1] == ";":
1556: d = self.mergeFunctionComment(self.name,
1557: ((type, None), self.signature), 1)
1558: self.index_add(self.name, self.filename, static,
1559: "function", d)
1560: token = self.token()
1561: elif token[0] == "sep" and token[1] == "{":
1562: d = self.mergeFunctionComment(self.name,
1563: ((type, None), self.signature), static)
1564: self.index_add(self.name, self.filename, static,
1565: "function", d)
1566: token = self.token()
1567: token = self.parseBlock(token);
1568: elif token[1] == ',':
1569: self.comment = None
1570: self.index_add(self.name, self.filename, static,
1571: "variable", type)
1572: type = type_orig
1573: token = self.token()
1574: while token != None and token[0] == "sep":
1575: type = type + token[1]
1576: token = self.token()
1577: if token != None and token[0] == "name":
1578: self.name = token[1]
1579: token = self.token()
1580: else:
1581: break
1582:
1583: return token
1584:
1585: def parse(self):
1586: self.warning("Parsing %s" % (self.filename))
1587: token = self.token()
1588: while token != None:
1589: if token[0] == 'name':
1590: token = self.parseGlobal(token)
1591: else:
1592: self.error("token %s %s unexpected at the top level" % (
1593: token[0], token[1]))
1594: token = self.parseGlobal(token)
1595: return
1596: self.parseTopComment(self.top_comment)
1597: return self.index
1598:
1599:
1600: class docBuilder:
1601: """A documentation builder"""
1602: def __init__(self, name, directories=['.'], excludes=[]):
1603: self.name = name
1604: self.directories = directories
1605: self.excludes = excludes + ignored_files.keys()
1606: self.modules = {}
1607: self.headers = {}
1608: self.idx = index()
1609: self.xref = {}
1610: self.index = {}
1611: if name == 'libxml2':
1612: self.basename = 'libxml'
1613: else:
1614: self.basename = name
1615:
1616: def indexString(self, id, str):
1617: if str == None:
1618: return
1619: str = string.replace(str, "'", ' ')
1620: str = string.replace(str, '"', ' ')
1621: str = string.replace(str, "/", ' ')
1622: str = string.replace(str, '*', ' ')
1623: str = string.replace(str, "[", ' ')
1624: str = string.replace(str, "]", ' ')
1625: str = string.replace(str, "(", ' ')
1626: str = string.replace(str, ")", ' ')
1627: str = string.replace(str, "<", ' ')
1628: str = string.replace(str, '>', ' ')
1629: str = string.replace(str, "&", ' ')
1630: str = string.replace(str, '#', ' ')
1631: str = string.replace(str, ",", ' ')
1632: str = string.replace(str, '.', ' ')
1633: str = string.replace(str, ';', ' ')
1634: tokens = string.split(str)
1635: for token in tokens:
1636: try:
1637: c = token[0]
1638: if string.find(string.letters, c) < 0:
1639: pass
1640: elif len(token) < 3:
1641: pass
1642: else:
1643: lower = string.lower(token)
1644: # TODO: generalize this a bit
1645: if lower == 'and' or lower == 'the':
1646: pass
1647: elif self.xref.has_key(token):
1648: self.xref[token].append(id)
1649: else:
1650: self.xref[token] = [id]
1651: except:
1652: pass
1653:
1654: def analyze(self):
1655: print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1656: self.idx.analyze()
1657:
1658: def scanHeaders(self):
1659: for header in self.headers.keys():
1660: parser = CParser(header)
1661: idx = parser.parse()
1662: self.headers[header] = idx;
1663: self.idx.merge(idx)
1664:
1665: def scanModules(self):
1666: for module in self.modules.keys():
1667: parser = CParser(module)
1668: idx = parser.parse()
1669: # idx.analyze()
1670: self.modules[module] = idx
1671: self.idx.merge_public(idx)
1672:
1673: def scan(self):
1674: for directory in self.directories:
1675: files = glob.glob(directory + "/*.c")
1676: for file in files:
1677: skip = 0
1678: for excl in self.excludes:
1679: if string.find(file, excl) != -1:
1680: skip = 1;
1681: break
1682: if skip == 0:
1683: self.modules[file] = None;
1684: files = glob.glob(directory + "/*.h")
1685: for file in files:
1686: skip = 0
1687: for excl in self.excludes:
1688: if string.find(file, excl) != -1:
1689: skip = 1;
1690: break
1691: if skip == 0:
1692: self.headers[file] = None;
1693: self.scanHeaders()
1694: self.scanModules()
1695:
1696: def modulename_file(self, file):
1697: module = os.path.basename(file)
1698: if module[-2:] == '.h':
1699: module = module[:-2]
1700: elif module[-2:] == '.c':
1701: module = module[:-2]
1702: return module
1703:
1704: def serialize_enum(self, output, name):
1705: id = self.idx.enums[name]
1706: output.write(" <enum name='%s' file='%s'" % (name,
1707: self.modulename_file(id.header)))
1708: if id.info != None:
1709: info = id.info
1710: if info[0] != None and info[0] != '':
1711: try:
1712: val = eval(info[0])
1713: except:
1714: val = info[0]
1715: output.write(" value='%s'" % (val));
1716: if info[2] != None and info[2] != '':
1717: output.write(" type='%s'" % info[2]);
1718: if info[1] != None and info[1] != '':
1719: output.write(" info='%s'" % escape(info[1]));
1720: output.write("/>\n")
1721:
1722: def serialize_macro(self, output, name):
1723: id = self.idx.macros[name]
1724: output.write(" <macro name='%s' file='%s'>\n" % (name,
1725: self.modulename_file(id.header)))
1726: if id.info != None:
1727: try:
1728: (args, desc) = id.info
1729: if desc != None and desc != "":
1730: output.write(" <info>%s</info>\n" % (escape(desc)))
1731: self.indexString(name, desc)
1732: for arg in args:
1733: (name, desc) = arg
1734: if desc != None and desc != "":
1735: output.write(" <arg name='%s' info='%s'/>\n" % (
1736: name, escape(desc)))
1737: self.indexString(name, desc)
1738: else:
1739: output.write(" <arg name='%s'/>\n" % (name))
1740: except:
1741: pass
1742: output.write(" </macro>\n")
1743:
1744: def serialize_typedef(self, output, name):
1745: id = self.idx.typedefs[name]
1746: if id.info[0:7] == 'struct ':
1747: output.write(" <struct name='%s' file='%s' type='%s'" % (
1748: name, self.modulename_file(id.header), id.info))
1749: name = id.info[7:]
1750: if self.idx.structs.has_key(name) and ( \
1751: type(self.idx.structs[name].info) == type(()) or
1752: type(self.idx.structs[name].info) == type([])):
1753: output.write(">\n");
1754: try:
1755: for field in self.idx.structs[name].info:
1756: desc = field[2]
1757: self.indexString(name, desc)
1758: if desc == None:
1759: desc = ''
1760: else:
1761: desc = escape(desc)
1762: output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1763: except:
1764: print "Failed to serialize struct %s" % (name)
1765: output.write(" </struct>\n")
1766: else:
1767: output.write("/>\n");
1768: else :
1769: output.write(" <typedef name='%s' file='%s' type='%s'" % (
1770: name, self.modulename_file(id.header), id.info))
1771: try:
1772: desc = id.extra
1773: if desc != None and desc != "":
1774: output.write(">\n <info>%s</info>\n" % (escape(desc)))
1775: output.write(" </typedef>\n")
1776: else:
1777: output.write("/>\n")
1778: except:
1779: output.write("/>\n")
1780:
1781: def serialize_variable(self, output, name):
1782: id = self.idx.variables[name]
1783: if id.info != None:
1784: output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
1785: name, self.modulename_file(id.header), id.info))
1786: else:
1787: output.write(" <variable name='%s' file='%s'/>\n" % (
1788: name, self.modulename_file(id.header)))
1789:
1790: def serialize_function(self, output, name):
1791: id = self.idx.functions[name]
1792: if name == debugsym:
1793: print "=>", id
1794:
1795: output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type,
1796: name, self.modulename_file(id.header),
1797: self.modulename_file(id.module)))
1798: #
1799: # Processing of conditionals modified by Bill 1/1/05
1800: #
1801: if id.conditionals != None:
1802: apstr = ""
1803: for cond in id.conditionals:
1804: if apstr != "":
1805: apstr = apstr + " && "
1806: apstr = apstr + cond
1807: output.write(" <cond>%s</cond>\n"% (apstr));
1808: try:
1809: (ret, params, desc) = id.info
1810: if (desc == None or desc == '') and \
1811: name[0:9] != "xmlThrDef" and name != "xmlDllMain":
1812: print "%s %s from %s has no description" % (id.type, name,
1813: self.modulename_file(id.module))
1814:
1815: output.write(" <info>%s</info>\n" % (escape(desc)))
1816: self.indexString(name, desc)
1817: if ret[0] != None:
1818: if ret[0] == "void":
1819: output.write(" <return type='void'/>\n")
1820: else:
1821: output.write(" <return type='%s' info='%s'/>\n" % (
1822: ret[0], escape(ret[1])))
1823: self.indexString(name, ret[1])
1824: for param in params:
1825: if param[0] == 'void':
1826: continue
1827: if param[2] == None:
1828: output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1829: else:
1830: output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1831: self.indexString(name, param[2])
1832: except:
1833: print "Failed to save function %s info: " % name, `id.info`
1834: output.write(" </%s>\n" % (id.type))
1835:
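# Added note (illustrative; values are made up, the layout follows the
# write() calls above): serialize_function() emits entries such as
#     <function name='xmlFoo' file='parser' module='parser'>
#       <cond>defined(LIBXML_XPATH_ENABLED)</cond>
#       <info>short description</info>
#       <return type='int' info='0 in case of success'/>
#       <arg name='doc' type='xmlDocPtr' info='the document'/>
#     </function>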
1836: def serialize_exports(self, output, file):
1837: module = self.modulename_file(file)
1838: output.write(" <file name='%s'>\n" % (module))
1839: dict = self.headers[file]
1840: if dict.info != None:
1841: for data in ('Summary', 'Description', 'Author'):
1842: try:
1843: output.write(" <%s>%s</%s>\n" % (
1844: string.lower(data),
1845: escape(dict.info[data]),
1846: string.lower(data)))
1847: except:
1848: print "Header %s lacks a %s description" % (module, data)
1849: if dict.info.has_key('Description'):
1850: desc = dict.info['Description']
1851: if string.find(desc, "DEPRECATED") != -1:
1852: output.write(" <deprecated/>\n")
1853:
1854: ids = dict.macros.keys()
1855: ids.sort()
1856: for id in uniq(ids):
1857: # Macros are sometimes used to masquerade as other types.
1858: if dict.functions.has_key(id):
1859: continue
1860: if dict.variables.has_key(id):
1861: continue
1862: if dict.typedefs.has_key(id):
1863: continue
1864: if dict.structs.has_key(id):
1865: continue
1866: if dict.enums.has_key(id):
1867: continue
1868: output.write(" <exports symbol='%s' type='macro'/>\n" % (id))
1869: ids = dict.enums.keys()
1870: ids.sort()
1871: for id in uniq(ids):
1872: output.write(" <exports symbol='%s' type='enum'/>\n" % (id))
1873: ids = dict.typedefs.keys()
1874: ids.sort()
1875: for id in uniq(ids):
1876: output.write(" <exports symbol='%s' type='typedef'/>\n" % (id))
1877: ids = dict.structs.keys()
1878: ids.sort()
1879: for id in uniq(ids):
1880: output.write(" <exports symbol='%s' type='struct'/>\n" % (id))
1881: ids = dict.variables.keys()
1882: ids.sort()
1883: for id in uniq(ids):
1884: output.write(" <exports symbol='%s' type='variable'/>\n" % (id))
1885: ids = dict.functions.keys()
1886: ids.sort()
1887: for id in uniq(ids):
1888: output.write(" <exports symbol='%s' type='function'/>\n" % (id))
1889: output.write(" </file>\n")
1890:
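# Per-file cross-reference: for each header, list the names of every symbol
# (function, variable, macro, typedef, struct or enum) it declares.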
1891: def serialize_xrefs_files(self, output):
1892: headers = self.headers.keys()
1893: headers.sort()
1894: for file in headers:
1895: module = self.modulename_file(file)
1896: output.write(" <file name='%s'>\n" % (module))
1897: dict = self.headers[file]
1898: ids = uniq(dict.functions.keys() + dict.variables.keys() + \
1899: dict.macros.keys() + dict.typedefs.keys() + \
1900: dict.structs.keys() + dict.enums.keys())
1901: ids.sort()
1902: for id in ids:
1903: output.write(" <ref name='%s'/>\n" % (id))
1904: output.write(" </file>\n")
1905: pass
1906:
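# Cross-reference functions by argument type: each non-trivial type gets a
# <type> block listing the functions that take it as a parameter; common
# types ('', void, int, char *, const char *) are skipped and duplicate
# references filtered out.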
1907: def serialize_xrefs_functions(self, output):
1908: funcs = {}
1909: for name in self.idx.functions.keys():
1910: id = self.idx.functions[name]
1911: try:
1912: (ret, params, desc) = id.info
1913: for param in params:
1914: if param[0] == 'void':
1915: continue
1916: if funcs.has_key(param[0]):
1917: funcs[param[0]].append(name)
1918: else:
1919: funcs[param[0]] = [name]
1920: except:
1921: pass
1922: typ = funcs.keys()
1923: typ.sort()
1924: for type in typ:
1925: if type == '' or type == 'void' or type == "int" or \
1926: type == "char *" or type == "const char *" :
1927: continue
1928: output.write(" <type name='%s'>\n" % (type))
1929: ids = funcs[type]
1930: ids.sort()
1931: pid = '' # not sure why we have dups, but get rid of them!
1932: for id in ids:
1933: if id != pid:
1934: output.write(" <ref name='%s'/>\n" % (id))
1935: pid = id
1936: output.write(" </type>\n")
1937:
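# Cross-reference functions by return type, which roughly identifies the
# "constructors" of each non-trivial type.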
1938: def serialize_xrefs_constructors(self, output):
1939: funcs = {}
1940: for name in self.idx.functions.keys():
1941: id = self.idx.functions[name]
1942: try:
1943: (ret, params, desc) = id.info
1944: if ret[0] == "void":
1945: continue
1946: if funcs.has_key(ret[0]):
1947: funcs[ret[0]].append(name)
1948: else:
1949: funcs[ret[0]] = [name]
1950: except:
1951: pass
1952: typ = funcs.keys()
1953: typ.sort()
1954: for type in typ:
1955: if type == '' or type == 'void' or type == "int" or \
1956: type == "char *" or type == "const char *" :
1957: continue
1958: output.write(" <type name='%s'>\n" % (type))
1959: ids = funcs[type]
1960: ids.sort()
1961: for id in ids:
1962: output.write(" <ref name='%s'/>\n" % (id))
1963: output.write(" </type>\n")
1964:
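# Alphabetical index of all identifiers, grouped in <letter> blocks keyed
# on the first character of the name.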
1965: def serialize_xrefs_alpha(self, output):
1966: letter = None
1967: ids = self.idx.identifiers.keys()
1968: ids.sort()
1969: for id in ids:
1970: if id[0] != letter:
1971: if letter != None:
1972: output.write(" </letter>\n")
1973: letter = id[0]
1974: output.write(" <letter name='%s'>\n" % (letter))
1975: output.write(" <ref name='%s'/>\n" % (id))
1976: if letter != None:
1977: output.write(" </letter>\n")
1978:
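# Map every identifier to the generated HTML page documenting it, i.e.
# html/<basename>-<module>.html#<id>.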
1979: def serialize_xrefs_references(self, output):
1980: typ = self.idx.identifiers.keys()
1981: typ.sort()
1982: for id in typ:
1983: idf = self.idx.identifiers[id]
1984: module = idf.header
1985: output.write(" <reference name='%s' href='%s'/>\n" % (id,
1986: 'html/' + self.basename + '-' +
1987: self.modulename_file(module) + '.html#' +
1988: id))
1989:
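# Word index built from self.xref: words referenced by more than 30 symbols
# are dropped as too common, the rest are grouped by first letter and split
# into <chunk> blocks at letter boundaries once roughly 200 references have
# accumulated; presumably this keeps the generated index pages to a
# manageable size.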
1990: def serialize_xrefs_index(self, output):
1991: index = self.xref
1992: typ = index.keys()
1993: typ.sort()
1994: letter = None
1995: count = 0
1996: chunk = 0
1997: chunks = []
1998: for id in typ:
1999: if len(index[id]) > 30:
2000: continue
2001: if id[0] != letter:
2002: if letter == None or count > 200:
2003: if letter != None:
2004: output.write(" </letter>\n")
2005: output.write(" </chunk>\n")
2006: count = 0
2007: chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2008: output.write(" <chunk name='chunk%s'>\n" % (chunk))
2009: first_letter = id[0]
2010: chunk = chunk + 1
2011: elif letter != None:
2012: output.write(" </letter>\n")
2013: letter = id[0]
2014: output.write(" <letter name='%s'>\n" % (letter))
2015: output.write(" <word name='%s'>\n" % (id))
2016: tokens = index[id]
2017: tokens.sort()
2018: tok = None
2019: for token in tokens:
2020: if tok == token:
2021: continue
2022: tok = token
2023: output.write(" <ref name='%s'/>\n" % (token))
2024: count = count + 1
2025: output.write(" </word>\n")
2026: if letter != None:
2027: output.write(" </letter>\n")
2028: output.write(" </chunk>\n")
2029: if count != 0:
2030: chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2031: output.write(" <chunks>\n")
2032: for ch in chunks:
2033: output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
2034: ch[0], ch[1], ch[2]))
2035: output.write(" </chunks>\n")
2036:
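# Emit the body of the cross-reference document: references, alphabetical
# index, constructors, functions-by-type, per-file lists and the word index.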
2037: def serialize_xrefs(self, output):
2038: output.write(" <references>\n")
2039: self.serialize_xrefs_references(output)
2040: output.write(" </references>\n")
2041: output.write(" <alpha>\n")
2042: self.serialize_xrefs_alpha(output)
2043: output.write(" </alpha>\n")
2044: output.write(" <constructors>\n")
2045: self.serialize_xrefs_constructors(output)
2046: output.write(" </constructors>\n")
2047: output.write(" <functions>\n")
2048: self.serialize_xrefs_functions(output)
2049: output.write(" </functions>\n")
2050: output.write(" <files>\n")
2051: self.serialize_xrefs_files(output)
2052: output.write(" </files>\n")
2053: output.write(" <index>\n")
2054: self.serialize_xrefs_index(output)
2055: output.write(" </index>\n")
2056:
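# Top-level output: write <name>-api.xml (per-file exports plus the full
# symbol descriptions) and <name>-refs.xml (the cross-reference tables).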
2057: def serialize(self):
2058: filename = "%s-api.xml" % self.name
2059: print "Saving XML description %s" % (filename)
2060: output = open(filename, "w")
2061: output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2062: output.write("<api name='%s'>\n" % self.name)
2063: output.write(" <files>\n")
2064: headers = self.headers.keys()
2065: headers.sort()
2066: for file in headers:
2067: self.serialize_exports(output, file)
2068: output.write(" </files>\n")
2069: output.write(" <symbols>\n")
2070: macros = self.idx.macros.keys()
2071: macros.sort()
2072: for macro in macros:
2073: self.serialize_macro(output, macro)
2074: enums = self.idx.enums.keys()
2075: enums.sort()
2076: for enum in enums:
2077: self.serialize_enum(output, enum)
2078: typedefs = self.idx.typedefs.keys()
2079: typedefs.sort()
2080: for typedef in typedefs:
2081: self.serialize_typedef(output, typedef)
2082: variables = self.idx.variables.keys()
2083: variables.sort()
2084: for variable in variables:
2085: self.serialize_variable(output, variable)
2086: functions = self.idx.functions.keys()
2087: functions.sort()
2088: for function in functions:
2089: self.serialize_function(output, function)
2090: output.write(" </symbols>\n")
2091: output.write("</api>\n")
2092: output.close()
2093:
2094: filename = "%s-refs.xml" % self.name
2095: print "Saving XML Cross References %s" % (filename)
2096: output = open(filename, "w")
2097: output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2098: output.write("<apirefs name='%s'>\n" % self.name)
2099: self.serialize_xrefs(output)
2100: output.write("</apirefs>\n")
2101: output.close()
2102:
2103:
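# Guess which module the script is being run against by probing for
# well-known source files, then scan, analyze and serialize its API
# description; libexslt is handled as an extra pass when present.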
2104: def rebuild():
2105: builder = None
2106: if glob.glob("parser.c") != [] :
2107: print "Rebuilding API description for libxml2"
2108: builder = docBuilder("libxml2", [".", "."],
2109: ["xmlwin32version.h", "tst.c"])
2110: elif glob.glob("../parser.c") != [] :
2111: print "Rebuilding API description for libxml2"
2112: builder = docBuilder("libxml2", ["..", "../include/libxml"],
2113: ["xmlwin32version.h", "tst.c"])
2114: elif glob.glob("../libxslt/transform.c") != [] :
2115: print "Rebuilding API description for libxslt"
2116: builder = docBuilder("libxslt", ["../libxslt"],
2117: ["win32config.h", "libxslt.h", "tst.c"])
2118: else:
2119: print "rebuild() failed, unable to guess the module"
2120: return None
2121: builder.scan()
2122: builder.analyze()
2123: builder.serialize()
2124: if glob.glob("../libexslt/exslt.c") != [] :
2125: extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
2126: extra.scan()
2127: extra.analyze()
2128: extra.serialize()
2129: return builder
2130:
2131: #
2132: # for debugging the parser
2133: #
2134: def parse(filename):
2135: parser = CParser(filename)
2136: idx = parser.parse()
2137: return idx
2138:
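# Command-line behaviour: with a filename argument the parser is run on just
# that file with debugging enabled, otherwise the full API description is
# rebuilt.  For example (paths are illustrative):
#   python apibuild.py              # regenerate <module>-api.xml / -refs.xml
#   python apibuild.py ../parser.c  # debug-parse a single source file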
2139: if __name__ == "__main__":
2140: if len(sys.argv) > 1:
2141: debug = 1
2142: parse(sys.argv[1])
2143: else:
2144: rebuild()