Return to apibuild.py CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / doc |
1.1 misho 1: #!/usr/bin/python -u
2: #
3: # This is the API builder, it parses the C sources and build the
4: # API formal description in XML.
5: #
6: # See Copyright for the status of this software.
7: #
8: # daniel@veillard.com
9: #
10: import os, sys
11: import string
12: import glob
13:
14: debug=0
15: #debugsym='ignorableWhitespaceSAXFunc'
16: debugsym=None
17:
18: #
19: # C parser analysis code
20: #
# Source files deliberately excluded from the API scan.
# Maps a filename to the human-readable reason it is skipped: bundled
# third-party code (trio*), generated portability layers, test drivers
# and tools that are not part of the installed library API.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "rngparser.h": "not yet integrated",
  "elfgcchack.h": "not a normal header",
  "testHTML.c": "test tool",
  "testReader.c": "test tool",
  "testSchemas.c": "test tool",
  "testXPath.c": "test tool",
  "testAutomata.c": "test tool",
  "testModule.c": "test tool",
  "testRegexp.c": "test tool",
  "testThreads.c": "test tool",
  "testC14N.c": "test tool",
  "testRelax.c": "test tool",
  "testThreadsWin32.c": "test tool",
  "testSAX.c": "test tool",
  "testURI.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "xzlib.h": "Internal API only",
}
57:
# Tokens the parser should skip over entirely, mostly portability macros
# that would otherwise confuse declaration parsing.
# Maps the word to a tuple (n, reason): n is the number of FOLLOWING
# tokens to also discard (e.g. a parenthesized macro argument list),
# reason is a human-readable explanation.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "XMLCDECL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
}
81:
def escape(raw):
    """Escape the five XML special characters in raw and return the result.

    BUG FIX: this block had been corrupted (the replacement entities were
    themselves entity-decoded, turning every call into a no-op like
    replace('&', '&')); the proper XML entities are restored here.
    '&' must be replaced first so the '&' introduced by the other
    entities is not re-escaped.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
89:
def uniq(items):
    """Return the distinct members of items (order unspecified)."""
    # dict.fromkeys builds the same key set the original loop did.
    return dict.fromkeys(items).keys()
95:
96: class identifier:
97: def __init__(self, name, header=None, module=None, type=None, lineno = 0,
98: info=None, extra=None, conditionals = None):
99: self.name = name
100: self.header = header
101: self.module = module
102: self.type = type
103: self.info = info
104: self.extra = extra
105: self.lineno = lineno
106: self.static = 0
107: if conditionals == None or len(conditionals) == 0:
108: self.conditionals = None
109: else:
110: self.conditionals = conditionals[:]
111: if self.name == debugsym:
112: print "=> define %s : %s" % (debugsym, (module, type, info,
113: extra, conditionals))
114:
115: def __repr__(self):
116: r = "%s %s:" % (self.type, self.name)
117: if self.static:
118: r = r + " static"
119: if self.module != None:
120: r = r + " from %s" % (self.module)
121: if self.info != None:
122: r = r + " " + `self.info`
123: if self.extra != None:
124: r = r + " " + `self.extra`
125: if self.conditionals != None:
126: r = r + " " + `self.conditionals`
127: return r
128:
129:
130: def set_header(self, header):
131: self.header = header
132: def set_module(self, module):
133: self.module = module
134: def set_type(self, type):
135: self.type = type
136: def set_info(self, info):
137: self.info = info
138: def set_extra(self, extra):
139: self.extra = extra
140: def set_lineno(self, lineno):
141: self.lineno = lineno
142: def set_static(self, static):
143: self.static = static
144: def set_conditionals(self, conditionals):
145: if conditionals == None or len(conditionals) == 0:
146: self.conditionals = None
147: else:
148: self.conditionals = conditionals[:]
149:
150: def get_name(self):
151: return self.name
152: def get_header(self):
153: return self.module
154: def get_module(self):
155: return self.module
156: def get_type(self):
157: return self.type
158: def get_info(self):
159: return self.info
160: def get_lineno(self):
161: return self.lineno
162: def get_extra(self):
163: return self.extra
164: def get_static(self):
165: return self.static
166: def get_conditionals(self):
167: return self.conditionals
168:
169: def update(self, header, module, type = None, info = None, extra=None,
170: conditionals=None):
171: if self.name == debugsym:
172: print "=> update %s : %s" % (debugsym, (module, type, info,
173: extra, conditionals))
174: if header != None and self.header == None:
175: self.set_header(module)
176: if module != None and (self.module == None or self.header == self.module):
177: self.set_module(module)
178: if type != None and self.type == None:
179: self.set_type(type)
180: if info != None:
181: self.set_info(info)
182: if extra != None:
183: self.set_extra(extra)
184: if conditionals != None:
185: self.set_conditionals(conditionals)
186:
187: class index:
188: def __init__(self, name = "noname"):
189: self.name = name
190: self.identifiers = {}
191: self.functions = {}
192: self.variables = {}
193: self.includes = {}
194: self.structs = {}
195: self.enums = {}
196: self.typedefs = {}
197: self.macros = {}
198: self.references = {}
199: self.info = {}
200:
201: def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
202: if name[0:2] == '__':
203: return None
204: d = None
205: try:
206: d = self.identifiers[name]
207: d.update(header, module, type, lineno, info, extra, conditionals)
208: except:
209: d = identifier(name, header, module, type, lineno, info, extra, conditionals)
210: self.identifiers[name] = d
211:
212: if d != None and static == 1:
213: d.set_static(1)
214:
215: if d != None and name != None and type != None:
216: self.references[name] = d
217:
218: if name == debugsym:
219: print "New ref: %s" % (d)
220:
221: return d
222:
223: def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
224: if name[0:2] == '__':
225: return None
226: d = None
227: try:
228: d = self.identifiers[name]
229: d.update(header, module, type, lineno, info, extra, conditionals)
230: except:
231: d = identifier(name, header, module, type, lineno, info, extra, conditionals)
232: self.identifiers[name] = d
233:
234: if d != None and static == 1:
235: d.set_static(1)
236:
237: if d != None and name != None and type != None:
238: if type == "function":
239: self.functions[name] = d
240: elif type == "functype":
241: self.functions[name] = d
242: elif type == "variable":
243: self.variables[name] = d
244: elif type == "include":
245: self.includes[name] = d
246: elif type == "struct":
247: self.structs[name] = d
248: elif type == "enum":
249: self.enums[name] = d
250: elif type == "typedef":
251: self.typedefs[name] = d
252: elif type == "macro":
253: self.macros[name] = d
254: else:
255: print "Unable to register type ", type
256:
257: if name == debugsym:
258: print "New symbol: %s" % (d)
259:
260: return d
261:
262: def merge(self, idx):
263: for id in idx.functions.keys():
264: #
265: # macro might be used to override functions or variables
266: # definitions
267: #
268: if self.macros.has_key(id):
269: del self.macros[id]
270: if self.functions.has_key(id):
271: print "function %s from %s redeclared in %s" % (
272: id, self.functions[id].header, idx.functions[id].header)
273: else:
274: self.functions[id] = idx.functions[id]
275: self.identifiers[id] = idx.functions[id]
276: for id in idx.variables.keys():
277: #
278: # macro might be used to override functions or variables
279: # definitions
280: #
281: if self.macros.has_key(id):
282: del self.macros[id]
283: if self.variables.has_key(id):
284: print "variable %s from %s redeclared in %s" % (
285: id, self.variables[id].header, idx.variables[id].header)
286: else:
287: self.variables[id] = idx.variables[id]
288: self.identifiers[id] = idx.variables[id]
289: for id in idx.structs.keys():
290: if self.structs.has_key(id):
291: print "struct %s from %s redeclared in %s" % (
292: id, self.structs[id].header, idx.structs[id].header)
293: else:
294: self.structs[id] = idx.structs[id]
295: self.identifiers[id] = idx.structs[id]
296: for id in idx.typedefs.keys():
297: if self.typedefs.has_key(id):
298: print "typedef %s from %s redeclared in %s" % (
299: id, self.typedefs[id].header, idx.typedefs[id].header)
300: else:
301: self.typedefs[id] = idx.typedefs[id]
302: self.identifiers[id] = idx.typedefs[id]
303: for id in idx.macros.keys():
304: #
305: # macro might be used to override functions or variables
306: # definitions
307: #
308: if self.variables.has_key(id):
309: continue
310: if self.functions.has_key(id):
311: continue
312: if self.enums.has_key(id):
313: continue
314: if self.macros.has_key(id):
315: print "macro %s from %s redeclared in %s" % (
316: id, self.macros[id].header, idx.macros[id].header)
317: else:
318: self.macros[id] = idx.macros[id]
319: self.identifiers[id] = idx.macros[id]
320: for id in idx.enums.keys():
321: if self.enums.has_key(id):
322: print "enum %s from %s redeclared in %s" % (
323: id, self.enums[id].header, idx.enums[id].header)
324: else:
325: self.enums[id] = idx.enums[id]
326: self.identifiers[id] = idx.enums[id]
327:
328: def merge_public(self, idx):
329: for id in idx.functions.keys():
330: if self.functions.has_key(id):
331: # check that function condition agrees with header
332: if idx.functions[id].conditionals != \
333: self.functions[id].conditionals:
334: print "Header condition differs from Function for %s:" \
335: % id
336: print " H: %s" % self.functions[id].conditionals
337: print " C: %s" % idx.functions[id].conditionals
338: up = idx.functions[id]
339: self.functions[id].update(None, up.module, up.type, up.info, up.extra)
340: # else:
341: # print "Function %s from %s is not declared in headers" % (
342: # id, idx.functions[id].module)
343: # TODO: do the same for variables.
344:
345: def analyze_dict(self, type, dict):
346: count = 0
347: public = 0
348: for name in dict.keys():
349: id = dict[name]
350: count = count + 1
351: if id.static == 0:
352: public = public + 1
353: if count != public:
354: print " %d %s , %d public" % (count, type, public)
355: elif count != 0:
356: print " %d public %s" % (count, type)
357:
358:
359: def analyze(self):
360: self.analyze_dict("functions", self.functions)
361: self.analyze_dict("variables", self.variables)
362: self.analyze_dict("structs", self.structs)
363: self.analyze_dict("typedefs", self.typedefs)
364: self.analyze_dict("macros", self.macros)
365:
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) tuples with kind one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""
    def __init__(self, input):
        self.input = input      # file-like object providing readline()
        self.tokens = []        # queue of tokens already split from the line
        self.line = ""          # unconsumed remainder of the current line
        self.lineno = 0

    def getline(self):
        """Return the next non-empty input line, stripped, with
        backslash-continued lines joined; None at end of input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = string.lstrip(line)
            line = string.rstrip(line)
            if line == '':
                continue
            # join continuation lines ending in a backslash
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = string.lstrip(n)
                n = string.rstrip(n)
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        """Return the current input line number."""
        return self.lineno

    def push(self, token):
        """Push token back so it is returned by the next token() call."""
        self.tokens.insert(0, token);

    def debug(self):
        """Dump the lexer state for error reporting."""
        print "Last token: ", self.last
        print "Token queue: ", self.tokens
        print "Line %d end: " % (self.lineno), self.line

    def token(self):
        """Return the next token as a (kind, text) tuple, or None at end
        of input.  Refills the token queue one source line at a time."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                # resume the line left over from a string/comment split
                line = self.line
                self.line = ""
            if line == None:
                return None

            # preprocessor directive: one 'preproc' token per word
            if line[0] == '#':
                self.tokens = map((lambda x: ('preproc', x)),
                                  string.split(line))
                break;
            l = len(line)
            # string or character literal, possibly spanning lines
            if line[0] == '"' or line[0] == "'":
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            l = i
                            found = 1
                            break
                        if line[i] == '\\':
                            i = i + 1   # skip the escaped character
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line == None:
                            return None
                self.last = ('string', tok)
                return self.last

            # C comment, possibly spanning lines
            if l >= 2 and line[0] == '/' and line[1] == '*':
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # NOTE(review): line[:i-1] drops the character
                            # immediately before '*/' (usually a space);
                            # looks like an off-by-one vs line[:i] —
                            # confirm against the generated docs before
                            # changing.
                            line = line[:i-1]
                            l = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line == None:
                            return None
                self.last = ('comment', tok)
                return self.last
            # C++ line comment
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last
            # truncate the working line at the start of any embedded
            # comment or string; the remainder is saved for the next call
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and line[i+1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i+1 < l and line[i+1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            # split the remaining text into name/sep/op tokens
            l = len(line)
            i = 0
            while i < l:
                if line[i] == ' ' or line[i] == '\t':
                    i = i + 1
                    continue
                o = ord(line[i])
                # identifier or number: starts alphanumeric, runs until a
                # separator/operator character
                if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                   (o >= 48 and o <= 57):
                    s = i
                    while i < l:
                        o = ord(line[i])
                        if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                           (o >= 48 and o <= 57) or string.find(
                               " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
                            i = i + 1
                        else:
                            break
                    self.tokens.append(('name', line[s:i]))
                    continue
                # single-character separators
                if string.find("(){}:;,[]", line[i]) != -1:
                    self.tokens.append(('sep', line[i]))
                    i = i + 1
                    continue
                # operators, possibly two characters wide
                if string.find("+-*><=/%&!|.", line[i]) != -1:
                    # '...' ellipsis is treated as a name token
                    if line[i] == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and (
                        string.find("+-*><=/%&!|", line[j]) != -1):
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', line[i]))
                        i = i + 1
                    continue
                # anything else: accumulate until a separator, emit as name
                s = i
                while i < l:
                    o = ord(line[i])
                    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                       (o >= 48 and o <= 57) or (
                       string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
                        i = i + 1
                    else:
                        break
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
571:
572: class CParser:
573: """The C module parser"""
    def __init__(self, filename, idx = None):
        """Prepare to parse filename, accumulating symbols into idx (a
        fresh index is created when idx is None)."""
        self.filename = filename
        # .h files get header handling: symbols are indexed by header
        if len(filename) > 2 and filename[-2:] == '.h':
            self.is_header = 1
        else:
            self.is_header = 0
        self.input = open(filename)
        self.lexer = CLexer(self.input)
        if idx == None:
            self.index = index()
        else:
            self.index = idx
        self.top_comment = ""     # first comment block of the module
        self.last_comment = ""
        self.comment = None       # comment block preceding the current decl
        self.collect_ref = 0      # when 1, also record symbol references
        self.no_error = 0         # when 1, suppress warnings (DOC_DISABLE)
        self.conditionals = []    # stack of active 'ENABLED' #if conditions
        self.defines = []         # stack of all active #if/#ifdef expressions
593:
    def collect_references(self):
        """Enable recording of xml* symbol references in function bodies."""
        self.collect_ref = 1
596:
    def stop_error(self):
        """Suppress warnings and errors (triggered by DOC_DISABLE)."""
        self.no_error = 1
599:
    def start_error(self):
        """Re-enable warnings and errors (triggered by DOC_ENABLE)."""
        self.no_error = 0
602:
    def lineno(self):
        """Return the lexer's current input line number."""
        return self.lexer.getlineno()
605:
    def index_add(self, name, module, static, type, info=None, extra = None):
        """Register a symbol definition in the index.  For headers the
        symbol is filed under the header itself (header == module); for C
        modules the header is left unset and resolved later by
        merge_public()."""
        if self.is_header == 1:
            self.index.add(name, module, module, static, type, self.lineno(),
                           info, extra, self.conditionals)
        else:
            self.index.add(name, None, module, static, type, self.lineno(),
                           info, extra, self.conditionals)
613:
    def index_add_ref(self, name, module, static, type, info=None,
                      extra = None):
        """Register a symbol reference in the index; same header/module
        convention as index_add()."""
        if self.is_header == 1:
            self.index.add_ref(name, module, module, static, type,
                               self.lineno(), info, extra, self.conditionals)
        else:
            self.index.add_ref(name, None, module, static, type, self.lineno(),
                               info, extra, self.conditionals)
622:
    def warning(self, msg):
        """Print msg unless warnings are suppressed (DOC_DISABLE)."""
        if self.no_error:
            return
        print msg
627:
    def error(self, msg, token=-1):
        """Report a fatal parse error with lexer state and exit the
        program, unless error reporting is suppressed."""
        if self.no_error:
            return

        print "Parse Error: " + msg
        if token != -1:
            print "Got token ", token
        self.lexer.debug()
        sys.exit(1)
637:
    def debug(self, msg, token=-1):
        """Print a debug message along with the current lexer state."""
        print "Debug: " + msg
        if token != -1:
            print "Got token ", token
        self.lexer.debug()
643:
    def parseTopComment(self, comment):
        """Parse the module's top comment block into a dict of
        'Field: value' entries and store it as self.index.info.
        Continuation lines (no colon) are appended to the last field."""
        res = {}
        lines = string.split(comment, "\n")
        item = None
        for line in lines:
            # strip leading whitespace, comment stars, then whitespace again
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            while line != "" and line[0] == '*':
                line = line[1:]
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            try:
                (it, line) = string.split(line, ":", 1)
                item = it
                while line != "" and (line[0] == ' ' or line[0] == '\t'):
                    line = line[1:]
                if res.has_key(item):
                    res[item] = res[item] + " " + line
                else:
                    res[item] = line
            except:
                # no colon: continuation of the previous field, if any
                if item != None:
                    if res.has_key(item):
                        res[item] = res[item] + " " + line
                    else:
                        res[item] = line
        self.index.info = res
671:
    def parseComment(self, token):
        """Accumulate a comment token into self.comment (a '*'-style
        comment starts a new block, others are appended), honor the
        DOC_DISABLE/DOC_ENABLE markers, and return the next token."""
        if self.top_comment == "":
            self.top_comment = token[1]
        if self.comment == None or token[1][0] == '*':
            self.comment = token[1];
        else:
            self.comment = self.comment + token[1]
        token = self.lexer.token()

        if string.find(self.comment, "DOC_DISABLE") != -1:
            self.stop_error()

        if string.find(self.comment, "DOC_ENABLE") != -1:
            self.start_error()

        return token
688:
689: #
690: # Parse a comment block associate to a typedef
691: #
    def parseTypeComment(self, name, quiet = 0):
        """Parse the comment block attached to typedef name and return its
        description text.

        NOTE(review): the return type is inconsistent — error paths return
        the (args, desc) tuple while success returns the desc string;
        callers appear to treat the result opaquely, so confirm before
        normalizing."""
        if name[0:2] == '__':
            quiet = 1

        args = []
        desc = ""

        if self.comment == None:
            if not quiet:
                self.warning("Missing comment for type %s" % (name))
            return((args, desc))
        if self.comment[0] != '*':
            if not quiet:
                self.warning("Missing * in type comment for %s" % (name))
            return((args, desc))
        lines = string.split(self.comment, '\n')
        if lines[0] == '*':
            del lines[0]
        # the block must start with a '* name:' line
        if lines[0] != "* %s:" % (name):
            if not quiet:
                self.warning("Misformatted type comment for %s" % (name))
                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
            return((args, desc))
        del lines[0]
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
        desc = ""
        # remaining lines, stripped of leading stars, form the description
        while len(lines) > 0:
            l = lines[0]
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            l = string.strip(l)
            desc = desc + " " + l
            del lines[0]

        desc = string.strip(desc)

        if quiet == 0:
            if desc == "":
                self.warning("Type comment for %s lack description of the macro" % (name))

        return(desc)
734: #
735: # Parse a comment block associate to a macro
736: #
737: def parseMacroComment(self, name, quiet = 0):
738: if name[0:2] == '__':
739: quiet = 1
740:
741: args = []
742: desc = ""
743:
744: if self.comment == None:
745: if not quiet:
746: self.warning("Missing comment for macro %s" % (name))
747: return((args, desc))
748: if self.comment[0] != '*':
749: if not quiet:
750: self.warning("Missing * in macro comment for %s" % (name))
751: return((args, desc))
752: lines = string.split(self.comment, '\n')
753: if lines[0] == '*':
754: del lines[0]
755: if lines[0] != "* %s:" % (name):
756: if not quiet:
757: self.warning("Misformatted macro comment for %s" % (name))
758: self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
759: return((args, desc))
760: del lines[0]
761: while lines[0] == '*':
762: del lines[0]
763: while len(lines) > 0 and lines[0][0:3] == '* @':
764: l = lines[0][3:]
765: try:
766: (arg, desc) = string.split(l, ':', 1)
767: desc=string.strip(desc)
768: arg=string.strip(arg)
769: except:
770: if not quiet:
771: self.warning("Misformatted macro comment for %s" % (name))
772: self.warning(" problem with '%s'" % (lines[0]))
773: del lines[0]
774: continue
775: del lines[0]
776: l = string.strip(lines[0])
777: while len(l) > 2 and l[0:3] != '* @':
778: while l[0] == '*':
779: l = l[1:]
780: desc = desc + ' ' + string.strip(l)
781: del lines[0]
782: if len(lines) == 0:
783: break
784: l = lines[0]
785: args.append((arg, desc))
786: while len(lines) > 0 and lines[0] == '*':
787: del lines[0]
788: desc = ""
789: while len(lines) > 0:
790: l = lines[0]
791: while len(l) > 0 and l[0] == '*':
792: l = l[1:]
793: l = string.strip(l)
794: desc = desc + " " + l
795: del lines[0]
796:
797: desc = string.strip(desc)
798:
799: if quiet == 0:
800: if desc == "":
801: self.warning("Macro comment for %s lack description of the macro" % (name))
802:
803: return((args, desc))
804:
805: #
806: # Parse a comment block and merge the informations found in the
807: # parameters descriptions, finally returns a block as complete
808: # as possible
809: #
    def mergeFunctionComment(self, name, description, quiet = 0):
        """Merge the parsed comment block into the function signature
        description (a (ret, args) pair) and return a triple
        ((rettype, retdesc), args, desc) with per-argument and return
        descriptions filled in from the '* @arg:' and 'Returns' lines."""
        if name == 'main':
            quiet = 1
        if name[0:2] == '__':
            quiet = 1

        (ret, args) = description
        desc = ""
        retdesc = ""

        if self.comment == None:
            if not quiet:
                self.warning("Missing comment for function %s" % (name))
            return(((ret[0], retdesc), args, desc))
        if self.comment[0] != '*':
            if not quiet:
                self.warning("Missing * in function comment for %s" % (name))
            return(((ret[0], retdesc), args, desc))
        lines = string.split(self.comment, '\n')
        if lines[0] == '*':
            del lines[0]
        # the block must start with a '* name:' line
        if lines[0] != "* %s:" % (name):
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
            return(((ret[0], retdesc), args, desc))
        del lines[0]
        while lines[0] == '*':
            del lines[0]
        nbargs = len(args)
        # one '* @arg: description' entry per documented parameter
        while len(lines) > 0 and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                (arg, desc) = string.split(l, ':', 1)
                desc=string.strip(desc)
                arg=string.strip(arg)
            except:
                if not quiet:
                    self.warning("Misformatted function comment for %s" % (name))
                    self.warning("  problem with '%s'" % (lines[0]))
                del lines[0]
                continue
            del lines[0]
            l = string.strip(lines[0])
            # gather continuation lines of this argument's description
            while len(l) > 2 and l[0:3] != '* @':
                while l[0] == '*':
                    l = l[1:]
                desc = desc + ' ' + string.strip(l)
                del lines[0]
                if len(lines) == 0:
                    break
                l = lines[0]
            # attach the description to the matching signature argument
            i = 0
            while i < nbargs:
                if args[i][1] == arg:
                    args[i] = (args[i][0], arg, desc)
                    break;
                i = i + 1
            if i >= nbargs:
                if not quiet:
                    self.warning("Unable to find arg %s from function comment for %s" % (
                       arg, name))
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
        desc = ""
        while len(lines) > 0:
            l = lines[0]
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            l = string.strip(l)
            # NOTE(review): 'and' binds tighter than 'or' here, so the
            # len(l) >= 6 guard only covers the "return" branch; harmless
            # since a 6-char prefix match implies len >= 6, but confirm
            # before restructuring.
            if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
                # everything from the Returns line onward is return doc
                try:
                    l = string.split(l, ' ', 1)[1]
                except:
                    l = ""
                retdesc = string.strip(l)
                del lines[0]
                while len(lines) > 0:
                    l = lines[0]
                    while len(l) > 0 and l[0] == '*':
                        l = l[1:]
                    l = string.strip(l)
                    retdesc = retdesc + " " + l
                    del lines[0]
            else:
                desc = desc + " " + l
                del lines[0]

        retdesc = string.strip(retdesc)
        desc = string.strip(desc)

        if quiet == 0:
            #
            # report missing comments
            #
            i = 0
            while i < nbargs:
                # NOTE(review): the '(x != None) or (x == '')' condition is
                # always true for any value except None — likely meant
                # 'and'; confirm against expected warnings before changing.
                if args[i][2] == None and args[i][0] != "void" and \
                   ((args[i][1] != None) or (args[i][1] == '')):
                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
                i = i + 1
            if retdesc == "" and ret[0] != "void":
                self.warning("Function comment for %s lacks description of return value" % (name))
            if desc == "":
                self.warning("Function comment for %s lacks description of the function" % (name))

        return(((ret[0], retdesc), args, desc))
917:
    def parsePreproc(self, token):
        """Handle a preprocessor directive: index #include and #define,
        and maintain the conditional-compilation stacks for
        #ifdef/#ifndef/#if/#else/#endif.  Returns the next non-preproc
        token to process."""
        if debug:
            print "=> preproc ", token, self.lexer.tokens
        name = token[1]
        if name == "#include":
            token = self.lexer.token()
            if token == None:
                return None
            if token[0] == 'preproc':
                self.index_add(token[1], self.filename, not self.is_header,
                                "include")
                return self.lexer.token()
            return token
        if name == "#define":
            token = self.lexer.token()
            if token == None:
                return None
            if token[0] == 'preproc':
                # TODO macros with arguments
                name = token[1]
                lst = []
                token = self.lexer.token()
                # consume the macro body (rest of the directive's tokens)
                while token != None and token[0] == 'preproc' and \
                      token[1][0] != '#':
                    lst.append(token[1])
                    token = self.lexer.token()
                try:
                    # strip a parenthesized argument list from the name
                    name = string.split(name, '(') [0]
                except:
                    pass
                info = self.parseMacroComment(name, not self.is_header)
                self.index_add(name, self.filename, not self.is_header,
                               "macro", info)
                return token

        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
        # #if, #else and #endif) for headers and mainline code,
        # store the ones from the header in libxml2-api.xml, and later
        # (in the routine merge_public) verify that the two (header and
        # mainline code) agree.
        #
        # There is a small problem with processing the headers.  Some of
        # the variables are not concerned with enabling / disabling of
        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
        # them to be included in libxml2-api.xml, or involved in
        # the check between the header and the mainline code.  To
        # accomplish this, we ignore any conditional which doesn't include
        # the string 'ENABLED'
        #
        if name == "#ifdef":
            apstr = self.lexer.tokens[0][1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append("defined(%s)" % apstr)
            except:
                pass
        elif name == "#ifndef":
            apstr = self.lexer.tokens[0][1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append("!defined(%s)" % apstr)
            except:
                pass
        elif name == "#if":
            # reassemble the full #if expression from its tokens
            apstr = ""
            for tok in self.lexer.tokens:
                if apstr != "":
                    apstr = apstr + " "
                apstr = apstr + tok[1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append(apstr)
            except:
                pass
        elif name == "#else":
            # negate the innermost tracked condition
            if self.conditionals != [] and \
               string.find(self.defines[-1], 'ENABLED') != -1:
                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
        elif name == "#endif":
            if self.conditionals != [] and \
               string.find(self.defines[-1], 'ENABLED') != -1:
                self.conditionals = self.conditionals[:-1]
            self.defines = self.defines[:-1]
        # skip the rest of the directive's tokens
        token = self.lexer.token()
        while token != None and token[0] == 'preproc' and \
            token[1][0] != '#':
            token = self.lexer.token()
        return token
1012:
1013: #
1014: # token acquisition on top of the lexer, it handle internally
1015: # preprocessor and comments since they are logically not part of
1016: # the program structure.
1017: #
    def token(self):
        """Token acquisition on top of the lexer: transparently consumes
        comments, preprocessor directives and the portability keywords
        listed in ignored_words, so callers only see program structure."""
        global ignored_words

        token = self.lexer.token()
        while token != None:
            if token[0] == 'comment':
                token = self.parseComment(token)
                continue
            elif token[0] == 'preproc':
                token = self.parsePreproc(token)
                continue
            elif token[0] == "name" and token[1] == "__const":
                # normalize the GCC spelling of const
                token = ("name", "const")
                return token
            elif token[0] == "name" and token[1] == "__attribute":
                # drop the whole __attribute(...) clause up to ';'
                token = self.lexer.token()
                while token != None and token[1] != ";":
                    token = self.lexer.token()
                return token
            elif token[0] == "name" and ignored_words.has_key(token[1]):
                # skip the word plus its declared number of argument tokens
                (n, info) = ignored_words[token[1]]
                i = 0
                while i < n:
                    token = self.lexer.token()
                    i = i + 1
                token = self.lexer.token()
                continue
            else:
                if debug:
                    print "=> ", token
                return token
        return None
1050:
1051: #
1052: # Parse a typedef, it records the type and its name.
1053: #
    def parseTypedef(self, token):
        """Parse a typedef declaration (possibly a comma-separated list)
        and index each introduced name as a functype, struct or plain
        typedef.  Returns the token following the declaration."""
        if token == None:
            return None
        token = self.parseType(token)
        if token == None:
            self.error("parsing typedef")
            return None
        base_type = self.type
        type = base_type
        #self.debug("end typedef type", token)
        while token != None:
            if token[0] == "name":
                name = token[1]
                signature = self.signature
                if signature != None:
                    # function-pointer typedef: index as a functype
                    type = string.split(type, '(')[0]
                    d = self.mergeFunctionComment(name,
                            ((type, None), signature), 1)
                    self.index_add(name, self.filename, not self.is_header,
                                    "functype", d)
                else:
                    if base_type == "struct":
                        self.index_add(name, self.filename, not self.is_header,
                                        "struct", type)
                        base_type = "struct " + name
                    else:
                        # TODO report missing or misformatted comments
                        info = self.parseTypeComment(name, 1)
                        self.index_add(name, self.filename, not self.is_header,
                                        "typedef", type, info)
                token = self.token()
            else:
                self.error("parsing typedef: expecting a name")
                return token
            #self.debug("end typedef", token)
            if token != None and token[0] == 'sep' and token[1] == ',':
                # another name for the same base type follows
                type = base_type
                token = self.token()
                while token != None and token[0] == "op":
                    type = type + token[1]
                    token = self.token()
            elif token != None and token[0] == 'sep' and token[1] == ';':
                break;
            elif token != None and token[0] == 'name':
                type = base_type
                continue;
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token
        token = self.token()
        return token
1105:
    #
    # Parse a C code block, used for function bodies: it parses up to
    # and including the balancing }
    #
    def parseBlock(self, token):
        """Skip a '{' ... '}' code block, recursing on nested braces.

        When self.collect_ref is set, identifiers used inside the block
        are recorded as cross-references: xml* names followed by '(' as
        function uses, xml* names used as declarators as type uses, and
        XML_/LIBXML_ prefixed names as typedef uses.
        Returns the token following the closing '}'.
        """
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                # discard any comment seen inside the body
                self.comment = None
                token = self.token()
                return token
            else:
                if self.collect_ref == 1:
                    # one token of lookahead to classify the usage
                    oldtok = token
                    token = self.token()
                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
                        if token[0] == "sep" and token[1] == "(":
                            self.index_add_ref(oldtok[1], self.filename,
                                                0, "function")
                            token = self.token()
                        elif token[0] == "name":
                            token = self.token()
                            if token[0] == "sep" and (token[1] == ";" or
                               token[1] == "," or token[1] == "="):
                                self.index_add_ref(oldtok[1], self.filename,
                                                    0, "type")
                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
                        self.index_add_ref(oldtok[1], self.filename,
                                            0, "typedef")
                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
                        self.index_add_ref(oldtok[1], self.filename,
                                            0, "typedef")

                else:
                    token = self.token()
        return token
1144:
1145: #
1146: # Parse a C struct definition till the balancing }
1147: #
    def parseStruct(self, token):
        """Parse a C struct definition until the balancing '}'.

        Collects (type, name, comment) triples for the fields into
        self.struct_fields.  Nested anonymous blocks are skipped with
        parseTypeBlock().  Returns the token after the closing '}'.
        """
        fields = []
        #self.debug("start parseStruct", token)
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.struct_fields = fields
                #self.debug("end parseStruct", token)
                #print fields
                token = self.token()
                return token
            else:
                base_type = self.type
                #self.debug("before parseType", token)
                token = self.parseType(token)
                #self.debug("after parseType", token)
                if token != None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    if token[0] == "sep" and token[1] == ";":
                        self.comment = None
                        # self.token() may load the comment that trails
                        # the ';' — that comment documents this field
                        token = self.token()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token != None and token[0] == "sep" and token[1] == "{":
                    # anonymous sub-block (e.g. nested struct/union body)
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token != None and token[0] == "name":
                        token = self.token()
                    if token != None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                # parseType clobbered self.type; restore the struct's own
                self.type = base_type;
        self.struct_fields = fields
        #self.debug("end parseStruct", token)
        #print fields
        return token
1193:
1194: #
1195: # Parse a C enum block, parse till the balancing }
1196: #
    def parseEnumBlock(self, token):
        """Parse a C enum block up to the balancing '}'.

        Collects (name, value, comment) triples into self.enums.  When
        no explicit '= value' is given, the value is the previous one
        plus 1 (computed only if the previous value parses as an int).
        Returns the token after the closing '}'.
        """
        self.enums = []
        name = None
        self.comment = None
        comment = ""
        value = "0"
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                # flush the last pending enumerator
                if name != None:
                    if self.comment != None:
                        comment = self.comment
                        self.comment = None
                    self.enums.append((name, value, comment))
                token = self.token()
                return token
            elif token[0] == "name":
                # a new enumerator starts: flush the previous one first
                if name != None:
                    if self.comment != None:
                        comment = string.strip(self.comment)
                        self.comment = None
                    self.enums.append((name, value, comment))
                name = token[1]
                comment = ""
                token = self.token()
                if token[0] == "op" and token[1][0] == "=":
                    # explicit value: concatenate tokens up to ',' or '}'
                    value = ""
                    if len(token[1]) > 1:
                        value = token[1][1:]
                    token = self.token()
                    while token[0] != "sep" or (token[1] != ',' and
                          token[1] != '}'):
                        value = value + token[1]
                        token = self.token()
                else:
                    # implicit value: previous + 1 when computable
                    try:
                        value = "%d" % (int(value) + 1)
                    except:
                        self.warning("Failed to compute value of enum %s" % (name))
                        value=""
                if token[0] == "sep" and token[1] == ",":
                    token = self.token()
            else:
                token = self.token()
        return token
1244:
    #
    # Parse a C definition block, used for structs: it parses up to
    # and including the balancing }
    #
1249: def parseTypeBlock(self, token):
1250: while token != None:
1251: if token[0] == "sep" and token[1] == "{":
1252: token = self.token()
1253: token = self.parseTypeBlock(token)
1254: elif token[0] == "sep" and token[1] == "}":
1255: token = self.token()
1256: return token
1257: else:
1258: token = self.token()
1259: return token
1260:
    #
    # Parse a type: the fact that the type name can either occur after
    # the definition or within the definition makes it a little harder.
    # If inside, the name token is pushed back before returning.
    #
1266: def parseType(self, token):
1267: self.type = ""
1268: self.struct_fields = []
1269: self.signature = None
1270: if token == None:
1271: return token
1272:
1273: while token[0] == "name" and (
1274: token[1] == "const" or \
1275: token[1] == "unsigned" or \
1276: token[1] == "signed"):
1277: if self.type == "":
1278: self.type = token[1]
1279: else:
1280: self.type = self.type + " " + token[1]
1281: token = self.token()
1282:
1283: if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1284: if self.type == "":
1285: self.type = token[1]
1286: else:
1287: self.type = self.type + " " + token[1]
1288: if token[0] == "name" and token[1] == "int":
1289: if self.type == "":
1290: self.type = tmp[1]
1291: else:
1292: self.type = self.type + " " + tmp[1]
1293:
1294: elif token[0] == "name" and token[1] == "struct":
1295: if self.type == "":
1296: self.type = token[1]
1297: else:
1298: self.type = self.type + " " + token[1]
1299: token = self.token()
1300: nametok = None
1301: if token[0] == "name":
1302: nametok = token
1303: token = self.token()
1304: if token != None and token[0] == "sep" and token[1] == "{":
1305: token = self.token()
1306: token = self.parseStruct(token)
1307: elif token != None and token[0] == "op" and token[1] == "*":
1308: self.type = self.type + " " + nametok[1] + " *"
1309: token = self.token()
1310: while token != None and token[0] == "op" and token[1] == "*":
1311: self.type = self.type + " *"
1312: token = self.token()
1313: if token[0] == "name":
1314: nametok = token
1315: token = self.token()
1316: else:
1317: self.error("struct : expecting name", token)
1318: return token
1319: elif token != None and token[0] == "name" and nametok != None:
1320: self.type = self.type + " " + nametok[1]
1321: return token
1322:
1323: if nametok != None:
1324: self.lexer.push(token)
1325: token = nametok
1326: return token
1327:
1328: elif token[0] == "name" and token[1] == "enum":
1329: if self.type == "":
1330: self.type = token[1]
1331: else:
1332: self.type = self.type + " " + token[1]
1333: self.enums = []
1334: token = self.token()
1335: if token != None and token[0] == "sep" and token[1] == "{":
1336: token = self.token()
1337: token = self.parseEnumBlock(token)
1338: else:
1339: self.error("parsing enum: expecting '{'", token)
1340: enum_type = None
1341: if token != None and token[0] != "name":
1342: self.lexer.push(token)
1343: token = ("name", "enum")
1344: else:
1345: enum_type = token[1]
1346: for enum in self.enums:
1347: self.index_add(enum[0], self.filename,
1348: not self.is_header, "enum",
1349: (enum[1], enum[2], enum_type))
1350: return token
1351:
1352: elif token[0] == "name":
1353: if self.type == "":
1354: self.type = token[1]
1355: else:
1356: self.type = self.type + " " + token[1]
1357: else:
1358: self.error("parsing type %s: expecting a name" % (self.type),
1359: token)
1360: return token
1361: token = self.token()
1362: while token != None and (token[0] == "op" or
1363: token[0] == "name" and token[1] == "const"):
1364: self.type = self.type + " " + token[1]
1365: token = self.token()
1366:
1367: #
1368: # if there is a parenthesis here, this means a function type
1369: #
1370: if token != None and token[0] == "sep" and token[1] == '(':
1371: self.type = self.type + token[1]
1372: token = self.token()
1373: while token != None and token[0] == "op" and token[1] == '*':
1374: self.type = self.type + token[1]
1375: token = self.token()
1376: if token == None or token[0] != "name" :
1377: self.error("parsing function type, name expected", token);
1378: return token
1379: self.type = self.type + token[1]
1380: nametok = token
1381: token = self.token()
1382: if token != None and token[0] == "sep" and token[1] == ')':
1383: self.type = self.type + token[1]
1384: token = self.token()
1385: if token != None and token[0] == "sep" and token[1] == '(':
1386: token = self.token()
1387: type = self.type;
1388: token = self.parseSignature(token);
1389: self.type = type;
1390: else:
1391: self.error("parsing function type, '(' expected", token);
1392: return token
1393: else:
1394: self.error("parsing function type, ')' expected", token);
1395: return token
1396: self.lexer.push(token)
1397: token = nametok
1398: return token
1399:
1400: #
1401: # do some lookahead for arrays
1402: #
1403: if token != None and token[0] == "name":
1404: nametok = token
1405: token = self.token()
1406: if token != None and token[0] == "sep" and token[1] == '[':
1407: self.type = self.type + nametok[1]
1408: while token != None and token[0] == "sep" and token[1] == '[':
1409: self.type = self.type + token[1]
1410: token = self.token()
1411: while token != None and token[0] != 'sep' and \
1412: token[1] != ']' and token[1] != ';':
1413: self.type = self.type + token[1]
1414: token = self.token()
1415: if token != None and token[0] == 'sep' and token[1] == ']':
1416: self.type = self.type + token[1]
1417: token = self.token()
1418: else:
1419: self.error("parsing array type, ']' expected", token);
1420: return token
1421: elif token != None and token[0] == "sep" and token[1] == ':':
1422: # remove :12 in case it's a limited int size
1423: token = self.token()
1424: token = self.token()
1425: self.lexer.push(token)
1426: token = nametok
1427:
1428: return token
1429:
1430: #
1431: # Parse a signature: '(' has been parsed and we scan the type definition
1432: # up to the ')' included
    def parseSignature(self, token):
        """Parse a function signature: '(' has already been consumed.

        Scans (type, name, info) argument triples up to and including
        the ')' and stores the list in self.signature ([] for a
        parameterless signature).  Unnamed parameters get None as name,
        except "..." which is kept literally.  Returns the token after
        the ')'.
        """
        signature = []
        if token != None and token[0] == "sep" and token[1] == ')':
            self.signature = []
            token = self.token()
            return token
        while token != None:
            token = self.parseType(token)
            if token != None and token[0] == "name":
                signature.append((self.type, token[1], None))
                token = self.token()
            elif token != None and token[0] == "sep" and token[1] == ',':
                token = self.token()
                continue
            elif token != None and token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    signature.append((self.type, "...", None))
                else:
                    signature.append((self.type, None, None))
            # intentional fall-through: the separator after a named
            # argument (or the ')' just classified) is handled here
            if token != None and token[0] == "sep":
                if token[1] == ',':
                    token = self.token()
                    continue
                elif token[1] == ')':
                    token = self.token()
                    break
        self.signature = signature
        return token
1462:
1463: #
1464: # Parse a global definition, be it a type, variable or function
1465: # the extern "C" blocks are a bit nasty and require it to recurse.
1466: #
    def parseGlobal(self, token):
        """Parse one top-level definition: type, variable or function.

        Handles 'extern "C" { ... }' wrappers by recursing on their
        content, 'static' storage class, typedefs (delegated to
        parseTypedef), variable declarations with optional
        initializers, and function declarations/definitions.  Each
        symbol found is recorded through self.index_add().  Returns the
        token following the definition.
        """
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token == None:
                return token
            if token[0] == 'string':
                if token[1] == 'C':
                    # extern "C" block: parse its content recursively
                    token = self.token()
                    if token == None:
                        return token
                    if token[0] == 'sep' and token[1] == "{":
                        token = self.token()
                        # print 'Entering extern "C line ', self.lineno()
                        while token != None and (token[0] != 'sep' or
                              token[1] != "}"):
                            if token[0] == 'name':
                                token = self.parseGlobal(token)
                            else:
                                self.error(
                                 "token %s %s unexpected at the top level" % (
                                        token[0], token[1]))
                                token = self.parseGlobal(token)
                        # print 'Exiting extern "C" line', self.lineno()
                        token = self.token()
                        return token
                else:
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token == None or token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)
        else:
            token = self.parseType(token)
            type_orig = self.type
            if token == None or token[0] != "name":
                return token
            type = type_orig
            self.name = token[1]
            token = self.token()
            # loop over declarators/separators following the first name
            while token != None and (token[0] == "sep" or token[0] == "op"):
                if token[0] == "sep":
                    if token[1] == "[":
                        # absorb the array part into the type string
                        type = type + token[1]
                        token = self.token()
                        while token != None and (token[0] != "sep" or \
                              token[1] != ";"):
                            type = type + token[1]
                            token = self.token()

                if token != None and token[0] == "op" and token[1] == "=":
                    #
                    # Skip the initialization of the variable
                    #
                    token = self.token()
                    if token[0] == 'sep' and token[1] == '{':
                        token = self.token()
                        token = self.parseBlock(token)
                    else:
                        self.comment = None
                        while token != None and (token[0] != "sep" or \
                              (token[1] != ';' and token[1] != ',')):
                            token = self.token()
                    self.comment = None
                    if token == None or token[0] != "sep" or (token[1] != ';' and
                       token[1] != ','):
                        self.error("missing ';' or ',' after value")

                if token != None and token[0] == "sep":
                    if token[1] == ";":
                        # end of a variable or struct declaration
                        self.comment = None
                        token = self.token()
                        if type == "struct":
                            self.index_add(self.name, self.filename,
                                 not self.is_header, "struct", self.struct_fields)
                        else:
                            self.index_add(self.name, self.filename,
                                 not self.is_header, "variable", type)
                        break
                    elif token[1] == "(":
                        # function declaration or definition
                        token = self.token()
                        token = self.parseSignature(token)
                        if token == None:
                            return None
                        if token[0] == "sep" and token[1] == ";":
                            # declaration only
                            d = self.mergeFunctionComment(self.name,
                                    ((type, None), self.signature), 1)
                            self.index_add(self.name, self.filename, static,
                                            "function", d)
                            token = self.token()
                        elif token[0] == "sep" and token[1] == "{":
                            # definition with a body: skip the body
                            d = self.mergeFunctionComment(self.name,
                                    ((type, None), self.signature), static)
                            self.index_add(self.name, self.filename, static,
                                            "function", d)
                            token = self.token()
                            token = self.parseBlock(token);
                    elif token[1] == ',':
                        # several variables declared in one statement
                        self.comment = None
                        self.index_add(self.name, self.filename, static,
                                        "variable", type)
                        type = type_orig
                        token = self.token()
                        while token != None and token[0] == "sep":
                            type = type + token[1]
                            token = self.token()
                        if token != None and token[0] == "name":
                            self.name = token[1]
                            token = self.token()
                    else:
                        break

        return token
1585:
1586: def parse(self):
1587: self.warning("Parsing %s" % (self.filename))
1588: token = self.token()
1589: while token != None:
1590: if token[0] == 'name':
1591: token = self.parseGlobal(token)
1592: else:
1593: self.error("token %s %s unexpected at the top level" % (
1594: token[0], token[1]))
1595: token = self.parseGlobal(token)
1596: return
1597: self.parseTopComment(self.top_comment)
1598: return self.index
1599:
1600:
class docBuilder:
    """A documentation builder.

    Scans the C sources and headers of a module with CParser,
    aggregates the per-file indexes into one global index, and
    serializes the API description (<name>-api.xml) and the
    cross-references (<name>-refs.xml).
    """
    def __init__(self, name, directories=['.'], excludes=[]):
        self.name = name
        self.directories = directories
        # files matching any of these substrings are never scanned
        self.excludes = excludes + ignored_files.keys()
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        # historical quirk: the libxml2 docs use the 'libxml' basename
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        """Split a description string into words and record in
        self.xref which symbol 'id' each significant word relates to.
        Punctuation is blanked out first; words shorter than 3 chars,
        words not starting with a letter and common stop words are
        skipped."""
        if str == None:
            return
        str = string.replace(str, "'", ' ')
        str = string.replace(str, '"', ' ')
        str = string.replace(str, "/", ' ')
        str = string.replace(str, '*', ' ')
        str = string.replace(str, "[", ' ')
        str = string.replace(str, "]", ' ')
        str = string.replace(str, "(", ' ')
        str = string.replace(str, ")", ' ')
        str = string.replace(str, "<", ' ')
        str = string.replace(str, '>', ' ')
        str = string.replace(str, "&", ' ')
        str = string.replace(str, '#', ' ')
        str = string.replace(str, ",", ' ')
        str = string.replace(str, '.', ' ')
        str = string.replace(str, ';', ' ')
        tokens = string.split(str)
        for token in tokens:
            try:
                c = token[0]
                if string.find(string.letters, c) < 0:
                    pass
                elif len(token) < 3:
                    pass
                else:
                    lower = string.lower(token)
                    # TODO: generalize this a bit
                    if lower == 'and' or lower == 'the':
                        pass
                    elif self.xref.has_key(token):
                        self.xref[token].append(id)
                    else:
                        self.xref[token] = [id]
            except:
                pass

    def analyze(self):
        """Print a summary and run the semantic analysis on the index."""
        print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in self.headers.keys():
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx;
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered .c module; only the public part of
        each module index is merged into the global one."""
        for module in self.modules.keys():
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def scan(self):
        """Locate all .c and .h files in the configured directories,
        filter out excluded ones, then parse them all."""
        for directory in self.directories:
            files = glob.glob(directory + "/*.c")
            for file in files:
                skip = 0
                for excl in self.excludes:
                    if string.find(file, excl) != -1:
                        skip = 1;
                        break
                if skip == 0:
                    self.modules[file] = None;
            files = glob.glob(directory + "/*.h")
            for file in files:
                skip = 0
                for excl in self.excludes:
                    if string.find(file, excl) != -1:
                        skip = 1;
                        break
                if skip == 0:
                    self.headers[file] = None;
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the module name of a path: basename without the
        trailing .h or .c extension."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        elif module[-2:] == '.c':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Emit the <enum> element for one enumerator value."""
        id = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (name,
                     self.modulename_file(id.header)))
        if id.info != None:
            info = id.info
            if info[0] != None and info[0] != '':
                try:
                    # value may be an expression like '1 << 3'
                    val = eval(info[0])
                except:
                    val = info[0]
                output.write(" value='%s'" % (val));
            if info[2] != None and info[2] != '':
                output.write(" type='%s'" % info[2]);
            if info[1] != None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]));
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Emit the <macro> element with its description and args."""
        id = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(id.header)))
        if id.info != None:
            try:
                (args, desc) = id.info
                if desc != None and desc != "":
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for arg in args:
                    (name, desc) = arg
                    if desc != None and desc != "":
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     name, escape(desc)))
                        self.indexString(name, desc)
                    else:
                        output.write("      <arg name='%s'/>\n" % (name))
            except:
                pass
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        """Emit a <struct> element (with its fields) for typedefs of
        structs, or a plain <typedef> element otherwise."""
        id = self.idx.typedefs[name]
        if id.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(id.header), id.info))
            name = id.info[7:]
            if self.idx.structs.has_key(name) and ( \
               type(self.idx.structs[name].info) == type(()) or
                type(self.idx.structs[name].info) == type([])):
                output.write(">\n");
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc == None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
                except:
                    print "Failed to serialize struct %s" % (name)
                output.write("    </struct>\n")
            else:
                # opaque struct: no field information available
                output.write("/>\n");
        else :
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(id.header), id.info))
            try:
                desc = id.extra
                if desc != None and desc != "":
                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                    output.write("    </typedef>\n")
                else:
                    output.write("/>\n")
            except:
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Emit the <variable> element for an exported variable."""
        id = self.idx.variables[name]
        if id.info != None:
            output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
                    name, self.modulename_file(id.header), id.info))
        else:
            output.write("    <variable name='%s' file='%s'/>\n" % (
                    name, self.modulename_file(id.header)))

    def serialize_function(self, output, name):
        """Emit the <function>/<functype> element: conditionals,
        description, return type and arguments."""
        id = self.idx.functions[name]
        if name == debugsym:
            print "=>", id

        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
                     name, self.modulename_file(id.header),
                     self.modulename_file(id.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if id.conditionals != None:
            apstr = ""
            for cond in id.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n"% (apstr));
        try:
            (ret, params, desc) = id.info
            if (desc == None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
                print "%s %s from %s has no description" % (id.type, name,
                       self.modulename_file(id.module))

            output.write("      <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] != None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] == None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except:
            print "Failed to save function %s info: " % name, `id.info`
        output.write("    </%s>\n" % (id.type))

    def serialize_exports(self, output, file):
        """Emit the <file> element listing everything one header
        exports, grouped by symbol kind."""
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        dict = self.headers[file]
        if dict.info != None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 string.lower(data),
                                 escape(dict.info[data]),
                                 string.lower(data)))
                except:
                    print "Header %s lacks a %s description" % (module, data)
            if dict.info.has_key('Description'):
                desc = dict.info['Description']
                if string.find(desc, "DEPRECATED") != -1:
                    output.write("     <deprecated/>\n")

        ids = dict.macros.keys()
        ids.sort()
        for id in uniq(ids):
            # Macros are sometime used to masquerade other types.
            if dict.functions.has_key(id):
                continue
            if dict.variables.has_key(id):
                continue
            if dict.typedefs.has_key(id):
                continue
            if dict.structs.has_key(id):
                continue
            if dict.enums.has_key(id):
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
        ids = dict.enums.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
        ids = dict.typedefs.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
        ids = dict.structs.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
        ids = dict.variables.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
        ids = dict.functions.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='function'/>\n" % (id))
        output.write("    </file>\n")

    def serialize_xrefs_files(self, output):
        """Emit, per header file, the list of symbols it provides."""
        headers = self.headers.keys()
        headers.sort()
        for file in headers:
            module = self.modulename_file(file)
            output.write("    <file name='%s'>\n" % (module))
            dict = self.headers[file]
            ids = uniq(dict.functions.keys() + dict.variables.keys() + \
                  dict.macros.keys() + dict.typedefs.keys() + \
                  dict.structs.keys() + dict.enums.keys())
            ids.sort()
            for id in ids:
                output.write("      <ref name='%s'/>\n" % (id))
            output.write("    </file>\n")
        pass

    def serialize_xrefs_functions(self, output):
        """Emit, per argument type, the functions taking that type
        (skipping trivial types like int or char *)."""
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    if funcs.has_key(param[0]):
                        funcs[param[0]].append(name)
                    else:
                        funcs[param[0]] = [name]
            except:
                pass
        typ = funcs.keys()
        typ.sort()
        for type in typ:
            if type == '' or type == 'void' or type == "int" or \
               type == "char *" or type == "const char *" :
                continue
            output.write("    <type name='%s'>\n" % (type))
            ids = funcs[type]
            ids.sort()
            pid = ''    # not sure why we have dups, but get rid of them!
            for id in ids:
                if id != pid:
                    output.write("      <ref name='%s'/>\n" % (id))
                    pid = id
            output.write("    </type>\n")

    def serialize_xrefs_constructors(self, output):
        """Emit, per return type, the functions producing that type
        (skipping trivial types)."""
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                if ret[0] == "void":
                    continue
                if funcs.has_key(ret[0]):
                    funcs[ret[0]].append(name)
                else:
                    funcs[ret[0]] = [name]
            except:
                pass
        typ = funcs.keys()
        typ.sort()
        for type in typ:
            if type == '' or type == 'void' or type == "int" or \
               type == "char *" or type == "const char *" :
                continue
            output.write("    <type name='%s'>\n" % (type))
            ids = funcs[type]
            ids.sort()
            for id in ids:
                output.write("      <ref name='%s'/>\n" % (id))
            output.write("    </type>\n")

    def serialize_xrefs_alpha(self, output):
        """Emit the alphabetical index of all identifiers, grouped by
        first letter."""
        letter = None
        ids = self.idx.identifiers.keys()
        ids.sort()
        for id in ids:
            if id[0] != letter:
                if letter != None:
                    output.write("    </letter>\n")
                letter = id[0]
                output.write("    <letter name='%s'>\n" % (letter))
            output.write("      <ref name='%s'/>\n" % (id))
        if letter != None:
            output.write("    </letter>\n")

    def serialize_xrefs_references(self, output):
        """Emit, for each identifier, the HTML page fragment that
        documents it."""
        typ = self.idx.identifiers.keys()
        typ.sort()
        for id in typ:
            idf = self.idx.identifiers[id]
            module = idf.header
            output.write("    <reference name='%s' href='%s'/>\n" % (id,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         id))

    def serialize_xrefs_index(self, output):
        """Emit the word index built by indexString(), split into
        chunks of roughly 200 entries for pagination; over-common
        words (more than 30 refs) are dropped."""
        index = self.xref
        typ = index.keys()
        typ.sort()
        letter = None
        count = 0
        chunk = 0
        chunks = []
        for id in typ:
            if len(index[id]) > 30:
                continue
            if id[0] != letter:
                if letter == None or count > 200:
                    # start a new chunk, closing the previous one
                    if letter != None:
                        output.write("      </letter>\n")
                        output.write("    </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
                    output.write("    <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = id[0]
                    chunk = chunk + 1
                elif letter != None:
                    output.write("      </letter>\n")
                letter = id[0]
                output.write("      <letter name='%s'>\n" % (letter))
            output.write("        <word name='%s'>\n" % (id))
            tokens = index[id];
            tokens.sort()
            tok = None
            for token in tokens:
                if tok == token:
                    continue
                tok = token
                output.write("          <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write("        </word>\n")
        if letter != None:
            output.write("      </letter>\n")
            output.write("    </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
            # summary of the chunk ranges for the navigation bar
            output.write("    <chunks>\n")
            for ch in chunks:
                output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                            ch[0], ch[1], ch[2]))
            output.write("    </chunks>\n")

    def serialize_xrefs(self, output):
        """Emit all the cross-reference sections in order."""
        output.write("  <references>\n")
        self.serialize_xrefs_references(output)
        output.write("  </references>\n")
        output.write("  <alpha>\n")
        self.serialize_xrefs_alpha(output)
        output.write("  </alpha>\n")
        output.write("  <constructors>\n")
        self.serialize_xrefs_constructors(output)
        output.write("  </constructors>\n")
        output.write("  <functions>\n")
        self.serialize_xrefs_functions(output)
        output.write("  </functions>\n")
        output.write("  <files>\n")
        self.serialize_xrefs_files(output)
        output.write("  </files>\n")
        output.write("  <index>\n")
        self.serialize_xrefs_index(output)
        output.write("  </index>\n")

    def serialize(self):
        """Write the two output files: <name>-api.xml with the full
        API description and <name>-refs.xml with cross-references."""
        filename = "%s-api.xml" % self.name
        print "Saving XML description %s" % (filename)
        output = open(filename, "w")
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write("  <files>\n")
        headers = self.headers.keys()
        headers.sort()
        for file in headers:
            self.serialize_exports(output, file)
        output.write("  </files>\n")
        output.write("  <symbols>\n")
        macros = self.idx.macros.keys()
        macros.sort()
        for macro in macros:
            self.serialize_macro(output, macro)
        enums = self.idx.enums.keys()
        enums.sort()
        for enum in enums:
            self.serialize_enum(output, enum)
        typedefs = self.idx.typedefs.keys()
        typedefs.sort()
        for typedef in typedefs:
            self.serialize_typedef(output, typedef)
        variables = self.idx.variables.keys()
        variables.sort()
        for variable in variables:
            self.serialize_variable(output, variable)
        functions = self.idx.functions.keys()
        functions.sort()
        for function in functions:
            self.serialize_function(output, function)
        output.write("  </symbols>\n")
        output.write("</api>\n")
        output.close()

        filename = "%s-refs.xml" % self.name
        print "Saving XML Cross References %s" % (filename)
        output = open(filename, "w")
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
        output.close()
2103:
2104:
2105: def rebuild():
2106: builder = None
2107: if glob.glob("parser.c") != [] :
2108: print "Rebuilding API description for libxml2"
2109: builder = docBuilder("libxml2", [".", "."],
2110: ["xmlwin32version.h", "tst.c"])
2111: elif glob.glob("../parser.c") != [] :
2112: print "Rebuilding API description for libxml2"
2113: builder = docBuilder("libxml2", ["..", "../include/libxml"],
2114: ["xmlwin32version.h", "tst.c"])
2115: elif glob.glob("../libxslt/transform.c") != [] :
2116: print "Rebuilding API description for libxslt"
2117: builder = docBuilder("libxslt", ["../libxslt"],
2118: ["win32config.h", "libxslt.h", "tst.c"])
2119: else:
2120: print "rebuild() failed, unable to guess the module"
2121: return None
2122: builder.scan()
2123: builder.analyze()
2124: builder.serialize()
2125: if glob.glob("../libexslt/exslt.c") != [] :
2126: extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
2127: extra.scan()
2128: extra.analyze()
2129: extra.serialize()
2130: return builder
2131:
2132: #
2133: # for debugging the parser
2134: #
def parse(filename):
    """Debug helper: run the C parser over a single file and return
    the resulting symbol index."""
    return CParser(filename).parse()
2139:
2140: if __name__ == "__main__":
2141: if len(sys.argv) > 1:
2142: debug = 1
2143: parse(sys.argv[1])
2144: else:
2145: rebuild()