File: [ELWIX - Embedded LightWeight unIX] / embedaddon / libxml2 / doc / apibuild.py
Revision 1.1.1.2 (vendor branch)
Mon Jul 22 01:22:24 2013 UTC by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, HEAD
2.8.0

    1: #!/usr/bin/python -u
    2: #
    3: # This is the API builder: it parses the C sources and builds the
    4: # formal API description in XML.
    5: #
    6: # See Copyright for the status of this software.
    7: #
    8: # daniel@veillard.com
    9: #
   10: import os, sys
   11: import string
   12: import glob
   13: 
   14: debug=0
   15: #debugsym='ignorableWhitespaceSAXFunc'
   16: debugsym=None
   17: 
   18: #
   19: # C parser analysis code
   20: #
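       # Files skipped entirely when building the API description, each mapped
       # to a short human-readable reason (docBuilder below adds these names to
       # its exclusion list).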
   21: ignored_files = {
   22:   "trio": "too many non standard macros",
   23:   "trio.c": "too many non standard macros",
   24:   "trionan.c": "too many non standard macros",
   25:   "triostr.c": "too many non standard macros",
   26:   "acconfig.h": "generated portability layer",
   27:   "config.h": "generated portability layer",
   28:   "libxml.h": "internal only",
   29:   "testOOM.c": "out of memory tester",
   30:   "testOOMlib.h": "out of memory tester",
   31:   "testOOMlib.c": "out of memory tester",
   32:   "rngparser.c": "not yet integrated",
   33:   "rngparser.h": "not yet integrated",
   34:   "elfgcchack.h": "not a normal header",
   35:   "testHTML.c": "test tool",
   36:   "testReader.c": "test tool",
   37:   "testSchemas.c": "test tool",
   38:   "testXPath.c": "test tool",
   39:   "testAutomata.c": "test tool",
   40:   "testModule.c": "test tool",
   41:   "testRegexp.c": "test tool",
   42:   "testThreads.c": "test tool",
   43:   "testC14N.c": "test tool",
   44:   "testRelax.c": "test tool",
   45:   "testThreadsWin32.c": "test tool",
   46:   "testSAX.c": "test tool",
   47:   "testURI.c": "test tool",
   48:   "testapi.c": "generated regression tests",
   49:   "runtest.c": "regression tests program",
   50:   "runsuite.c": "regression tests program",
   51:   "tst.c": "not part of the library",
   52:   "test.c": "not part of the library",
   53:   "testdso.c": "test for dynamic shared libraries",
   54:   "testrecurse.c": "test for entity recursion",
   55:   "xzlib.h": "Internal API only",
   56: }
   57: 
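       # Tokens the parser silently ignores: the value is (number of following
       # tokens to also discard, reason).  See CParser.token() below, which
       # consults this table.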
   58: ignored_words = {
   59:   "WINAPI": (0, "Windows keyword"),
   60:   "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
   61:   "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
   62:   "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
   63:   "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
   64:   "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
   65:   "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   66:   "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   67:   "XMLCALL": (0, "Special macro for win32 calls"),
   68:   "XSLTCALL": (0, "Special macro for win32 calls"),
   69:   "XMLCDECL": (0, "Special macro for win32 calls"),
   70:   "EXSLTCALL": (0, "Special macro for win32 calls"),
   71:   "__declspec": (3, "Windows keyword"),
   72:   "__stdcall": (0, "Windows keyword"),
   73:   "ATTRIBUTE_UNUSED": (0, "macro keyword"),
   74:   "LIBEXSLT_PUBLIC": (0, "macro keyword"),
   75:   "X_IN_Y": (5, "macro function builder"),
   76:   "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   77:   "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
   78:   "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
   79:   "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   80: }
   81: 
   82: def escape(raw):
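           # Escape a string for inclusion in the XML output.  Illustrative:
           # escape('a < b & "c"') -> 'a &lt; b &amp; &quot;c&quot;'
           # ('&' is replaced first so the entities added below are not re-escaped).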
    83:     raw = string.replace(raw, '&', '&amp;')
   84:     raw = string.replace(raw, '<', '&lt;')
   85:     raw = string.replace(raw, '>', '&gt;')
   86:     raw = string.replace(raw, "'", '&apos;')
   87:     raw = string.replace(raw, '"', '&quot;')
   88:     return raw
   89: 
   90: def uniq(items):
   91:     d = {}
   92:     for item in items:
   93:         d[item]=1
   94:     return d.keys()
   95: 
   96: class identifier:
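           # Describes a single API symbol (function, functype, variable, struct,
           # enum, typedef, macro or include) together with the header/module it
           # came from, its line number, staticness, documentation info and the
           # preprocessor conditionals guarding it.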
   97:     def __init__(self, name, header=None, module=None, type=None, lineno = 0,
   98:                  info=None, extra=None, conditionals = None):
   99:         self.name = name
  100: 	self.header = header
  101: 	self.module = module
  102: 	self.type = type
  103: 	self.info = info
  104: 	self.extra = extra
  105: 	self.lineno = lineno
  106: 	self.static = 0
  107: 	if conditionals == None or len(conditionals) == 0:
  108: 	    self.conditionals = None
  109: 	else:
  110: 	    self.conditionals = conditionals[:]
  111: 	if self.name == debugsym:
  112: 	    print "=> define %s : %s" % (debugsym, (module, type, info,
  113: 	                                 extra, conditionals))
  114: 
  115:     def __repr__(self):
  116:         r = "%s %s:" % (self.type, self.name)
  117: 	if self.static:
  118: 	    r = r + " static"
  119: 	if self.module != None:
  120: 	    r = r + " from %s" % (self.module)
  121: 	if self.info != None:
  122: 	    r = r + " " +  `self.info`
  123: 	if self.extra != None:
  124: 	    r = r + " " + `self.extra`
  125: 	if self.conditionals != None:
  126: 	    r = r + " " + `self.conditionals`
  127: 	return r
  128: 
  129: 
  130:     def set_header(self, header):
  131:         self.header = header
  132:     def set_module(self, module):
  133:         self.module = module
  134:     def set_type(self, type):
  135:         self.type = type
  136:     def set_info(self, info):
  137:         self.info = info
  138:     def set_extra(self, extra):
  139:         self.extra = extra
  140:     def set_lineno(self, lineno):
  141:         self.lineno = lineno
  142:     def set_static(self, static):
  143:         self.static = static
  144:     def set_conditionals(self, conditionals):
  145: 	if conditionals == None or len(conditionals) == 0:
  146: 	    self.conditionals = None
  147: 	else:
  148: 	    self.conditionals = conditionals[:]
  149: 
  150:     def get_name(self):
  151:         return self.name
  152:     def get_header(self):
   153:         return self.header
  154:     def get_module(self):
  155:         return self.module
  156:     def get_type(self):
  157:         return self.type
  158:     def get_info(self):
  159:         return self.info
  160:     def get_lineno(self):
  161:         return self.lineno
  162:     def get_extra(self):
  163:         return self.extra
  164:     def get_static(self):
  165:         return self.static
  166:     def get_conditionals(self):
  167:         return self.conditionals
  168: 
  169:     def update(self, header, module, type = None, info = None, extra=None,
  170:                conditionals=None):
  171: 	if self.name == debugsym:
  172: 	    print "=> update %s : %s" % (debugsym, (module, type, info,
  173: 	                                 extra, conditionals))
  174:         if header != None and self.header == None:
  175: 	    self.set_header(module)
  176:         if module != None and (self.module == None or self.header == self.module):
  177: 	    self.set_module(module)
  178:         if type != None and self.type == None:
  179: 	    self.set_type(type)
  180:         if info != None:
  181: 	    self.set_info(info)
  182:         if extra != None:
  183: 	    self.set_extra(extra)
  184:         if conditionals != None:
  185: 	    self.set_conditionals(conditionals)
  186: 
  187: class index:
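           # Aggregates identifier objects by category (functions, variables,
           # includes, structs, enums, typedefs, macros) plus the cross-references
           # collected while scanning function bodies.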
  188:     def __init__(self, name = "noname"):
  189:         self.name = name
  190:         self.identifiers = {}
  191:         self.functions = {}
  192: 	self.variables = {}
  193: 	self.includes = {}
  194: 	self.structs = {}
  195: 	self.enums = {}
  196: 	self.typedefs = {}
  197: 	self.macros = {}
  198: 	self.references = {}
  199: 	self.info = {}
  200: 
  201:     def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
  202:         if name[0:2] == '__':
  203: 	    return None
  204:         d = None
  205:         try:
  206: 	   d = self.identifiers[name]
  207: 	   d.update(header, module, type, info, extra, conditionals)
  208: 	except:
  209: 	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
  210: 	   self.identifiers[name] = d
  211: 
  212: 	if d != None and static == 1:
  213: 	    d.set_static(1)
  214: 
  215: 	if d != None and name != None and type != None:
  216: 	    self.references[name] = d
  217: 
  218: 	if name == debugsym:
  219: 	    print "New ref: %s" % (d)
  220: 
  221: 	return d
  222: 
  223:     def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
  224:         if name[0:2] == '__':
  225: 	    return None
  226:         d = None
  227:         try:
  228: 	   d = self.identifiers[name]
  229: 	   d.update(header, module, type, info, extra, conditionals)
  230: 	except:
  231: 	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
  232: 	   self.identifiers[name] = d
  233: 
  234: 	if d != None and static == 1:
  235: 	    d.set_static(1)
  236: 
  237: 	if d != None and name != None and type != None:
  238: 	    if type == "function":
  239: 	        self.functions[name] = d
  240: 	    elif type == "functype":
  241: 	        self.functions[name] = d
  242: 	    elif type == "variable":
  243: 	        self.variables[name] = d
  244: 	    elif type == "include":
  245: 	        self.includes[name] = d
  246: 	    elif type == "struct":
  247: 	        self.structs[name] = d
  248: 	    elif type == "enum":
  249: 	        self.enums[name] = d
  250: 	    elif type == "typedef":
  251: 	        self.typedefs[name] = d
  252: 	    elif type == "macro":
  253: 	        self.macros[name] = d
  254: 	    else:
  255: 	        print "Unable to register type ", type
  256: 
  257: 	if name == debugsym:
  258: 	    print "New symbol: %s" % (d)
  259: 
  260: 	return d
  261: 
  262:     def merge(self, idx):
  263:         for id in idx.functions.keys():
  264:               #
  265:               # macro might be used to override functions or variables
  266:               # definitions
  267:               #
  268: 	     if self.macros.has_key(id):
  269: 	         del self.macros[id]
  270: 	     if self.functions.has_key(id):
  271: 	         print "function %s from %s redeclared in %s" % (
  272: 		    id, self.functions[id].header, idx.functions[id].header)
  273: 	     else:
  274: 	         self.functions[id] = idx.functions[id]
  275: 		 self.identifiers[id] = idx.functions[id]
  276:         for id in idx.variables.keys():
  277:               #
  278:               # macro might be used to override functions or variables
  279:               # definitions
  280:               #
  281: 	     if self.macros.has_key(id):
  282: 	         del self.macros[id]
  283: 	     if self.variables.has_key(id):
  284: 	         print "variable %s from %s redeclared in %s" % (
  285: 		    id, self.variables[id].header, idx.variables[id].header)
  286: 	     else:
  287: 	         self.variables[id] = idx.variables[id]
  288: 		 self.identifiers[id] = idx.variables[id]
  289:         for id in idx.structs.keys():
  290: 	     if self.structs.has_key(id):
  291: 	         print "struct %s from %s redeclared in %s" % (
  292: 		    id, self.structs[id].header, idx.structs[id].header)
  293: 	     else:
  294: 	         self.structs[id] = idx.structs[id]
  295: 		 self.identifiers[id] = idx.structs[id]
  296:         for id in idx.typedefs.keys():
  297: 	     if self.typedefs.has_key(id):
  298: 	         print "typedef %s from %s redeclared in %s" % (
  299: 		    id, self.typedefs[id].header, idx.typedefs[id].header)
  300: 	     else:
  301: 	         self.typedefs[id] = idx.typedefs[id]
  302: 		 self.identifiers[id] = idx.typedefs[id]
  303:         for id in idx.macros.keys():
  304:               #
  305:               # macro might be used to override functions or variables
  306:               # definitions
  307:               #
  308:              if self.variables.has_key(id):
  309:                  continue
  310:              if self.functions.has_key(id):
  311:                  continue
  312:              if self.enums.has_key(id):
  313:                  continue
  314: 	     if self.macros.has_key(id):
  315: 	         print "macro %s from %s redeclared in %s" % (
  316: 		    id, self.macros[id].header, idx.macros[id].header)
  317: 	     else:
  318: 	         self.macros[id] = idx.macros[id]
  319: 		 self.identifiers[id] = idx.macros[id]
  320:         for id in idx.enums.keys():
  321: 	     if self.enums.has_key(id):
  322: 	         print "enum %s from %s redeclared in %s" % (
  323: 		    id, self.enums[id].header, idx.enums[id].header)
  324: 	     else:
  325: 	         self.enums[id] = idx.enums[id]
  326: 		 self.identifiers[id] = idx.enums[id]
  327: 
  328:     def merge_public(self, idx):
  329:         for id in idx.functions.keys():
  330: 	     if self.functions.has_key(id):
  331: 	         # check that function condition agrees with header
  332: 	         if idx.functions[id].conditionals != \
  333: 		    self.functions[id].conditionals:
  334: 		     print "Header condition differs from Function for %s:" \
  335: 		        % id
  336: 		     print "  H: %s" % self.functions[id].conditionals
  337: 		     print "  C: %s" % idx.functions[id].conditionals
  338: 	         up = idx.functions[id]
  339: 	         self.functions[id].update(None, up.module, up.type, up.info, up.extra)
  340: 	 #     else:
  341: 	 #         print "Function %s from %s is not declared in headers" % (
  342: 	 #	        id, idx.functions[id].module)
  343: 	 # TODO: do the same for variables.
  344: 
  345:     def analyze_dict(self, type, dict):
  346:         count = 0
  347: 	public = 0
  348:         for name in dict.keys():
  349: 	    id = dict[name]
  350: 	    count = count + 1
  351: 	    if id.static == 0:
  352: 	        public = public + 1
  353:         if count != public:
  354: 	    print "  %d %s , %d public" % (count, type, public)
  355: 	elif count != 0:
  356: 	    print "  %d public %s" % (count, type)
  357: 
  358: 
  359:     def analyze(self):
  360: 	self.analyze_dict("functions", self.functions)
  361: 	self.analyze_dict("variables", self.variables)
  362: 	self.analyze_dict("structs", self.structs)
  363: 	self.analyze_dict("typedefs", self.typedefs)
  364: 	self.analyze_dict("macros", self.macros)
  365: 
  366: class CLexer:
  367:     """A lexer for the C language: it tokenizes the input by reading and
  368:        analyzing it line by line."""
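           # Tokens are (kind, value) tuples; kind is one of 'name', 'sep', 'op',
           # 'string', 'comment' or 'preproc' (the categories produced by token()
           # below).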
  369:     def __init__(self, input):
  370:         self.input = input
  371: 	self.tokens = []
  372: 	self.line = ""
  373: 	self.lineno = 0
  374: 
  375:     def getline(self):
  376:         line = ''
  377: 	while line == '':
  378: 	    line = self.input.readline()
  379: 	    if not line:
  380: 		return None
  381: 	    self.lineno = self.lineno + 1
  382: 	    line = string.lstrip(line)
  383: 	    line = string.rstrip(line)
  384: 	    if line == '':
  385: 	        continue
  386: 	    while line[-1] == '\\':
  387: 	        line = line[:-1]
  388: 		n = self.input.readline()
  389: 		self.lineno = self.lineno + 1
  390: 		n = string.lstrip(n)
  391: 		n = string.rstrip(n)
  392: 		if not n:
  393: 		    break
  394: 		else:
  395: 		    line = line + n
  396:         return line
  397: 
  398:     def getlineno(self):
  399:         return self.lineno
  400: 
  401:     def push(self, token):
  402:         self.tokens.insert(0, token);
  403: 
  404:     def debug(self):
  405:         print "Last token: ", self.last
  406: 	print "Token queue: ", self.tokens
  407: 	print "Line %d end: " % (self.lineno), self.line
  408: 
  409:     def token(self):
  410:         while self.tokens == []:
  411: 	    if self.line == "":
  412: 		line = self.getline()
  413: 	    else:
  414: 	        line = self.line
  415: 		self.line = ""
  416: 	    if line == None:
  417: 	        return None
  418: 
  419: 	    if line[0] == '#':
  420: 	        self.tokens = map((lambda x: ('preproc', x)),
  421: 		                  string.split(line))
  422: 		break;
  423: 	    l = len(line)
  424: 	    if line[0] == '"' or line[0] == "'":
  425: 	        end = line[0]
  426: 	        line = line[1:]
  427: 		found = 0
  428: 		tok = ""
  429: 		while found == 0:
  430: 		    i = 0
  431: 		    l = len(line)
  432: 		    while i < l:
  433: 			if line[i] == end:
  434: 			    self.line = line[i+1:]
  435: 			    line = line[:i]
  436: 			    l = i
  437: 			    found = 1
  438: 			    break
  439: 			if line[i] == '\\':
  440: 			    i = i + 1
  441: 			i = i + 1
  442: 		    tok = tok + line
  443: 		    if found == 0:
  444: 		        line = self.getline()
  445: 			if line == None:
  446: 			    return None
  447: 		self.last = ('string', tok)
  448: 		return self.last
  449: 
  450: 	    if l >= 2 and line[0] == '/' and line[1] == '*':
  451: 	        line = line[2:]
  452: 		found = 0
  453: 		tok = ""
  454: 		while found == 0:
  455: 		    i = 0
  456: 		    l = len(line)
  457: 		    while i < l:
  458: 			if line[i] == '*' and i+1 < l and line[i+1] == '/':
  459: 			    self.line = line[i+2:]
  460: 			    line = line[:i-1]
  461: 			    l = i
  462: 			    found = 1
  463: 			    break
  464: 			i = i + 1
  465: 	            if tok != "":
  466: 		        tok = tok + "\n"
  467: 		    tok = tok + line
  468: 		    if found == 0:
  469: 		        line = self.getline()
  470: 			if line == None:
  471: 			    return None
  472: 		self.last = ('comment', tok)
  473: 		return self.last
  474: 	    if l >= 2 and line[0] == '/' and line[1] == '/':
  475: 	        line = line[2:]
  476: 		self.last = ('comment', line)
  477: 		return self.last
  478: 	    i = 0
  479: 	    while i < l:
  480: 	        if line[i] == '/' and i+1 < l and line[i+1] == '/':
  481: 		    self.line = line[i:]
  482: 		    line = line[:i]
  483: 		    break
  484: 	        if line[i] == '/' and i+1 < l and line[i+1] == '*':
  485: 		    self.line = line[i:]
  486: 		    line = line[:i]
  487: 		    break
  488: 		if line[i] == '"' or line[i] == "'":
  489: 		    self.line = line[i:]
  490: 		    line = line[:i]
  491: 		    break
  492: 		i = i + 1
  493: 	    l = len(line)
  494: 	    i = 0
  495: 	    while i < l:
  496: 	        if line[i] == ' ' or line[i] == '\t':
  497: 		    i = i + 1
  498: 		    continue
  499: 		o = ord(line[i])
  500: 		if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  501: 		   (o >= 48 and o <= 57):
  502: 		    s = i
  503: 		    while i < l:
  504: 			o = ord(line[i])
  505: 			if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  506: 			   (o >= 48 and o <= 57) or string.find(
  507: 			       " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
  508: 			    i = i + 1
  509: 			else:
  510: 			    break
  511: 		    self.tokens.append(('name', line[s:i]))
  512: 		    continue
  513: 		if string.find("(){}:;,[]", line[i]) != -1:
  514: #                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
  515: #		    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
  516: #		    line[i] == ',' or line[i] == '[' or line[i] == ']':
  517: 		    self.tokens.append(('sep', line[i]))
  518: 		    i = i + 1
  519: 		    continue
  520: 		if string.find("+-*><=/%&!|.", line[i]) != -1:
  521: #                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
  522: #		    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
  523: #		    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
  524: #		    line[i] == '!' or line[i] == '|' or line[i] == '.':
  525: 		    if line[i] == '.' and  i + 2 < l and \
  526: 		       line[i+1] == '.' and line[i+2] == '.':
  527: 			self.tokens.append(('name', '...'))
  528: 			i = i + 3
  529: 			continue
  530: 
  531: 		    j = i + 1
  532: 		    if j < l and (
  533: 		       string.find("+-*><=/%&!|", line[j]) != -1):
  534: #		        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
  535: #			line[j] == '>' or line[j] == '<' or line[j] == '=' or \
  536: #			line[j] == '/' or line[j] == '%' or line[j] == '&' or \
  537: #			line[j] == '!' or line[j] == '|'):
  538: 			self.tokens.append(('op', line[i:j+1]))
  539: 			i = j + 1
  540: 		    else:
  541: 			self.tokens.append(('op', line[i]))
  542: 			i = i + 1
  543: 		    continue
  544: 		s = i
  545: 		while i < l:
  546: 		    o = ord(line[i])
  547: 		    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  548: 		       (o >= 48 and o <= 57) or (
  549: 		        string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
  550: #		         line[i] != ' ' and line[i] != '\t' and
  551: #			 line[i] != '(' and line[i] != ')' and
  552: #			 line[i] != '{'  and line[i] != '}' and
  553: #			 line[i] != ':' and line[i] != ';' and
  554: #			 line[i] != ',' and line[i] != '+' and
  555: #			 line[i] != '-' and line[i] != '*' and
  556: #			 line[i] != '/' and line[i] != '%' and
  557: #			 line[i] != '&' and line[i] != '!' and
  558: #			 line[i] != '|' and line[i] != '[' and
  559: #			 line[i] != ']' and line[i] != '=' and
  560: #			 line[i] != '*' and line[i] != '>' and
  561: #			 line[i] != '<'):
  562: 			i = i + 1
  563: 		    else:
  564: 		        break
  565: 		self.tokens.append(('name', line[s:i]))
  566: 
  567: 	tok = self.tokens[0]
  568: 	self.tokens = self.tokens[1:]
  569: 	self.last = tok
  570: 	return tok
  571: 
  572: class CParser:
  573:     """The C module parser"""
  574:     def __init__(self, filename, idx = None):
  575:         self.filename = filename
  576: 	if len(filename) > 2 and filename[-2:] == '.h':
  577: 	    self.is_header = 1
  578: 	else:
  579: 	    self.is_header = 0
  580:         self.input = open(filename)
  581: 	self.lexer = CLexer(self.input)
  582: 	if idx == None:
  583: 	    self.index = index()
  584: 	else:
  585: 	    self.index = idx
  586: 	self.top_comment = ""
  587: 	self.last_comment = ""
  588: 	self.comment = None
  589: 	self.collect_ref = 0
  590: 	self.no_error = 0
  591: 	self.conditionals = []
  592: 	self.defines = []
  593: 
  594:     def collect_references(self):
  595:         self.collect_ref = 1
  596: 
  597:     def stop_error(self):
  598:         self.no_error = 1
  599: 
  600:     def start_error(self):
  601:         self.no_error = 0
  602: 
  603:     def lineno(self):
  604:         return self.lexer.getlineno()
  605: 
  606:     def index_add(self, name, module, static, type, info=None, extra = None):
  607: 	if self.is_header == 1:
  608: 	    self.index.add(name, module, module, static, type, self.lineno(),
  609: 			   info, extra, self.conditionals)
  610: 	else:
  611: 	    self.index.add(name, None, module, static, type, self.lineno(),
  612: 			   info, extra, self.conditionals)
  613: 
  614:     def index_add_ref(self, name, module, static, type, info=None,
  615:                       extra = None):
  616: 	if self.is_header == 1:
  617: 	    self.index.add_ref(name, module, module, static, type,
  618: 	                       self.lineno(), info, extra, self.conditionals)
  619: 	else:
  620: 	    self.index.add_ref(name, None, module, static, type, self.lineno(),
  621: 			       info, extra, self.conditionals)
  622: 
  623:     def warning(self, msg):
  624:         if self.no_error:
  625: 	    return
  626: 	print msg
  627: 
  628:     def error(self, msg, token=-1):
  629:         if self.no_error:
  630: 	    return
  631: 
  632:         print "Parse Error: " + msg
  633: 	if token != -1:
  634: 	    print "Got token ", token
  635: 	self.lexer.debug()
  636: 	sys.exit(1)
  637: 
  638:     def debug(self, msg, token=-1):
  639:         print "Debug: " + msg
  640: 	if token != -1:
  641: 	    print "Got token ", token
  642: 	self.lexer.debug()
  643: 
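           # Illustrative input for parseTopComment(): the leading block comment
           # of a libxml2 module, whose "Item: value" lines (for instance
           # "Summary:" and "Description:") are split on the first ':' and
           # collected into self.index.info.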
  644:     def parseTopComment(self, comment):
  645: 	res = {}
  646: 	lines = string.split(comment, "\n")
  647: 	item = None
  648: 	for line in lines:
  649: 	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
  650: 		line = line[1:]
  651: 	    while line != "" and line[0] == '*':
  652: 		line = line[1:]
  653: 	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
  654: 		line = line[1:]
  655: 	    try:
  656: 		(it, line) = string.split(line, ":", 1)
  657: 		item = it
  658: 		while line != "" and (line[0] == ' ' or line[0] == '\t'):
  659: 		    line = line[1:]
  660: 		if res.has_key(item):
  661: 		    res[item] = res[item] + " " + line
  662: 		else:
  663: 		    res[item] = line
  664: 	    except:
  665: 		if item != None:
  666: 		    if res.has_key(item):
  667: 			res[item] = res[item] + " " + line
  668: 		    else:
  669: 			res[item] = line
  670: 	self.index.info = res
  671: 
  672:     def parseComment(self, token):
  673:         if self.top_comment == "":
  674: 	    self.top_comment = token[1]
  675: 	if self.comment == None or token[1][0] == '*':
  676: 	    self.comment = token[1];
  677: 	else:
  678: 	    self.comment = self.comment + token[1]
  679: 	token = self.lexer.token()
  680: 
  681:         if string.find(self.comment, "DOC_DISABLE") != -1:
  682: 	    self.stop_error()
  683: 
  684:         if string.find(self.comment, "DOC_ENABLE") != -1:
  685: 	    self.start_error()
  686: 
  687: 	return token
  688: 
  689:     #
  690:     # Parse a comment block associated with a typedef
  691:     #
  692:     def parseTypeComment(self, name, quiet = 0):
  693:         if name[0:2] == '__':
  694: 	    quiet = 1
  695: 
  696:         args = []
  697: 	desc = ""
  698: 
  699:         if self.comment == None:
  700: 	    if not quiet:
  701: 		self.warning("Missing comment for type %s" % (name))
  702: 	    return((args, desc))
  703:         if self.comment[0] != '*':
  704: 	    if not quiet:
  705: 		self.warning("Missing * in type comment for %s" % (name))
  706: 	    return((args, desc))
  707: 	lines = string.split(self.comment, '\n')
  708: 	if lines[0] == '*':
  709: 	    del lines[0]
  710: 	if lines[0] != "* %s:" % (name):
  711: 	    if not quiet:
  712: 		self.warning("Misformatted type comment for %s" % (name))
  713: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  714: 	    return((args, desc))
  715: 	del lines[0]
  716: 	while len(lines) > 0 and lines[0] == '*':
  717: 	    del lines[0]
  718: 	desc = ""
  719: 	while len(lines) > 0:
  720: 	    l = lines[0]
  721: 	    while len(l) > 0 and l[0] == '*':
  722: 	        l = l[1:]
  723: 	    l = string.strip(l)
  724: 	    desc = desc + " " + l
  725: 	    del lines[0]
  726: 
  727: 	desc = string.strip(desc)
  728: 
  729: 	if quiet == 0:
  730: 	    if desc == "":
  731: 	        self.warning("Type comment for %s lacks description of the type" % (name))
  732: 
  733: 	return(desc)
  734:     #
  735:     # Parse a comment block associated with a macro
  736:     #
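           # Hypothetical example of the comment layout this expects:
           #  * SOME_MACRO:
           #  * @arg:  what the argument is
           #  *
           #  * What the macro does.
           # which would yield ([('arg', 'what the argument is')],
           # 'What the macro does.').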
  737:     def parseMacroComment(self, name, quiet = 0):
  738:         if name[0:2] == '__':
  739: 	    quiet = 1
  740: 
  741:         args = []
  742: 	desc = ""
  743: 
  744:         if self.comment == None:
  745: 	    if not quiet:
  746: 		self.warning("Missing comment for macro %s" % (name))
  747: 	    return((args, desc))
  748:         if self.comment[0] != '*':
  749: 	    if not quiet:
  750: 		self.warning("Missing * in macro comment for %s" % (name))
  751: 	    return((args, desc))
  752: 	lines = string.split(self.comment, '\n')
  753: 	if lines[0] == '*':
  754: 	    del lines[0]
  755: 	if lines[0] != "* %s:" % (name):
  756: 	    if not quiet:
  757: 		self.warning("Misformatted macro comment for %s" % (name))
  758: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  759: 	    return((args, desc))
  760: 	del lines[0]
  761: 	while lines[0] == '*':
  762: 	    del lines[0]
  763: 	while len(lines) > 0 and lines[0][0:3] == '* @':
  764: 	    l = lines[0][3:]
  765: 	    try:
  766: 	        (arg, desc) = string.split(l, ':', 1)
  767: 		desc=string.strip(desc)
  768: 		arg=string.strip(arg)
  769:             except:
  770: 		if not quiet:
  771: 		    self.warning("Misformatted macro comment for %s" % (name))
  772: 		    self.warning("  problem with '%s'" % (lines[0]))
  773: 		del lines[0]
  774: 		continue
  775: 	    del lines[0]
  776: 	    l = string.strip(lines[0])
  777: 	    while len(l) > 2 and l[0:3] != '* @':
  778: 	        while l[0] == '*':
  779: 		    l = l[1:]
  780: 		desc = desc + ' ' + string.strip(l)
  781: 		del lines[0]
  782: 		if len(lines) == 0:
  783: 		    break
  784: 		l = lines[0]
  785:             args.append((arg, desc))
  786: 	while len(lines) > 0 and lines[0] == '*':
  787: 	    del lines[0]
  788: 	desc = ""
  789: 	while len(lines) > 0:
  790: 	    l = lines[0]
  791: 	    while len(l) > 0 and l[0] == '*':
  792: 	        l = l[1:]
  793: 	    l = string.strip(l)
  794: 	    desc = desc + " " + l
  795: 	    del lines[0]
  796: 
  797: 	desc = string.strip(desc)
  798: 
  799: 	if quiet == 0:
  800: 	    if desc == "":
  801: 	        self.warning("Macro comment for %s lacks description of the macro" % (name))
  802: 
  803: 	return((args, desc))
  804: 
  805:      #
  806:      # Parse a comment block and merge the information found in the
  807:      # parameter descriptions; finally returns a block as complete
  808:      # as possible.
  809:      #
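            # Hypothetical example of the comment this merges with the parsed
            # signature:
            #  * someFunction:
            #  * @doc:  the document
            #  *
            #  * Does something with the document.
            #  *
            #  * Returns 0 in case of success, -1 otherwise.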
  810:     def mergeFunctionComment(self, name, description, quiet = 0):
  811:         if name == 'main':
  812: 	    quiet = 1
  813:         if name[0:2] == '__':
  814: 	    quiet = 1
  815: 
  816: 	(ret, args) = description
  817: 	desc = ""
  818: 	retdesc = ""
  819: 
  820:         if self.comment == None:
  821: 	    if not quiet:
  822: 		self.warning("Missing comment for function %s" % (name))
  823: 	    return(((ret[0], retdesc), args, desc))
  824:         if self.comment[0] != '*':
  825: 	    if not quiet:
  826: 		self.warning("Missing * in function comment for %s" % (name))
  827: 	    return(((ret[0], retdesc), args, desc))
  828: 	lines = string.split(self.comment, '\n')
  829: 	if lines[0] == '*':
  830: 	    del lines[0]
  831: 	if lines[0] != "* %s:" % (name):
  832: 	    if not quiet:
  833: 		self.warning("Misformatted function comment for %s" % (name))
  834: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  835: 	    return(((ret[0], retdesc), args, desc))
  836: 	del lines[0]
  837: 	while lines[0] == '*':
  838: 	    del lines[0]
  839: 	nbargs = len(args)
  840: 	while len(lines) > 0 and lines[0][0:3] == '* @':
  841: 	    l = lines[0][3:]
  842: 	    try:
  843: 	        (arg, desc) = string.split(l, ':', 1)
  844: 		desc=string.strip(desc)
  845: 		arg=string.strip(arg)
  846:             except:
  847: 		if not quiet:
  848: 		    self.warning("Misformatted function comment for %s" % (name))
  849: 		    self.warning("  problem with '%s'" % (lines[0]))
  850: 		del lines[0]
  851: 		continue
  852: 	    del lines[0]
  853: 	    l = string.strip(lines[0])
  854: 	    while len(l) > 2 and l[0:3] != '* @':
  855: 	        while l[0] == '*':
  856: 		    l = l[1:]
  857: 		desc = desc + ' ' + string.strip(l)
  858: 		del lines[0]
  859: 		if len(lines) == 0:
  860: 		    break
  861: 		l = lines[0]
  862: 	    i = 0
  863: 	    while i < nbargs:
  864: 	        if args[i][1] == arg:
  865: 		    args[i] = (args[i][0], arg, desc)
  866: 		    break;
  867: 		i = i + 1
  868: 	    if i >= nbargs:
  869: 		if not quiet:
  870: 		    self.warning("Unable to find arg %s from function comment for %s" % (
  871: 		       arg, name))
  872: 	while len(lines) > 0 and lines[0] == '*':
  873: 	    del lines[0]
  874: 	desc = ""
  875: 	while len(lines) > 0:
  876: 	    l = lines[0]
  877: 	    while len(l) > 0 and l[0] == '*':
  878: 	        l = l[1:]
  879: 	    l = string.strip(l)
  880: 	    if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
  881: 	        try:
  882: 		    l = string.split(l, ' ', 1)[1]
  883: 		except:
  884: 		    l = ""
  885: 		retdesc = string.strip(l)
  886: 		del lines[0]
  887: 		while len(lines) > 0:
  888: 		    l = lines[0]
  889: 		    while len(l) > 0 and l[0] == '*':
  890: 			l = l[1:]
  891: 		    l = string.strip(l)
  892: 		    retdesc = retdesc + " " + l
  893: 		    del lines[0]
  894: 	    else:
  895: 	        desc = desc + " " + l
  896: 		del lines[0]
  897: 
  898: 	retdesc = string.strip(retdesc)
  899: 	desc = string.strip(desc)
  900: 
  901: 	if quiet == 0:
  902: 	     #
  903: 	     # report missing comments
  904: 	     #
  905: 	    i = 0
  906: 	    while i < nbargs:
  907: 	        if args[i][2] == None and args[i][0] != "void" and \
  908: 		   ((args[i][1] != None) or (args[i][1] == '')):
  909: 		    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
  910: 		i = i + 1
  911: 	    if retdesc == "" and ret[0] != "void":
  912: 		self.warning("Function comment for %s lacks description of return value" % (name))
  913: 	    if desc == "":
  914: 	        self.warning("Function comment for %s lacks description of the function" % (name))
  915: 
  916: 	return(((ret[0], retdesc), args, desc))
  917: 
  918:     def parsePreproc(self, token):
  919: 	if debug:
  920: 	    print "=> preproc ", token, self.lexer.tokens
  921:         name = token[1]
  922: 	if name == "#include":
  923: 	    token = self.lexer.token()
  924: 	    if token == None:
  925: 	        return None
  926: 	    if token[0] == 'preproc':
  927: 		self.index_add(token[1], self.filename, not self.is_header,
  928: 		                "include")
  929: 		return self.lexer.token()
  930: 	    return token
  931: 	if name == "#define":
  932: 	    token = self.lexer.token()
  933: 	    if token == None:
  934: 	        return None
  935: 	    if token[0] == 'preproc':
  936: 	         # TODO macros with arguments
  937: 		name = token[1]
  938: 	        lst = []
  939: 		token = self.lexer.token()
  940: 		while token != None and token[0] == 'preproc' and \
  941: 		      token[1][0] != '#':
  942: 		    lst.append(token[1])
  943: 		    token = self.lexer.token()
  944:                 try:
  945: 		    name = string.split(name, '(') [0]
  946:                 except:
  947:                     pass
  948:                 info = self.parseMacroComment(name, not self.is_header)
  949: 		self.index_add(name, self.filename, not self.is_header,
  950: 		                "macro", info)
  951: 		return token
  952: 
  953: 	#
  954: 	# Processing of conditionals modified by Bill 1/1/05
  955: 	#
  956: 	# We process conditionals (i.e. tokens from #ifdef, #ifndef,
  957: 	# #if, #else and #endif) for headers and mainline code,
  958: 	# store the ones from the header in libxml2-api.xml, and later
  959: 	# (in the routine merge_public) verify that the two (header and
  960: 	# mainline code) agree.
  961: 	#
  962: 	# There is a small problem with processing the headers. Some of
  963: 	# the variables are not concerned with enabling / disabling of
  964: 	# library functions (e.g. '__XML_PARSER_H__'), and we don't want
  965: 	# them to be included in libxml2-api.xml, or involved in
  966: 	# the check between the header and the mainline code.  To
  967: 	# accomplish this, we ignore any conditional which doesn't include
  968: 	# the string 'ENABLED'
  969: 	#
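       	# Illustrative mapping performed below:
       	#   #ifdef LIBXML_XPATH_ENABLED    -> conditional "defined(LIBXML_XPATH_ENABLED)"
       	#   #ifndef LIBXML_THREAD_ENABLED  -> conditional "!defined(LIBXML_THREAD_ENABLED)"
       	#   #ifndef __XML_PARSER_H__       -> recorded as a define only (no 'ENABLED')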
  970: 	if name == "#ifdef":
  971: 	    apstr = self.lexer.tokens[0][1]
  972: 	    try:
  973: 	        self.defines.append(apstr)
  974: 		if string.find(apstr, 'ENABLED') != -1:
  975: 		    self.conditionals.append("defined(%s)" % apstr)
  976: 	    except:
  977: 	        pass
  978: 	elif name == "#ifndef":
  979: 	    apstr = self.lexer.tokens[0][1]
  980: 	    try:
  981: 	        self.defines.append(apstr)
  982: 		if string.find(apstr, 'ENABLED') != -1:
  983: 		    self.conditionals.append("!defined(%s)" % apstr)
  984: 	    except:
  985: 	        pass
  986: 	elif name == "#if":
  987: 	    apstr = ""
  988: 	    for tok in self.lexer.tokens:
  989: 	        if apstr != "":
  990: 		    apstr = apstr + " "
  991: 	        apstr = apstr + tok[1]
  992: 	    try:
  993: 	        self.defines.append(apstr)
  994: 		if string.find(apstr, 'ENABLED') != -1:
  995: 		    self.conditionals.append(apstr)
  996: 	    except:
  997: 	        pass
  998: 	elif name == "#else":
  999: 	    if self.conditionals != [] and \
 1000: 	       string.find(self.defines[-1], 'ENABLED') != -1:
 1001: 	        self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
 1002: 	elif name == "#endif":
 1003: 	    if self.conditionals != [] and \
 1004: 	       string.find(self.defines[-1], 'ENABLED') != -1:
 1005: 	        self.conditionals = self.conditionals[:-1]
 1006: 	    self.defines = self.defines[:-1]
 1007: 	token = self.lexer.token()
 1008: 	while token != None and token[0] == 'preproc' and \
 1009: 	    token[1][0] != '#':
 1010: 	    token = self.lexer.token()
 1011: 	return token
 1012: 
 1013:      #
 1014:      # token acquisition on top of the lexer, it handle internally
 1015:      # Token acquisition on top of the lexer; it handles preprocessor
 1016:      # directives and comments internally since they are logically not
 1017:      # part of the program structure.
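            # For example, an 'XMLPUBFUN' or 'XMLCALL' name token is dropped and
            # '__const' is rewritten to plain 'const', as driven by the
            # ignored_words table above.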
 1018:     def token(self):
 1019:         global ignored_words
 1020: 
 1021:         token = self.lexer.token()
 1022: 	while token != None:
 1023: 	    if token[0] == 'comment':
 1024: 		token = self.parseComment(token)
 1025: 		continue
 1026: 	    elif token[0] == 'preproc':
 1027: 		token = self.parsePreproc(token)
 1028: 		continue
 1029: 	    elif token[0] == "name" and token[1] == "__const":
 1030: 	        token = ("name", "const")
 1031: 		return token
 1032: 	    elif token[0] == "name" and token[1] == "__attribute":
 1033: 		token = self.lexer.token()
 1034: 		while token != None and token[1] != ";":
 1035: 		    token = self.lexer.token()
 1036: 		return token
 1037: 	    elif token[0] == "name" and ignored_words.has_key(token[1]):
 1038: 	        (n, info) = ignored_words[token[1]]
 1039: 		i = 0
 1040: 		while i < n:
 1041: 		    token = self.lexer.token()
 1042: 		    i = i + 1
 1043: 		token = self.lexer.token()
 1044: 		continue
 1045: 	    else:
 1046: 	        if debug:
 1047: 		    print "=> ", token
 1048: 	        return token
 1049: 	return None
 1050: 
 1051:      #
 1052:      # Parse a typedef; it records the type and its name.
 1053:      #
 1054:     def parseTypedef(self, token):
 1055:         if token == None:
 1056: 	    return None
 1057: 	token = self.parseType(token)
 1058: 	if token == None:
 1059: 	    self.error("parsing typedef")
 1060: 	    return None
 1061: 	base_type = self.type
 1062: 	type = base_type
 1063: 	 #self.debug("end typedef type", token)
 1064: 	while token != None:
 1065: 	    if token[0] == "name":
 1066: 		name = token[1]
 1067: 		signature = self.signature
 1068: 		if signature != None:
 1069: 		    type = string.split(type, '(')[0]
 1070: 		    d = self.mergeFunctionComment(name,
 1071: 			    ((type, None), signature), 1)
 1072: 		    self.index_add(name, self.filename, not self.is_header,
 1073: 				    "functype", d)
 1074: 		else:
 1075: 		    if base_type == "struct":
 1076: 			self.index_add(name, self.filename, not self.is_header,
 1077: 					"struct", type)
 1078: 			base_type = "struct " + name
 1079: 	            else:
 1080: 			# TODO report missing or misformatted comments
 1081: 			info = self.parseTypeComment(name, 1)
 1082: 			self.index_add(name, self.filename, not self.is_header,
 1083: 		                    "typedef", type, info)
 1084: 		token = self.token()
 1085: 	    else:
 1086: 		self.error("parsing typedef: expecting a name")
 1087: 		return token
 1088: 	     #self.debug("end typedef", token)
 1089: 	    if token != None and token[0] == 'sep' and token[1] == ',':
 1090: 	        type = base_type
 1091: 	        token = self.token()
 1092: 		while token != None and token[0] == "op":
 1093: 		    type = type + token[1]
 1094: 		    token = self.token()
 1095: 	    elif token != None and token[0] == 'sep' and token[1] == ';':
 1096: 	        break;
 1097: 	    elif token != None and token[0] == 'name':
 1098: 	        type = base_type
 1099: 	        continue;
 1100: 	    else:
 1101: 		self.error("parsing typedef: expecting ';'", token)
 1102: 		return token
 1103: 	token = self.token()
 1104: 	return token
 1105: 
 1106:      #
 1107:      # Parse a C code block (used for function bodies); it parses up to
 1108:      # and including the balancing }
 1109:      #
 1110:     def parseBlock(self, token):
 1111:         while token != None:
 1112: 	    if token[0] == "sep" and token[1] == "{":
 1113: 	        token = self.token()
 1114: 		token = self.parseBlock(token)
 1115: 	    elif token[0] == "sep" and token[1] == "}":
 1116: 	        self.comment = None
 1117: 	        token = self.token()
 1118: 		return token
 1119: 	    else:
 1120: 	        if self.collect_ref == 1:
 1121: 		    oldtok = token
 1122: 		    token = self.token()
 1123: 		    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
 1124: 		        if token[0] == "sep" and token[1] == "(":
 1125: 			    self.index_add_ref(oldtok[1], self.filename,
 1126: 			                        0, "function")
 1127: 			    token = self.token()
 1128: 			elif token[0] == "name":
 1129: 			    token = self.token()
 1130: 			    if token[0] == "sep" and (token[1] == ";" or
 1131: 			       token[1] == "," or token[1] == "="):
 1132: 				self.index_add_ref(oldtok[1], self.filename,
 1133: 						    0, "type")
 1134: 		    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
 1135: 			self.index_add_ref(oldtok[1], self.filename,
 1136: 					    0, "typedef")
 1137: 		    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
 1138: 			self.index_add_ref(oldtok[1], self.filename,
 1139: 					    0, "typedef")
 1140: 
 1141: 		else:
 1142: 		    token = self.token()
 1143: 	return token
 1144: 
 1145:      #
 1146:      # Parse a C struct definition till the balancing }
 1147:      #
 1148:     def parseStruct(self, token):
 1149:         fields = []
 1150: 	 #self.debug("start parseStruct", token)
 1151:         while token != None:
 1152: 	    if token[0] == "sep" and token[1] == "{":
 1153: 	        token = self.token()
 1154: 		token = self.parseTypeBlock(token)
 1155: 	    elif token[0] == "sep" and token[1] == "}":
 1156: 		self.struct_fields = fields
 1157: 		 #self.debug("end parseStruct", token)
 1158: 		 #print fields
 1159: 	        token = self.token()
 1160: 		return token
 1161: 	    else:
 1162: 	        base_type = self.type
 1163: 		 #self.debug("before parseType", token)
 1164: 		token = self.parseType(token)
 1165: 		 #self.debug("after parseType", token)
 1166: 		if token != None and token[0] == "name":
 1167: 		    fname = token[1]
 1168: 		    token = self.token()
 1169: 		    if token[0] == "sep" and token[1] == ";":
 1170: 		        self.comment = None
 1171: 		        token = self.token()
 1172: 			fields.append((self.type, fname, self.comment))
 1173: 			self.comment = None
 1174: 		    else:
 1175: 		        self.error("parseStruct: expecting ;", token)
 1176: 		elif token != None and token[0] == "sep" and token[1] == "{":
 1177: 		    token = self.token()
 1178: 		    token = self.parseTypeBlock(token)
 1179: 		    if token != None and token[0] == "name":
 1180: 			token = self.token()
 1181: 		    if token != None and token[0] == "sep" and token[1] == ";":
 1182: 			token = self.token()
 1183: 		    else:
 1184: 		        self.error("parseStruct: expecting ;", token)
 1185: 		else:
 1186: 		    self.error("parseStruct: name", token)
 1187: 		    token = self.token()
 1188: 		self.type = base_type;
 1189:         self.struct_fields = fields
 1190: 	 #self.debug("end parseStruct", token)
 1191: 	 #print fields
 1192: 	return token
 1193: 
 1194:      #
 1195:      # Parse a C enum block, parsing up to the balancing }
 1196:      #
 1197:     def parseEnumBlock(self, token):
 1198:         self.enums = []
 1199: 	name = None
 1200: 	self.comment = None
 1201: 	comment = ""
 1202: 	value = "0"
 1203:         while token != None:
 1204: 	    if token[0] == "sep" and token[1] == "{":
 1205: 	        token = self.token()
 1206: 		token = self.parseTypeBlock(token)
 1207: 	    elif token[0] == "sep" and token[1] == "}":
 1208: 		if name != None:
 1209: 		    if self.comment != None:
 1210: 			comment = self.comment
 1211: 			self.comment = None
 1212: 		    self.enums.append((name, value, comment))
 1213: 	        token = self.token()
 1214: 		return token
 1215: 	    elif token[0] == "name":
 1216: 		    if name != None:
 1217: 			if self.comment != None:
 1218: 			    comment = string.strip(self.comment)
 1219: 			    self.comment = None
 1220: 			self.enums.append((name, value, comment))
 1221: 		    name = token[1]
 1222: 		    comment = ""
 1223: 		    token = self.token()
 1224: 		    if token[0] == "op" and token[1][0] == "=":
 1225: 		        value = ""
 1226: 		        if len(token[1]) > 1:
 1227: 			    value = token[1][1:]
 1228: 		        token = self.token()
 1229: 		        while token[0] != "sep" or (token[1] != ',' and
 1230: 			      token[1] != '}'):
 1231: 			    value = value + token[1]
 1232: 			    token = self.token()
 1233: 		    else:
 1234: 		        try:
 1235: 			    value = "%d" % (int(value) + 1)
 1236: 			except:
 1237: 			    self.warning("Failed to compute value of enum %s" % (name))
 1238: 			    value=""
 1239: 		    if token[0] == "sep" and token[1] == ",":
 1240: 			token = self.token()
 1241: 	    else:
 1242: 	        token = self.token()
 1243: 	return token
 1244: 
 1245:      #
 1246:      # Parse a C definition block (used for structs); it parses up to
 1247:      # and including the balancing }
 1248:      #
 1249:     def parseTypeBlock(self, token):
 1250:         while token != None:
 1251: 	    if token[0] == "sep" and token[1] == "{":
 1252: 	        token = self.token()
 1253: 		token = self.parseTypeBlock(token)
 1254: 	    elif token[0] == "sep" and token[1] == "}":
 1255: 	        token = self.token()
 1256: 		return token
 1257: 	    else:
 1258: 	        token = self.token()
 1259: 	return token
 1260: 
 1261:      #
 1262:      # Parse a type: the fact that the type name can either occur after
 1263:      #    the definition or within the definition makes it a little harder;
 1264:      #    if inside, the name token is pushed back before returning
 1265:      #
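            # Illustrative: for the C fragment "const xmlChar * name;" this
            # leaves self.type == "const xmlChar *" and returns the
            # ('name', 'name') token, with the trailing ';' pushed back onto
            # the lexer.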
 1266:     def parseType(self, token):
 1267:         self.type = ""
 1268: 	self.struct_fields = []
 1269:         self.signature = None
 1270: 	if token == None:
 1271: 	    return token
 1272: 
 1273: 	while token[0] == "name" and (
 1274: 	      token[1] == "const" or \
 1275: 	      token[1] == "unsigned" or \
 1276: 	      token[1] == "signed"):
 1277: 	    if self.type == "":
 1278: 	        self.type = token[1]
 1279: 	    else:
 1280: 	        self.type = self.type + " " + token[1]
 1281: 	    token = self.token()
 1282: 
 1283:         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
 1284: 	    if self.type == "":
 1285: 	        self.type = token[1]
 1286: 	    else:
 1287: 	        self.type = self.type + " " + token[1]
 1288: 	    if token[0] == "name" and token[1] == "int":
 1289: 		if self.type == "":
 1290: 		    self.type = token[1]
 1291: 		else:
 1292: 		    self.type = self.type + " " + token[1]
 1293: 
 1294:         elif token[0] == "name" and token[1] == "struct":
 1295: 	    if self.type == "":
 1296: 	        self.type = token[1]
 1297: 	    else:
 1298: 	        self.type = self.type + " " + token[1]
 1299: 	    token = self.token()
 1300: 	    nametok = None
 1301: 	    if token[0] == "name":
 1302: 	        nametok = token
 1303: 		token = self.token()
 1304: 	    if token != None and token[0] == "sep" and token[1] == "{":
 1305: 		token = self.token()
 1306: 		token = self.parseStruct(token)
 1307: 	    elif token != None and token[0] == "op" and token[1] == "*":
 1308: 	        self.type = self.type + " " + nametok[1] + " *"
 1309: 		token = self.token()
 1310: 		while token != None and token[0] == "op" and token[1] == "*":
 1311: 		    self.type = self.type + " *"
 1312: 		    token = self.token()
 1313: 		if token[0] == "name":
 1314: 		    nametok = token
 1315: 		    token = self.token()
 1316: 		else:
 1317: 		    self.error("struct : expecting name", token)
 1318: 		    return token
 1319: 	    elif token != None and token[0] == "name" and nametok != None:
 1320: 	        self.type = self.type + " " + nametok[1]
 1321: 		return token
 1322: 
 1323: 	    if nametok != None:
 1324: 		self.lexer.push(token)
 1325: 		token = nametok
 1326: 	    return token
 1327: 
 1328:         elif token[0] == "name" and token[1] == "enum":
 1329: 	    if self.type == "":
 1330: 	        self.type = token[1]
 1331: 	    else:
 1332: 	        self.type = self.type + " " + token[1]
 1333: 	    self.enums = []
 1334: 	    token = self.token()
 1335: 	    if token != None and token[0] == "sep" and token[1] == "{":
 1336: 		token = self.token()
 1337: 		token = self.parseEnumBlock(token)
 1338: 	    else:
 1339: 		self.error("parsing enum: expecting '{'", token)
 1340: 	    enum_type = None
 1341: 	    if token != None and token[0] != "name":
 1342: 	        self.lexer.push(token)
 1343: 	        token = ("name", "enum")
 1344: 	    else:
 1345: 	        enum_type = token[1]
 1346: 	    for enum in self.enums:
 1347: 		self.index_add(enum[0], self.filename,
 1348: 			       not self.is_header, "enum",
 1349: 			       (enum[1], enum[2], enum_type))
 1350: 	    return token
 1351: 
 1352: 	elif token[0] == "name":
 1353: 	    if self.type == "":
 1354: 	        self.type = token[1]
 1355: 	    else:
 1356: 	        self.type = self.type + " " + token[1]
 1357: 	else:
 1358: 	    self.error("parsing type %s: expecting a name" % (self.type),
 1359: 	               token)
 1360: 	    return token
 1361: 	token = self.token()
 1362:         while token != None and (token[0] == "op" or
 1363: 	      token[0] == "name" and token[1] == "const"):
 1364: 	    self.type = self.type + " " + token[1]
 1365: 	    token = self.token()
 1366: 
 1367: 	 #
 1368: 	 # if there is a parenthesis here, this means a function type
 1369: 	 #
 1370: 	if token != None and token[0] == "sep" and token[1] == '(':
 1371: 	    self.type = self.type + token[1]
 1372: 	    token = self.token()
 1373: 	    while token != None and token[0] == "op" and token[1] == '*':
 1374: 	        self.type = self.type + token[1]
 1375: 		token = self.token()
 1376: 	    if token == None or token[0] != "name" :
 1377: 		self.error("parsing function type, name expected", token);
 1378: 	        return token
 1379: 	    self.type = self.type + token[1]
 1380: 	    nametok = token
 1381: 	    token = self.token()
 1382: 	    if token != None and token[0] == "sep" and token[1] == ')':
 1383: 		self.type = self.type + token[1]
 1384: 		token = self.token()
 1385: 		if token != None and token[0] == "sep" and token[1] == '(':
 1386: 		    token = self.token()
 1387: 		    type = self.type;
 1388: 		    token = self.parseSignature(token);
 1389: 		    self.type = type;
 1390: 		else:
 1391: 		    self.error("parsing function type, '(' expected", token);
 1392: 		    return token
 1393: 	    else:
 1394: 	        self.error("parsing function type, ')' expected", token);
 1395: 		return token
 1396: 	    self.lexer.push(token)
 1397: 	    token = nametok
 1398: 	    return token
 1399: 
 1400:          #
 1401: 	 # do some lookahead for arrays
 1402: 	 #
 1403: 	if token != None and token[0] == "name":
 1404: 	    nametok = token
 1405: 	    token = self.token()
 1406: 	    if token != None and token[0] == "sep" and token[1] == '[':
 1407: 	        self.type = self.type + nametok[1]
 1408: 		while token != None and token[0] == "sep" and token[1] == '[':
 1409: 		    self.type = self.type + token[1]
 1410: 		    token = self.token()
 1411: 		    while token != None and token[0] != 'sep' and \
 1412: 		          token[1] != ']' and token[1] != ';':
 1413: 			self.type = self.type + token[1]
 1414: 			token = self.token()
 1415: 		if token != None and token[0] == 'sep' and token[1] == ']':
 1416: 		    self.type = self.type + token[1]
 1417: 		    token = self.token()
 1418: 		else:
 1419: 		    self.error("parsing array type, ']' expected", token);
 1420: 		    return token
 1421: 	    elif token != None and token[0] == "sep" and token[1] == ':':
 1422: 	         # remove :12 in case it's a limited int size
 1423: 		token = self.token()
 1424: 		token = self.token()
 1425: 	    self.lexer.push(token)
 1426: 	    token = nametok
 1427: 
 1428: 	return token
 1429: 
 1430:      #
 1431:      # Parse a signature: '(' has been parsed and we scan the type definition
 1432:      #    up to and including the ')'
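            #    Illustrative: for "xmlDocPtr doc, int options)" the stored
            #    self.signature is [('xmlDocPtr', 'doc', None), ('int', 'options', None)].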
 1433:     def parseSignature(self, token):
 1434:         signature = []
 1435: 	if token != None and token[0] == "sep" and token[1] == ')':
 1436: 	    self.signature = []
 1437: 	    token = self.token()
 1438: 	    return token
 1439: 	while token != None:
 1440: 	    token = self.parseType(token)
 1441: 	    if token != None and token[0] == "name":
 1442: 	        signature.append((self.type, token[1], None))
 1443: 		token = self.token()
 1444: 	    elif token != None and token[0] == "sep" and token[1] == ',':
 1445: 		token = self.token()
 1446: 		continue
 1447: 	    elif token != None and token[0] == "sep" and token[1] == ')':
 1448: 	         # only the type was provided
 1449: 		if self.type == "...":
 1450: 		    signature.append((self.type, "...", None))
 1451: 		else:
 1452: 		    signature.append((self.type, None, None))
 1453: 	    if token != None and token[0] == "sep":
 1454: 	        if token[1] == ',':
 1455: 		    token = self.token()
 1456: 		    continue
 1457: 		elif token[1] == ')':
 1458: 		    token = self.token()
 1459: 		    break
 1460: 	self.signature = signature
 1461: 	return token
 1462: 
 1463:      #
 1464:      # Parse a global definition, be it a type, variable or function.
 1465:      # The extern "C" blocks are a bit nasty and require it to recurse.
 1466:      #
 1467:     def parseGlobal(self, token):
 1468:         static = 0
 1469:         if token[1] == 'extern':
 1470: 	    token = self.token()
 1471: 	    if token == None:
 1472: 	        return token
 1473: 	    if token[0] == 'string':
 1474: 	        if token[1] == 'C':
 1475: 		    token = self.token()
 1476: 		    if token == None:
 1477: 			return token
 1478: 		    if token[0] == 'sep' and token[1] == "{":
 1479: 		        token = self.token()
 1480: #			 print 'Entering extern "C line ', self.lineno()
 1481: 			while token != None and (token[0] != 'sep' or
 1482: 			      token[1] != "}"):
 1483: 			    if token[0] == 'name':
 1484: 				token = self.parseGlobal(token)
 1485: 			    else:
 1486: 				self.error(
 1487: 				 "token %s %s unexpected at the top level" % (
 1488: 					token[0], token[1]))
 1489: 				token = self.parseGlobal(token)
 1490: #			 print 'Exiting extern "C" line', self.lineno()
 1491: 			token = self.token()
 1492: 			return token
 1493: 		else:
 1494: 		    return token
 1495: 	elif token[1] == 'static':
 1496: 	    static = 1
 1497: 	    token = self.token()
 1498: 	    if token == None or  token[0] != 'name':
 1499: 	        return token
 1500: 
 1501: 	if token[1] == 'typedef':
 1502: 	    token = self.token()
 1503: 	    return self.parseTypedef(token)
 1504: 	else:
 1505: 	    token = self.parseType(token)
 1506: 	    type_orig = self.type
 1507: 	if token == None or token[0] != "name":
 1508: 	    return token
 1509: 	type = type_orig
 1510: 	self.name = token[1]
 1511: 	token = self.token()
 1512: 	while token != None and (token[0] == "sep" or token[0] == "op"):
 1513: 	    if token[0] == "sep":
 1514: 		if token[1] == "[":
 1515: 		    type = type + token[1]
 1516: 		    token = self.token()
 1517: 		    while token != None and (token[0] != "sep" or \
 1518: 		          token[1] != ";"):
 1519: 			type = type + token[1]
 1520: 			token = self.token()
 1521: 
 1522: 	    if token != None and token[0] == "op" and token[1] == "=":
 1523: 		 #
 1524: 		 # Skip the initialization of the variable
 1525: 		 #
 1526: 		token = self.token()
 1527: 		if token[0] == 'sep' and token[1] == '{':
 1528: 		    token = self.token()
 1529: 		    token = self.parseBlock(token)
 1530: 		else:
 1531: 		    self.comment = None
 1532: 		    while token != None and (token[0] != "sep" or \
 1533: 			  (token[1] != ';' and token[1] != ',')):
 1534: 			    token = self.token()
 1535: 		self.comment = None
 1536: 		if token == None or token[0] != "sep" or (token[1] != ';' and
 1537: 		   token[1] != ','):
 1538: 		    self.error("missing ';' or ',' after value")
 1539: 
 1540: 	    if token != None and token[0] == "sep":
 1541: 		if token[1] == ";":
 1542: 		    self.comment = None
 1543: 		    token = self.token()
 1544: 		    if type == "struct":
 1545: 		        self.index_add(self.name, self.filename,
 1546: 			     not self.is_header, "struct", self.struct_fields)
 1547: 		    else:
 1548: 			self.index_add(self.name, self.filename,
 1549: 			     not self.is_header, "variable", type)
 1550: 		    break
 1551: 		elif token[1] == "(":
 1552: 		    token = self.token()
 1553: 		    token = self.parseSignature(token)
 1554: 		    if token == None:
 1555: 			return None
 1556: 		    if token[0] == "sep" and token[1] == ";":
 1557: 		        d = self.mergeFunctionComment(self.name,
 1558: 				((type, None), self.signature), 1)
 1559: 			self.index_add(self.name, self.filename, static,
 1560: 			                "function", d)
 1561: 			token = self.token()
 1562: 		    elif token[0] == "sep" and token[1] == "{":
 1563: 		        d = self.mergeFunctionComment(self.name,
 1564: 				((type, None), self.signature), static)
 1565: 			self.index_add(self.name, self.filename, static,
 1566: 			                "function", d)
 1567: 			token = self.token()
 1568: 			token = self.parseBlock(token);
 1569: 		elif token[1] == ',':
 1570: 		    self.comment = None
 1571: 		    self.index_add(self.name, self.filename, static,
 1572: 		                    "variable", type)
 1573: 		    type = type_orig
 1574: 		    token = self.token()
 1575: 		    while token != None and token[0] == "sep":
 1576: 		        type = type + token[1]
 1577: 			token = self.token()
 1578: 		    if token != None and token[0] == "name":
 1579: 		        self.name = token[1]
 1580: 			token = self.token()
 1581: 		else:
 1582: 		    break
 1583: 
 1584: 	return token
 1585: 
 1586:     def parse(self):
 1587:         self.warning("Parsing %s" % (self.filename))
 1588:         token = self.token()
 1589: 	while token != None:
 1590:             if token[0] == 'name':
 1591: 	        token = self.parseGlobal(token)
 1592:             else:
 1593: 	        self.error("token %s %s unexpected at the top level" % (
 1594: 		       token[0], token[1]))
 1595: 		token = self.parseGlobal(token)
 1596: 		return
 1597: 	self.parseTopComment(self.top_comment)
 1598:         return self.index
 1599: 
 1600: 
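#
# A rough sketch of how docBuilder is driven (mirroring rebuild() at the
# bottom of this file): construct it with a module name, the directories to
# scan and a list of file-name substrings to exclude, then call scan(),
# analyze() and serialize() in that order, e.g.
#
#   builder = docBuilder("libxml2", ["..", "../include/libxml"], ["tst.c"])
#   builder.scan()        # collect and parse the *.c and *.h files
#   builder.analyze()     # cross-reference the merged index
#   builder.serialize()   # write libxml2-api.xml and libxml2-refs.xml
#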
 1601: class docBuilder:
 1602:     """A documentation builder"""
 1603:     def __init__(self, name, directories=['.'], excludes=[]):
 1604:         self.name = name
 1605:         self.directories = directories
 1606: 	self.excludes = excludes + ignored_files.keys()
 1607: 	self.modules = {}
 1608: 	self.headers = {}
 1609: 	self.idx = index()
 1610:         self.xref = {}
 1611: 	self.index = {}
 1612: 	if name == 'libxml2':
 1613: 	    self.basename = 'libxml'
 1614: 	else:
 1615: 	    self.basename = name
 1616: 
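    # indexString() feeds the full-text cross-reference: punctuation in the
    # description is blanked out, the rest is split into words, and every
    # word that starts with a letter, is at least three characters long and
    # is not a stop word ('and', 'the') is recorded in self.xref as
    # word -> [identifiers whose documentation mentions it].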
 1617:     def indexString(self, id, str):
 1618: 	if str == None:
 1619: 	    return
 1620: 	str = string.replace(str, "'", ' ')
 1621: 	str = string.replace(str, '"', ' ')
 1622: 	str = string.replace(str, "/", ' ')
 1623: 	str = string.replace(str, '*', ' ')
 1624: 	str = string.replace(str, "[", ' ')
 1625: 	str = string.replace(str, "]", ' ')
 1626: 	str = string.replace(str, "(", ' ')
 1627: 	str = string.replace(str, ")", ' ')
 1628: 	str = string.replace(str, "<", ' ')
 1629: 	str = string.replace(str, '>', ' ')
 1630: 	str = string.replace(str, "&", ' ')
 1631: 	str = string.replace(str, '#', ' ')
 1632: 	str = string.replace(str, ",", ' ')
 1633: 	str = string.replace(str, '.', ' ')
 1634: 	str = string.replace(str, ';', ' ')
 1635: 	tokens = string.split(str)
 1636: 	for token in tokens:
 1637: 	    try:
 1638: 		c = token[0]
 1639: 		if string.find(string.letters, c) < 0:
 1640: 		    pass
 1641: 		elif len(token) < 3:
 1642: 		    pass
 1643: 		else:
 1644: 		    lower = string.lower(token)
 1645: 		    # TODO: generalize this a bit
 1646: 		    if lower == 'and' or lower == 'the':
 1647: 			pass
 1648: 		    elif self.xref.has_key(token):
 1649: 			self.xref[token].append(id)
 1650: 		    else:
 1651: 			self.xref[token] = [id]
 1652: 	    except:
 1653: 		pass
 1654: 
 1655:     def analyze(self):
 1656:         print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
 1657: 	self.idx.analyze()
 1658: 
 1659:     def scanHeaders(self):
 1660: 	for header in self.headers.keys():
 1661: 	    parser = CParser(header)
 1662: 	    idx = parser.parse()
 1663: 	    self.headers[header] = idx;
 1664: 	    self.idx.merge(idx)
 1665: 
 1666:     def scanModules(self):
 1667: 	for module in self.modules.keys():
 1668: 	    parser = CParser(module)
 1669: 	    idx = parser.parse()
 1670: 	    # idx.analyze()
 1671: 	    self.modules[module] = idx
 1672: 	    self.idx.merge_public(idx)
 1673: 
 1674:     def scan(self):
 1675:         for directory in self.directories:
 1676: 	    files = glob.glob(directory + "/*.c")
 1677: 	    for file in files:
 1678: 	        skip = 0
 1679: 		for excl in self.excludes:
 1680: 		    if string.find(file, excl) != -1:
 1681: 		        skip = 1;
 1682: 			break
 1683: 		if skip == 0:
 1684: 		    self.modules[file] = None;
 1685: 	    files = glob.glob(directory + "/*.h")
 1686: 	    for file in files:
 1687: 	        skip = 0
 1688: 		for excl in self.excludes:
 1689: 		    if string.find(file, excl) != -1:
 1690: 		        skip = 1;
 1691: 			break
 1692: 		if skip == 0:
 1693: 		    self.headers[file] = None;
 1694: 	self.scanHeaders()
 1695: 	self.scanModules()
 1696: 
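    # modulename_file() below reduces a path to the module name used in the
    # XML output, e.g. "../include/libxml/tree.h" and "../tree.c" both map
    # to "tree".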
 1697:     def modulename_file(self, file):
 1698:         module = os.path.basename(file)
 1699: 	if module[-2:] == '.h':
 1700: 	    module = module[:-2]
 1701: 	elif module[-2:] == '.c':
 1702: 	    module = module[:-2]
 1703: 	return module
 1704: 
 1705:     def serialize_enum(self, output, name):
 1706:         id = self.idx.enums[name]
 1707:         output.write("    <enum name='%s' file='%s'" % (name,
 1708: 	             self.modulename_file(id.header)))
 1709: 	if id.info != None:
 1710: 	    info = id.info
 1711: 	    if info[0] != None and info[0] != '':
 1712: 	        try:
 1713: 		    val = eval(info[0])
 1714: 		except:
 1715: 		    val = info[0]
 1716: 		output.write(" value='%s'" % (val));
 1717: 	    if info[2] != None and info[2] != '':
 1718: 		output.write(" type='%s'" % info[2]);
 1719: 	    if info[1] != None and info[1] != '':
 1720: 		output.write(" info='%s'" % escape(info[1]));
 1721:         output.write("/>\n")
 1722: 
 1723:     def serialize_macro(self, output, name):
 1724:         id = self.idx.macros[name]
 1725:         output.write("    <macro name='%s' file='%s'>\n" % (name,
 1726: 	             self.modulename_file(id.header)))
 1727: 	if id.info != None:
 1728:             try:
 1729: 		(args, desc) = id.info
 1730: 		if desc != None and desc != "":
 1731: 		    output.write("      <info>%s</info>\n" % (escape(desc)))
 1732: 		    self.indexString(name, desc)
 1733: 		for arg in args:
 1734: 		    (name, desc) = arg
 1735: 		    if desc != None and desc != "":
 1736: 			output.write("      <arg name='%s' info='%s'/>\n" % (
 1737: 				     name, escape(desc)))
 1738: 			self.indexString(name, desc)
 1739: 		    else:
 1740: 			output.write("      <arg name='%s'/>\n" % (name))
 1741:             except:
 1742:                 pass
 1743:         output.write("    </macro>\n")
 1744: 
 1745:     def serialize_typedef(self, output, name):
 1746:         id = self.idx.typedefs[name]
 1747: 	if id.info[0:7] == 'struct ':
 1748: 	    output.write("    <struct name='%s' file='%s' type='%s'" % (
 1749: 	             name, self.modulename_file(id.header), id.info))
 1750: 	    name = id.info[7:]
 1751: 	    if self.idx.structs.has_key(name) and ( \
 1752: 	       type(self.idx.structs[name].info) == type(()) or
 1753: 		type(self.idx.structs[name].info) == type([])):
 1754: 	        output.write(">\n");
 1755: 		try:
 1756: 		    for field in self.idx.structs[name].info:
 1757: 			desc = field[2]
 1758: 			self.indexString(name, desc)
 1759: 			if desc == None:
 1760: 			    desc = ''
 1761: 			else:
 1762: 			    desc = escape(desc)
 1763: 			output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
 1764: 		except:
 1765: 		    print "Failed to serialize struct %s" % (name)
 1766: 		output.write("    </struct>\n")
 1767: 	    else:
 1768: 	        output.write("/>\n");
 1769: 	else :
 1770: 	    output.write("    <typedef name='%s' file='%s' type='%s'" % (
 1771: 	                 name, self.modulename_file(id.header), id.info))
 1772:             try:
 1773: 		desc = id.extra
 1774: 		if desc != None and desc != "":
 1775: 		    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
 1776: 		    output.write("    </typedef>\n")
 1777: 		else:
 1778: 		    output.write("/>\n")
 1779: 	    except:
 1780: 		output.write("/>\n")
 1781: 
 1782:     def serialize_variable(self, output, name):
 1783:         id = self.idx.variables[name]
 1784: 	if id.info != None:
 1785: 	    output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
 1786: 		    name, self.modulename_file(id.header), id.info))
 1787: 	else:
 1788: 	    output.write("    <variable name='%s' file='%s'/>\n" % (
 1789: 	            name, self.modulename_file(id.header)))
 1790: 
 1791:     def serialize_function(self, output, name):
 1792:         id = self.idx.functions[name]
 1793: 	if name == debugsym:
 1794: 	    print "=>", id
 1795: 
 1796:         output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
 1797: 	             name, self.modulename_file(id.header),
 1798: 		     self.modulename_file(id.module)))
 1799: 	#
 1800: 	# Processing of conditionals modified by Bill 1/1/05
 1801: 	#
 1802: 	if id.conditionals != None:
 1803: 	    apstr = ""
 1804: 	    for cond in id.conditionals:
 1805: 	        if apstr != "":
 1806: 		    apstr = apstr + " &amp;&amp; "
 1807: 		apstr = apstr + cond
 1808: 	    output.write("      <cond>%s</cond>\n"% (apstr));
 1809: 	try:
 1810: 	    (ret, params, desc) = id.info
 1811: 	    if (desc == None or desc == '') and \
 1812: 	       name[0:9] != "xmlThrDef" and name != "xmlDllMain":
 1813: 	        print "%s %s from %s has no description" % (id.type, name,
 1814: 		       self.modulename_file(id.module))
 1815: 
 1816: 	    output.write("      <info>%s</info>\n" % (escape(desc)))
 1817: 	    self.indexString(name, desc)
 1818: 	    if ret[0] != None:
 1819: 	        if ret[0] == "void":
 1820: 		    output.write("      <return type='void'/>\n")
 1821: 		else:
 1822: 		    output.write("      <return type='%s' info='%s'/>\n" % (
 1823: 			     ret[0], escape(ret[1])))
 1824: 		    self.indexString(name, ret[1])
 1825: 	    for param in params:
 1826: 	        if param[0] == 'void':
 1827: 		    continue
 1828: 	        if param[2] == None:
 1829: 		    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
 1830: 		else:
 1831: 		    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
 1832: 		    self.indexString(name, param[2])
 1833: 	except:
 1834: 	    print "Failed to save function %s info: " % name, `id.info`
 1835:         output.write("    </%s>\n" % (id.type))
 1836: 
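    # The element written by serialize_function() above has roughly this
    # shape (id.type is normally 'function'; attribute values elided):
    #
    #   <function name='...' file='...' module='...'>
    #     <cond>...</cond>
    #     <info>...</info>
    #     <return type='...' info='...'/>
    #     <arg name='...' type='...' info='...'/>
    #   </function>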
 1837:     def serialize_exports(self, output, file):
 1838:         module = self.modulename_file(file)
 1839: 	output.write("    <file name='%s'>\n" % (module))
 1840: 	dict = self.headers[file]
 1841: 	if dict.info != None:
 1842: 	    for data in ('Summary', 'Description', 'Author'):
 1843: 		try:
 1844: 		    output.write("     <%s>%s</%s>\n" % (
 1845: 		                 string.lower(data),
 1846: 				 escape(dict.info[data]),
 1847: 				 string.lower(data)))
 1848: 		except:
 1849: 		    print "Header %s lacks a %s description" % (module, data)
 1850: 	    if dict.info.has_key('Description'):
 1851: 	        desc = dict.info['Description']
 1852: 		if string.find(desc, "DEPRECATED") != -1:
 1853: 		    output.write("     <deprecated/>\n")
 1854: 
 1855:         ids = dict.macros.keys()
 1856: 	ids.sort()
 1857: 	for id in uniq(ids):
 1858: 	    # Macros are sometimes used to masquerade as other types.
 1859: 	    if dict.functions.has_key(id):
 1860: 	        continue
 1861: 	    if dict.variables.has_key(id):
 1862: 	        continue
 1863: 	    if dict.typedefs.has_key(id):
 1864: 	        continue
 1865: 	    if dict.structs.has_key(id):
 1866: 	        continue
 1867: 	    if dict.enums.has_key(id):
 1868: 	        continue
 1869: 	    output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
 1870:         ids = dict.enums.keys()
 1871: 	ids.sort()
 1872: 	for id in uniq(ids):
 1873: 	    output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
 1874:         ids = dict.typedefs.keys()
 1875: 	ids.sort()
 1876: 	for id in uniq(ids):
 1877: 	    output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
 1878:         ids = dict.structs.keys()
 1879: 	ids.sort()
 1880: 	for id in uniq(ids):
 1881: 	    output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
 1882:         ids = dict.variables.keys()
 1883: 	ids.sort()
 1884: 	for id in uniq(ids):
 1885: 	    output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
 1886:         ids = dict.functions.keys()
 1887: 	ids.sort()
 1888: 	for id in uniq(ids):
 1889: 	    output.write("     <exports symbol='%s' type='function'/>\n" % (id))
 1890: 	output.write("    </file>\n")
 1891: 
 1892:     def serialize_xrefs_files(self, output):
 1893:         headers = self.headers.keys()
 1894:         headers.sort()
 1895:         for file in headers:
 1896: 	    module = self.modulename_file(file)
 1897: 	    output.write("    <file name='%s'>\n" % (module))
 1898: 	    dict = self.headers[file]
 1899: 	    ids = uniq(dict.functions.keys() + dict.variables.keys() + \
 1900: 		  dict.macros.keys() + dict.typedefs.keys() + \
 1901: 		  dict.structs.keys() + dict.enums.keys())
 1902: 	    ids.sort()
 1903: 	    for id in ids:
 1904: 		output.write("      <ref name='%s'/>\n" % (id))
 1905: 	    output.write("    </file>\n")
 1906:         pass
 1907: 
 1908:     def serialize_xrefs_functions(self, output):
 1909:         funcs = {}
 1910: 	for name in self.idx.functions.keys():
 1911: 	    id = self.idx.functions[name]
 1912: 	    try:
 1913: 		(ret, params, desc) = id.info
 1914: 		for param in params:
 1915: 		    if param[0] == 'void':
 1916: 			continue
 1917: 		    if funcs.has_key(param[0]):
 1918: 		        funcs[param[0]].append(name)
 1919: 		    else:
 1920: 		        funcs[param[0]] = [name]
 1921: 	    except:
 1922: 	        pass
 1923: 	typ = funcs.keys()
 1924: 	typ.sort()
 1925: 	for type in typ:
 1926: 	    if type == '' or type == 'void' or type == "int" or \
 1927: 	       type == "char *" or type == "const char *" :
 1928: 	        continue
 1929: 	    output.write("    <type name='%s'>\n" % (type))
 1930: 	    ids = funcs[type]
 1931: 	    ids.sort()
 1932: 	    pid = ''	# not sure why we have dups, but get rid of them!
 1933: 	    for id in ids:
 1934: 	        if id != pid:
 1935: 	            output.write("      <ref name='%s'/>\n" % (id))
 1936: 		    pid = id
 1937: 	    output.write("    </type>\n")
 1938: 
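    # The two xref tables around this point are symmetric: the <functions>
    # section produced above groups functions by the types of their
    # parameters, while the <constructors> section produced below groups them
    # by return type; trivial types (void, int, char *, const char *) are
    # skipped in both cases.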
 1939:     def serialize_xrefs_constructors(self, output):
 1940:         funcs = {}
 1941: 	for name in self.idx.functions.keys():
 1942: 	    id = self.idx.functions[name]
 1943: 	    try:
 1944: 		(ret, params, desc) = id.info
 1945: 		if ret[0] == "void":
 1946: 		    continue
 1947: 		if funcs.has_key(ret[0]):
 1948: 		    funcs[ret[0]].append(name)
 1949: 		else:
 1950: 		    funcs[ret[0]] = [name]
 1951: 	    except:
 1952: 	        pass
 1953: 	typ = funcs.keys()
 1954: 	typ.sort()
 1955: 	for type in typ:
 1956: 	    if type == '' or type == 'void' or type == "int" or \
 1957: 	       type == "char *" or type == "const char *" :
 1958: 	        continue
 1959: 	    output.write("    <type name='%s'>\n" % (type))
 1960: 	    ids = funcs[type]
 1961: 	    ids.sort()
 1962: 	    for id in ids:
 1963: 	        output.write("      <ref name='%s'/>\n" % (id))
 1964: 	    output.write("    </type>\n")
 1965: 
 1966:     def serialize_xrefs_alpha(self, output):
 1967: 	letter = None
 1968: 	ids = self.idx.identifiers.keys()
 1969: 	ids.sort()
 1970: 	for id in ids:
 1971: 	    if id[0] != letter:
 1972: 		if letter != None:
 1973: 		    output.write("    </letter>\n")
 1974: 		letter = id[0]
 1975: 		output.write("    <letter name='%s'>\n" % (letter))
 1976: 	    output.write("      <ref name='%s'/>\n" % (id))
 1977: 	if letter != None:
 1978: 	    output.write("    </letter>\n")
 1979: 
 1980:     def serialize_xrefs_references(self, output):
 1981:         typ = self.idx.identifiers.keys()
 1982: 	typ.sort()
 1983: 	for id in typ:
 1984: 	    idf = self.idx.identifiers[id]
 1985: 	    module = idf.header
 1986: 	    output.write("    <reference name='%s' href='%s'/>\n" % (id,
 1987: 	                 'html/' + self.basename + '-' +
 1988: 		         self.modulename_file(module) + '.html#' +
 1989: 			 id))
 1990: 
 1991:     def serialize_xrefs_index(self, output):
 1992:         index = self.xref
 1993: 	typ = index.keys()
 1994: 	typ.sort()
 1995: 	letter = None
 1996: 	count = 0
 1997: 	chunk = 0
 1998: 	chunks = []
 1999: 	for id in typ:
 2000: 	    if len(index[id]) > 30:
 2001: 		continue
 2002: 	    if id[0] != letter:
 2003: 		if letter == None or count > 200:
 2004: 		    if letter != None:
 2005: 			output.write("      </letter>\n")
 2006: 			output.write("    </chunk>\n")
 2007: 			count = 0
 2008: 			chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
 2009: 		    output.write("    <chunk name='chunk%s'>\n" % (chunk))
 2010: 		    first_letter = id[0]
 2011: 		    chunk = chunk + 1
 2012: 		elif letter != None:
 2013: 		    output.write("      </letter>\n")
 2014: 		letter = id[0]
 2015: 		output.write("      <letter name='%s'>\n" % (letter))
 2016: 	    output.write("        <word name='%s'>\n" % (id))
 2017: 	    tokens = index[id];
 2018: 	    tokens.sort()
 2019: 	    tok = None
 2020: 	    for token in tokens:
 2021: 		if tok == token:
 2022: 		    continue
 2023: 		tok = token
 2024: 		output.write("          <ref name='%s'/>\n" % (token))
 2025: 		count = count + 1
 2026: 	    output.write("        </word>\n")
 2027: 	if letter != None:
 2028: 	    output.write("      </letter>\n")
 2029: 	    output.write("    </chunk>\n")
 2030: 	    if count != 0:
 2031: 	        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
 2032: 	    output.write("    <chunks>\n")
 2033: 	    for ch in chunks:
 2034: 		output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
 2035: 			     ch[0], ch[1], ch[2]))
 2036: 	    output.write("    </chunks>\n")
 2037: 
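    # The <index> built above is paginated: words referenced from more than
    # 30 identifiers are dropped as too common, a new <chunk> is started when
    # more than 200 references have accumulated at a letter boundary, and the
    # trailing <chunks> summary records the first and last letter covered by
    # each chunk.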
 2038:     def serialize_xrefs(self, output):
 2039: 	output.write("  <references>\n")
 2040: 	self.serialize_xrefs_references(output)
 2041: 	output.write("  </references>\n")
 2042: 	output.write("  <alpha>\n")
 2043: 	self.serialize_xrefs_alpha(output)
 2044: 	output.write("  </alpha>\n")
 2045: 	output.write("  <constructors>\n")
 2046: 	self.serialize_xrefs_constructors(output)
 2047: 	output.write("  </constructors>\n")
 2048: 	output.write("  <functions>\n")
 2049: 	self.serialize_xrefs_functions(output)
 2050: 	output.write("  </functions>\n")
 2051: 	output.write("  <files>\n")
 2052: 	self.serialize_xrefs_files(output)
 2053: 	output.write("  </files>\n")
 2054: 	output.write("  <index>\n")
 2055: 	self.serialize_xrefs_index(output)
 2056: 	output.write("  </index>\n")
 2057: 
 2058:     def serialize(self):
 2059:         filename = "%s-api.xml" % self.name
 2060:         print "Saving XML description %s" % (filename)
 2061:         output = open(filename, "w")
 2062:         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
 2063:         output.write("<api name='%s'>\n" % self.name)
 2064:         output.write("  <files>\n")
 2065:         headers = self.headers.keys()
 2066:         headers.sort()
 2067:         for file in headers:
 2068:             self.serialize_exports(output, file)
 2069:         output.write("  </files>\n")
 2070:         output.write("  <symbols>\n")
 2071:         macros = self.idx.macros.keys()
 2072:         macros.sort()
 2073:         for macro in macros:
 2074:             self.serialize_macro(output, macro)
 2075:         enums = self.idx.enums.keys()
 2076:         enums.sort()
 2077:         for enum in enums:
 2078:             self.serialize_enum(output, enum)
 2079:         typedefs = self.idx.typedefs.keys()
 2080:         typedefs.sort()
 2081:         for typedef in typedefs:
 2082:             self.serialize_typedef(output, typedef)
 2083:         variables = self.idx.variables.keys()
 2084:         variables.sort()
 2085:         for variable in variables:
 2086:             self.serialize_variable(output, variable)
 2087:         functions = self.idx.functions.keys()
 2088:         functions.sort()
 2089:         for function in functions:
 2090:             self.serialize_function(output, function)
 2091:         output.write("  </symbols>\n")
 2092:         output.write("</api>\n")
 2093:         output.close()
 2094: 
 2095:         filename = "%s-refs.xml" % self.name
 2096:         print "Saving XML Cross References %s" % (filename)
 2097:         output = open(filename, "w")
 2098:         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
 2099:         output.write("<apirefs name='%s'>\n" % self.name)
 2100:         self.serialize_xrefs(output)
 2101:         output.write("</apirefs>\n")
 2102:         output.close()
 2103: 
 2104: 
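#
# serialize() lays out <module>-api.xml roughly as:
#
#   <api name='...'>
#     <files>    one <file> element with <exports> entries per public header
#     <symbols>  <macro>, <enum>, <typedef>/<struct>, <variable> and
#                <function> descriptions, in that order
#   </api>
#
# plus a companion <module>-refs.xml holding the cross references built by
# serialize_xrefs().
#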
 2105: def rebuild():
 2106:     builder = None
 2107:     if glob.glob("parser.c") != [] :
 2108:         print "Rebuilding API description for libxml2"
 2109: 	builder = docBuilder("libxml2", [".", "."],
 2110: 	                     ["xmlwin32version.h", "tst.c"])
 2111:     elif glob.glob("../parser.c") != [] :
 2112:         print "Rebuilding API description for libxml2"
 2113: 	builder = docBuilder("libxml2", ["..", "../include/libxml"],
 2114: 	                     ["xmlwin32version.h", "tst.c"])
 2115:     elif glob.glob("../libxslt/transform.c") != [] :
 2116:         print "Rebuilding API description for libxslt"
 2117: 	builder = docBuilder("libxslt", ["../libxslt"],
 2118: 	                     ["win32config.h", "libxslt.h", "tst.c"])
 2119:     else:
 2120:         print "rebuild() failed, unable to guess the module"
 2121: 	return None
 2122:     builder.scan()
 2123:     builder.analyze()
 2124:     builder.serialize()
 2125:     if glob.glob("../libexslt/exslt.c") != [] :
 2126:         extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
 2127: 	extra.scan()
 2128: 	extra.analyze()
 2129: 	extra.serialize()
 2130:     return builder
 2131: 
 2132: #
 2133: # for debugging the parser
 2134: #
 2135: def parse(filename):
 2136:     parser = CParser(filename)
 2137:     idx = parser.parse()
 2138:     return idx
 2139: 
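#
# Typical invocations, judging from the entry point below: run the script
# with no argument to regenerate the API description for whichever module it
# detects (libxml2, libxslt, and optionally libexslt), or pass a single C
# file to parse just that file with debugging enabled, e.g.
#
#   python apibuild.py              # rebuild <module>-api.xml / <module>-refs.xml
#   python apibuild.py ../tree.c    # debug-parse one source file
#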
 2140: if __name__ == "__main__":
 2141:     if len(sys.argv) > 1:
 2142:         debug = 1
 2143:         parse(sys.argv[1])
 2144:     else:
 2145: 	rebuild()
