File: embedaddon/libxml2/doc/apibuild.py
Revision 1.1.1.1 (vendor branch), Tue Feb 21 23:37:59 2012 UTC, by misho
Branches: libxml2, MAIN; CVS tags: v2_7_8, HEAD

    1: #!/usr/bin/python -u
    2: #
    3: # This is the API builder: it parses the C sources and builds the
    4: # formal API description in XML.
    5: #
    6: # See Copyright for the status of this software.
    7: #
    8: # daniel@veillard.com
    9: #
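       #
       # Rough usage sketch (an assumption, not a documented interface): the
       # script is meant to be run from the libxml2 source tree, e.g.
       #     python apibuild.py
       # and it scans the headers and C modules to produce the XML API
       # description (libxml2-api.xml, referred to later in this file).
       #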
   10: import os, sys
   11: import string
   12: import glob
   13: 
   14: debug=0
   15: #debugsym='ignorableWhitespaceSAXFunc'
   16: debugsym=None
   17: 
   18: #
   19: # C parser analysis code
   20: #
   21: ignored_files = {
   22:   "trio": "too many non standard macros",
   23:   "trio.c": "too many non standard macros",
   24:   "trionan.c": "too many non standard macros",
   25:   "triostr.c": "too many non standard macros",
   26:   "acconfig.h": "generated portability layer",
   27:   "config.h": "generated portability layer",
   28:   "libxml.h": "internal only",
   29:   "testOOM.c": "out of memory tester",
   30:   "testOOMlib.h": "out of memory tester",
   31:   "testOOMlib.c": "out of memory tester",
   32:   "rngparser.c": "not yet integrated",
   33:   "rngparser.h": "not yet integrated",
   34:   "elfgcchack.h": "not a normal header",
   35:   "testHTML.c": "test tool",
   36:   "testReader.c": "test tool",
   37:   "testSchemas.c": "test tool",
   38:   "testXPath.c": "test tool",
   39:   "testAutomata.c": "test tool",
   40:   "testModule.c": "test tool",
   41:   "testRegexp.c": "test tool",
   42:   "testThreads.c": "test tool",
   43:   "testC14N.c": "test tool",
   44:   "testRelax.c": "test tool",
   45:   "testThreadsWin32.c": "test tool",
   46:   "testSAX.c": "test tool",
   47:   "testURI.c": "test tool",
   48:   "testapi.c": "generated regression tests",
   49:   "runtest.c": "regression tests program",
   50:   "runsuite.c": "regression tests program",
   51:   "tst.c": "not part of the library",
   52:   "test.c": "not part of the library",
   53:   "testdso.c": "test for dynamid shared libraries",
   54:   "testrecurse.c": "test for entities recursions",
   55: }
   56: 
   57: ignored_words = {
   58:   "WINAPI": (0, "Windows keyword"),
   59:   "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
   60:   "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
   61:   "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
   62:   "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
   63:   "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
   64:   "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   65:   "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   66:   "XMLCALL": (0, "Special macro for win32 calls"),
   67:   "XSLTCALL": (0, "Special macro for win32 calls"),
   68:   "XMLCDECL": (0, "Special macro for win32 calls"),
   69:   "EXSLTCALL": (0, "Special macro for win32 calls"),
   70:   "__declspec": (3, "Windows keyword"),
   71:   "__stdcall": (0, "Windows keyword"),
   72:   "ATTRIBUTE_UNUSED": (0, "macro keyword"),
   73:   "LIBEXSLT_PUBLIC": (0, "macro keyword"),
   74:   "X_IN_Y": (5, "macro function builder"),
   75:   "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   76:   "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
   77:   "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
   78:   "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   79: }
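       #
       # Each entry above maps a token to (n, reason): when CParser.token()
       # (below) meets such a token it discards it together with the n
       # following tokens; the reason string is only a human readable note.
       #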
   80: 
   81: def escape(raw):
   82:     raw = string.replace(raw, '&', '&amp;')
   83:     raw = string.replace(raw, '<', '&lt;')
   84:     raw = string.replace(raw, '>', '&gt;')
   85:     raw = string.replace(raw, "'", '&apos;')
   86:     raw = string.replace(raw, '"', '&quot;')
   87:     return raw
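       # For example, escape('a < b & "c"') returns 'a &lt; b &amp; &quot;c&quot;'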
   88: 
   89: def uniq(items):
   90:     d = {}
   91:     for item in items:
   92:         d[item]=1
   93:     return d.keys()
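       # For example, uniq(['a', 'b', 'a']) returns ['a', 'b'] (in whatever
       # order the dictionary keys happen to come out).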
   94: 
   95: class identifier:
   96:     def __init__(self, name, header=None, module=None, type=None, lineno = 0,
   97:                  info=None, extra=None, conditionals = None):
   98:         self.name = name
   99: 	self.header = header
  100: 	self.module = module
  101: 	self.type = type
  102: 	self.info = info
  103: 	self.extra = extra
  104: 	self.lineno = lineno
  105: 	self.static = 0
  106: 	if conditionals == None or len(conditionals) == 0:
  107: 	    self.conditionals = None
  108: 	else:
  109: 	    self.conditionals = conditionals[:]
  110: 	if self.name == debugsym:
  111: 	    print "=> define %s : %s" % (debugsym, (module, type, info,
  112: 	                                 extra, conditionals))
  113: 
  114:     def __repr__(self):
  115:         r = "%s %s:" % (self.type, self.name)
  116: 	if self.static:
  117: 	    r = r + " static"
  118: 	if self.module != None:
  119: 	    r = r + " from %s" % (self.module)
  120: 	if self.info != None:
  121: 	    r = r + " " +  `self.info`
  122: 	if self.extra != None:
  123: 	    r = r + " " + `self.extra`
  124: 	if self.conditionals != None:
  125: 	    r = r + " " + `self.conditionals`
  126: 	return r
  127: 
  128: 
  129:     def set_header(self, header):
  130:         self.header = header
  131:     def set_module(self, module):
  132:         self.module = module
  133:     def set_type(self, type):
  134:         self.type = type
  135:     def set_info(self, info):
  136:         self.info = info
  137:     def set_extra(self, extra):
  138:         self.extra = extra
  139:     def set_lineno(self, lineno):
  140:         self.lineno = lineno
  141:     def set_static(self, static):
  142:         self.static = static
  143:     def set_conditionals(self, conditionals):
  144: 	if conditionals == None or len(conditionals) == 0:
  145: 	    self.conditionals = None
  146: 	else:
  147: 	    self.conditionals = conditionals[:]
  148: 
  149:     def get_name(self):
  150:         return self.name
  151:     def get_header(self):
  152:         return self.header
  153:     def get_module(self):
  154:         return self.module
  155:     def get_type(self):
  156:         return self.type
  157:     def get_info(self):
  158:         return self.info
  159:     def get_lineno(self):
  160:         return self.lineno
  161:     def get_extra(self):
  162:         return self.extra
  163:     def get_static(self):
  164:         return self.static
  165:     def get_conditionals(self):
  166:         return self.conditionals
  167: 
  168:     def update(self, header, module, type = None, info = None, extra=None,
  169:                conditionals=None):
  170: 	if self.name == debugsym:
  171: 	    print "=> update %s : %s" % (debugsym, (module, type, info,
  172: 	                                 extra, conditionals))
  173:         if header != None and self.header == None:
  174: 	    self.set_header(header)
  175:         if module != None and (self.module == None or self.header == self.module):
  176: 	    self.set_module(module)
  177:         if type != None and self.type == None:
  178: 	    self.set_type(type)
  179:         if info != None:
  180: 	    self.set_info(info)
  181:         if extra != None:
  182: 	    self.set_extra(extra)
  183:         if conditionals != None:
  184: 	    self.set_conditionals(conditionals)
  185: 
  186: class index:
  187:     def __init__(self, name = "noname"):
  188:         self.name = name
  189:         self.identifiers = {}
  190:         self.functions = {}
  191: 	self.variables = {}
  192: 	self.includes = {}
  193: 	self.structs = {}
  194: 	self.enums = {}
  195: 	self.typedefs = {}
  196: 	self.macros = {}
  197: 	self.references = {}
  198: 	self.info = {}
  199: 
  200:     def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
  201:         if name[0:2] == '__':
  202: 	    return None
  203:         d = None
  204:         try:
  205: 	   d = self.identifiers[name]
  206: 	   d.update(header, module, type, info, extra, conditionals)
  207: 	except:
  208: 	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
  209: 	   self.identifiers[name] = d
  210: 
  211: 	if d != None and static == 1:
  212: 	    d.set_static(1)
  213: 
  214: 	if d != None and name != None and type != None:
  215: 	    self.references[name] = d
  216: 
  217: 	if name == debugsym:
  218: 	    print "New ref: %s" % (d)
  219: 
  220: 	return d
  221: 
  222:     def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
  223:         if name[0:2] == '__':
  224: 	    return None
  225:         d = None
  226:         try:
  227: 	   d = self.identifiers[name]
  228: 	   d.update(header, module, type, info, extra, conditionals)
  229: 	except:
  230: 	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
  231: 	   self.identifiers[name] = d
  232: 
  233: 	if d != None and static == 1:
  234: 	    d.set_static(1)
  235: 
  236: 	if d != None and name != None and type != None:
  237: 	    if type == "function":
  238: 	        self.functions[name] = d
  239: 	    elif type == "functype":
  240: 	        self.functions[name] = d
  241: 	    elif type == "variable":
  242: 	        self.variables[name] = d
  243: 	    elif type == "include":
  244: 	        self.includes[name] = d
  245: 	    elif type == "struct":
  246: 	        self.structs[name] = d
  247: 	    elif type == "enum":
  248: 	        self.enums[name] = d
  249: 	    elif type == "typedef":
  250: 	        self.typedefs[name] = d
  251: 	    elif type == "macro":
  252: 	        self.macros[name] = d
  253: 	    else:
  254: 	        print "Unable to register type ", type
  255: 
  256: 	if name == debugsym:
  257: 	    print "New symbol: %s" % (d)
  258: 
  259: 	return d
  260: 
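           #
           # For instance (a sketch, the line number is made up):
           #   idx.add("xmlParseFile", "parser.h", "parser.c", 0, "function", 42)
           # records the identifier both in idx.functions and idx.identifiers.
           #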
  261:     def merge(self, idx):
  262:         for id in idx.functions.keys():
  263:               #
  264:               # macro might be used to override functions or variables
  265:               # definitions
  266:               #
  267: 	     if self.macros.has_key(id):
  268: 	         del self.macros[id]
  269: 	     if self.functions.has_key(id):
  270: 	         print "function %s from %s redeclared in %s" % (
  271: 		    id, self.functions[id].header, idx.functions[id].header)
  272: 	     else:
  273: 	         self.functions[id] = idx.functions[id]
  274: 		 self.identifiers[id] = idx.functions[id]
  275:         for id in idx.variables.keys():
  276:               #
  277:               # macro might be used to override functions or variables
  278:               # definitions
  279:               #
  280: 	     if self.macros.has_key(id):
  281: 	         del self.macros[id]
  282: 	     if self.variables.has_key(id):
  283: 	         print "variable %s from %s redeclared in %s" % (
  284: 		    id, self.variables[id].header, idx.variables[id].header)
  285: 	     else:
  286: 	         self.variables[id] = idx.variables[id]
  287: 		 self.identifiers[id] = idx.variables[id]
  288:         for id in idx.structs.keys():
  289: 	     if self.structs.has_key(id):
  290: 	         print "struct %s from %s redeclared in %s" % (
  291: 		    id, self.structs[id].header, idx.structs[id].header)
  292: 	     else:
  293: 	         self.structs[id] = idx.structs[id]
  294: 		 self.identifiers[id] = idx.structs[id]
  295:         for id in idx.typedefs.keys():
  296: 	     if self.typedefs.has_key(id):
  297: 	         print "typedef %s from %s redeclared in %s" % (
  298: 		    id, self.typedefs[id].header, idx.typedefs[id].header)
  299: 	     else:
  300: 	         self.typedefs[id] = idx.typedefs[id]
  301: 		 self.identifiers[id] = idx.typedefs[id]
  302:         for id in idx.macros.keys():
  303:               #
  304:               # macro might be used to override functions or variables
  305:               # definitions
  306:               #
  307:              if self.variables.has_key(id):
  308:                  continue
  309:              if self.functions.has_key(id):
  310:                  continue
  311:              if self.enums.has_key(id):
  312:                  continue
  313: 	     if self.macros.has_key(id):
  314: 	         print "macro %s from %s redeclared in %s" % (
  315: 		    id, self.macros[id].header, idx.macros[id].header)
  316: 	     else:
  317: 	         self.macros[id] = idx.macros[id]
  318: 		 self.identifiers[id] = idx.macros[id]
  319:         for id in idx.enums.keys():
  320: 	     if self.enums.has_key(id):
  321: 	         print "enum %s from %s redeclared in %s" % (
  322: 		    id, self.enums[id].header, idx.enums[id].header)
  323: 	     else:
  324: 	         self.enums[id] = idx.enums[id]
  325: 		 self.identifiers[id] = idx.enums[id]
  326: 
  327:     def merge_public(self, idx):
  328:         for id in idx.functions.keys():
  329: 	     if self.functions.has_key(id):
  330: 	         # check that function condition agrees with header
  331: 	         if idx.functions[id].conditionals != \
  332: 		    self.functions[id].conditionals:
  333: 		     print "Header condition differs from Function for %s:" \
  334: 		        % id
  335: 		     print "  H: %s" % self.functions[id].conditionals
  336: 		     print "  C: %s" % idx.functions[id].conditionals
  337: 	         up = idx.functions[id]
  338: 	         self.functions[id].update(None, up.module, up.type, up.info, up.extra)
  339: 	 #     else:
  340: 	 #         print "Function %s from %s is not declared in headers" % (
  341: 	 #	        id, idx.functions[id].module)
  342: 	 # TODO: do the same for variables.
  343: 
  344:     def analyze_dict(self, type, dict):
  345:         count = 0
  346: 	public = 0
  347:         for name in dict.keys():
  348: 	    id = dict[name]
  349: 	    count = count + 1
  350: 	    if id.static == 0:
  351: 	        public = public + 1
  352:         if count != public:
  353: 	    print "  %d %s , %d public" % (count, type, public)
  354: 	elif count != 0:
  355: 	    print "  %d public %s" % (count, type)
  356: 
  357: 
  358:     def analyze(self):
  359: 	self.analyze_dict("functions", self.functions)
  360: 	self.analyze_dict("variables", self.variables)
  361: 	self.analyze_dict("structs", self.structs)
  362: 	self.analyze_dict("typedefs", self.typedefs)
  363: 	self.analyze_dict("macros", self.macros)
  364: 
  365: class CLexer:
  366:     """A lexer for the C language, tokenize the input by reading and
  367:        analyzing it line by line"""
  368:     def __init__(self, input):
  369:         self.input = input
  370: 	self.tokens = []
  371: 	self.line = ""
  372: 	self.lineno = 0
  373: 
  374:     def getline(self):
  375:         line = ''
  376: 	while line == '':
  377: 	    line = self.input.readline()
  378: 	    if not line:
  379: 		return None
  380: 	    self.lineno = self.lineno + 1
  381: 	    line = string.lstrip(line)
  382: 	    line = string.rstrip(line)
  383: 	    if line == '':
  384: 	        continue
  385: 	    while line[-1] == '\\':
  386: 	        line = line[:-1]
  387: 		n = self.input.readline()
  388: 		self.lineno = self.lineno + 1
  389: 		n = string.lstrip(n)
  390: 		n = string.rstrip(n)
  391: 		if not n:
  392: 		    break
  393: 		else:
  394: 		    line = line + n
  395:         return line
  396: 
  397:     def getlineno(self):
  398:         return self.lineno
  399: 
  400:     def push(self, token):
  401:         self.tokens.insert(0, token);
  402: 
  403:     def debug(self):
  404:         print "Last token: ", self.last
  405: 	print "Token queue: ", self.tokens
  406: 	print "Line %d end: " % (self.lineno), self.line
  407: 
  408:     def token(self):
  409:         while self.tokens == []:
  410: 	    if self.line == "":
  411: 		line = self.getline()
  412: 	    else:
  413: 	        line = self.line
  414: 		self.line = ""
  415: 	    if line == None:
  416: 	        return None
  417: 
  418: 	    if line[0] == '#':
  419: 	        self.tokens = map((lambda x: ('preproc', x)),
  420: 		                  string.split(line))
  421: 		break;
  422: 	    l = len(line)
  423: 	    if line[0] == '"' or line[0] == "'":
  424: 	        end = line[0]
  425: 	        line = line[1:]
  426: 		found = 0
  427: 		tok = ""
  428: 		while found == 0:
  429: 		    i = 0
  430: 		    l = len(line)
  431: 		    while i < l:
  432: 			if line[i] == end:
  433: 			    self.line = line[i+1:]
  434: 			    line = line[:i]
  435: 			    l = i
  436: 			    found = 1
  437: 			    break
  438: 			if line[i] == '\\':
  439: 			    i = i + 1
  440: 			i = i + 1
  441: 		    tok = tok + line
  442: 		    if found == 0:
  443: 		        line = self.getline()
  444: 			if line == None:
  445: 			    return None
  446: 		self.last = ('string', tok)
  447: 		return self.last
  448: 
  449: 	    if l >= 2 and line[0] == '/' and line[1] == '*':
  450: 	        line = line[2:]
  451: 		found = 0
  452: 		tok = ""
  453: 		while found == 0:
  454: 		    i = 0
  455: 		    l = len(line)
  456: 		    while i < l:
  457: 			if line[i] == '*' and i+1 < l and line[i+1] == '/':
  458: 			    self.line = line[i+2:]
  459: 			    line = line[:i-1]
  460: 			    l = i
  461: 			    found = 1
  462: 			    break
  463: 			i = i + 1
  464: 	            if tok != "":
  465: 		        tok = tok + "\n"
  466: 		    tok = tok + line
  467: 		    if found == 0:
  468: 		        line = self.getline()
  469: 			if line == None:
  470: 			    return None
  471: 		self.last = ('comment', tok)
  472: 		return self.last
  473: 	    if l >= 2 and line[0] == '/' and line[1] == '/':
  474: 	        line = line[2:]
  475: 		self.last = ('comment', line)
  476: 		return self.last
  477: 	    i = 0
  478: 	    while i < l:
  479: 	        if line[i] == '/' and i+1 < l and line[i+1] == '/':
  480: 		    self.line = line[i:]
  481: 		    line = line[:i]
  482: 		    break
  483: 	        if line[i] == '/' and i+1 < l and line[i+1] == '*':
  484: 		    self.line = line[i:]
  485: 		    line = line[:i]
  486: 		    break
  487: 		if line[i] == '"' or line[i] == "'":
  488: 		    self.line = line[i:]
  489: 		    line = line[:i]
  490: 		    break
  491: 		i = i + 1
  492: 	    l = len(line)
  493: 	    i = 0
  494: 	    while i < l:
  495: 	        if line[i] == ' ' or line[i] == '\t':
  496: 		    i = i + 1
  497: 		    continue
  498: 		o = ord(line[i])
  499: 		if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  500: 		   (o >= 48 and o <= 57):
  501: 		    s = i
  502: 		    while i < l:
  503: 			o = ord(line[i])
  504: 			if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  505: 			   (o >= 48 and o <= 57) or string.find(
  506: 			       " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
  507: 			    i = i + 1
  508: 			else:
  509: 			    break
  510: 		    self.tokens.append(('name', line[s:i]))
  511: 		    continue
  512: 		if string.find("(){}:;,[]", line[i]) != -1:
  513: #                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
  514: #		    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
  515: #		    line[i] == ',' or line[i] == '[' or line[i] == ']':
  516: 		    self.tokens.append(('sep', line[i]))
  517: 		    i = i + 1
  518: 		    continue
  519: 		if string.find("+-*><=/%&!|.", line[i]) != -1:
  520: #                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
  521: #		    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
  522: #		    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
  523: #		    line[i] == '!' or line[i] == '|' or line[i] == '.':
  524: 		    if line[i] == '.' and  i + 2 < l and \
  525: 		       line[i+1] == '.' and line[i+2] == '.':
  526: 			self.tokens.append(('name', '...'))
  527: 			i = i + 3
  528: 			continue
  529: 
  530: 		    j = i + 1
  531: 		    if j < l and (
  532: 		       string.find("+-*><=/%&!|", line[j]) != -1):
  533: #		        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
  534: #			line[j] == '>' or line[j] == '<' or line[j] == '=' or \
  535: #			line[j] == '/' or line[j] == '%' or line[j] == '&' or \
  536: #			line[j] == '!' or line[j] == '|'):
  537: 			self.tokens.append(('op', line[i:j+1]))
  538: 			i = j + 1
  539: 		    else:
  540: 			self.tokens.append(('op', line[i]))
  541: 			i = i + 1
  542: 		    continue
  543: 		s = i
  544: 		while i < l:
  545: 		    o = ord(line[i])
  546: 		    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  547: 		       (o >= 48 and o <= 57) or (
  548: 		        string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
  549: #		         line[i] != ' ' and line[i] != '\t' and
  550: #			 line[i] != '(' and line[i] != ')' and
  551: #			 line[i] != '{'  and line[i] != '}' and
  552: #			 line[i] != ':' and line[i] != ';' and
  553: #			 line[i] != ',' and line[i] != '+' and
  554: #			 line[i] != '-' and line[i] != '*' and
  555: #			 line[i] != '/' and line[i] != '%' and
  556: #			 line[i] != '&' and line[i] != '!' and
  557: #			 line[i] != '|' and line[i] != '[' and
  558: #			 line[i] != ']' and line[i] != '=' and
  559: #			 line[i] != '*' and line[i] != '>' and
  560: #			 line[i] != '<'):
  561: 			i = i + 1
  562: 		    else:
  563: 		        break
  564: 		self.tokens.append(('name', line[s:i]))
  565: 
  566: 	tok = self.tokens[0]
  567: 	self.tokens = self.tokens[1:]
  568: 	self.last = tok
  569: 	return tok
  570: 
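       #
       # A sketch of the token stream CLexer produces for the C line
       #     int xmlFoo(const char *name);
       # (xmlFoo is a made up name):
       #   ('name', 'int') ('name', 'xmlFoo') ('sep', '(') ('name', 'const')
       #   ('name', 'char') ('op', '*') ('name', 'name') ('sep', ')') ('sep', ';')
       #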
  571: class CParser:
  572:     """The C module parser"""
  573:     def __init__(self, filename, idx = None):
  574:         self.filename = filename
  575: 	if len(filename) > 2 and filename[-2:] == '.h':
  576: 	    self.is_header = 1
  577: 	else:
  578: 	    self.is_header = 0
  579:         self.input = open(filename)
  580: 	self.lexer = CLexer(self.input)
  581: 	if idx == None:
  582: 	    self.index = index()
  583: 	else:
  584: 	    self.index = idx
  585: 	self.top_comment = ""
  586: 	self.last_comment = ""
  587: 	self.comment = None
  588: 	self.collect_ref = 0
  589: 	self.no_error = 0
  590: 	self.conditionals = []
  591: 	self.defines = []
  592: 
  593:     def collect_references(self):
  594:         self.collect_ref = 1
  595: 
  596:     def stop_error(self):
  597:         self.no_error = 1
  598: 
  599:     def start_error(self):
  600:         self.no_error = 0
  601: 
  602:     def lineno(self):
  603:         return self.lexer.getlineno()
  604: 
  605:     def index_add(self, name, module, static, type, info=None, extra = None):
  606: 	if self.is_header == 1:
  607: 	    self.index.add(name, module, module, static, type, self.lineno(),
  608: 			   info, extra, self.conditionals)
  609: 	else:
  610: 	    self.index.add(name, None, module, static, type, self.lineno(),
  611: 			   info, extra, self.conditionals)
  612: 
  613:     def index_add_ref(self, name, module, static, type, info=None,
  614:                       extra = None):
  615: 	if self.is_header == 1:
  616: 	    self.index.add_ref(name, module, module, static, type,
  617: 	                       self.lineno(), info, extra, self.conditionals)
  618: 	else:
  619: 	    self.index.add_ref(name, None, module, static, type, self.lineno(),
  620: 			       info, extra, self.conditionals)
  621: 
  622:     def warning(self, msg):
  623:         if self.no_error:
  624: 	    return
  625: 	print msg
  626: 
  627:     def error(self, msg, token=-1):
  628:         if self.no_error:
  629: 	    return
  630: 
  631:         print "Parse Error: " + msg
  632: 	if token != -1:
  633: 	    print "Got token ", token
  634: 	self.lexer.debug()
  635: 	sys.exit(1)
  636: 
  637:     def debug(self, msg, token=-1):
  638:         print "Debug: " + msg
  639: 	if token != -1:
  640: 	    print "Got token ", token
  641: 	self.lexer.debug()
  642: 
  643:     def parseTopComment(self, comment):
  644: 	res = {}
  645: 	lines = string.split(comment, "\n")
  646: 	item = None
  647: 	for line in lines:
  648: 	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
  649: 		line = line[1:]
  650: 	    while line != "" and line[0] == '*':
  651: 		line = line[1:]
  652: 	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
  653: 		line = line[1:]
  654: 	    try:
  655: 		(it, line) = string.split(line, ":", 1)
  656: 		item = it
  657: 		while line != "" and (line[0] == ' ' or line[0] == '\t'):
  658: 		    line = line[1:]
  659: 		if res.has_key(item):
  660: 		    res[item] = res[item] + " " + line
  661: 		else:
  662: 		    res[item] = line
  663: 	    except:
  664: 		if item != None:
  665: 		    if res.has_key(item):
  666: 			res[item] = res[item] + " " + line
  667: 		    else:
  668: 			res[item] = line
  669: 	self.index.info = res
  670: 
  671:     def parseComment(self, token):
  672:         if self.top_comment == "":
  673: 	    self.top_comment = token[1]
  674: 	if self.comment == None or token[1][0] == '*':
  675: 	    self.comment = token[1];
  676: 	else:
  677: 	    self.comment = self.comment + token[1]
  678: 	token = self.lexer.token()
  679: 
  680:         if string.find(self.comment, "DOC_DISABLE") != -1:
  681: 	    self.stop_error()
  682: 
  683:         if string.find(self.comment, "DOC_ENABLE") != -1:
  684: 	    self.start_error()
  685: 
  686: 	return token
  687: 
  688:     #
  689:     # Parse a comment block associated with a typedef
  690:     #
  691:     def parseTypeComment(self, name, quiet = 0):
  692:         if name[0:2] == '__':
  693: 	    quiet = 1
  694: 
  695:         args = []
  696: 	desc = ""
  697: 
  698:         if self.comment == None:
  699: 	    if not quiet:
  700: 		self.warning("Missing comment for type %s" % (name))
  701: 	    return((args, desc))
  702:         if self.comment[0] != '*':
  703: 	    if not quiet:
  704: 		self.warning("Missing * in type comment for %s" % (name))
  705: 	    return((args, desc))
  706: 	lines = string.split(self.comment, '\n')
  707: 	if lines[0] == '*':
  708: 	    del lines[0]
  709: 	if lines[0] != "* %s:" % (name):
  710: 	    if not quiet:
  711: 		self.warning("Misformatted type comment for %s" % (name))
  712: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  713: 	    return((args, desc))
  714: 	del lines[0]
  715: 	while len(lines) > 0 and lines[0] == '*':
  716: 	    del lines[0]
  717: 	desc = ""
  718: 	while len(lines) > 0:
  719: 	    l = lines[0]
  720: 	    while len(l) > 0 and l[0] == '*':
  721: 	        l = l[1:]
  722: 	    l = string.strip(l)
  723: 	    desc = desc + " " + l
  724: 	    del lines[0]
  725: 
  726: 	desc = string.strip(desc)
  727: 
  728: 	if quiet == 0:
  729: 	    if desc == "":
  730: 	        self.warning("Type comment for %s lacks a description of the type" % (name))
  731: 
  732: 	return(desc)
  733:     #
  734:     # Parse a comment block associated with a macro
  735:     #
  736:     def parseMacroComment(self, name, quiet = 0):
  737:         if name[0:2] == '__':
  738: 	    quiet = 1
  739: 
  740:         args = []
  741: 	desc = ""
  742: 
  743:         if self.comment == None:
  744: 	    if not quiet:
  745: 		self.warning("Missing comment for macro %s" % (name))
  746: 	    return((args, desc))
  747:         if self.comment[0] != '*':
  748: 	    if not quiet:
  749: 		self.warning("Missing * in macro comment for %s" % (name))
  750: 	    return((args, desc))
  751: 	lines = string.split(self.comment, '\n')
  752: 	if lines[0] == '*':
  753: 	    del lines[0]
  754: 	if lines[0] != "* %s:" % (name):
  755: 	    if not quiet:
  756: 		self.warning("Misformatted macro comment for %s" % (name))
  757: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  758: 	    return((args, desc))
  759: 	del lines[0]
  760: 	while lines[0] == '*':
  761: 	    del lines[0]
  762: 	while len(lines) > 0 and lines[0][0:3] == '* @':
  763: 	    l = lines[0][3:]
  764: 	    try:
  765: 	        (arg, desc) = string.split(l, ':', 1)
  766: 		desc=string.strip(desc)
  767: 		arg=string.strip(arg)
  768:             except:
  769: 		if not quiet:
  770: 		    self.warning("Misformatted macro comment for %s" % (name))
  771: 		    self.warning("  problem with '%s'" % (lines[0]))
  772: 		del lines[0]
  773: 		continue
  774: 	    del lines[0]
  775: 	    l = string.strip(lines[0])
  776: 	    while len(l) > 2 and l[0:3] != '* @':
  777: 	        while l[0] == '*':
  778: 		    l = l[1:]
  779: 		desc = desc + ' ' + string.strip(l)
  780: 		del lines[0]
  781: 		if len(lines) == 0:
  782: 		    break
  783: 		l = lines[0]
  784:             args.append((arg, desc))
  785: 	while len(lines) > 0 and lines[0] == '*':
  786: 	    del lines[0]
  787: 	desc = ""
  788: 	while len(lines) > 0:
  789: 	    l = lines[0]
  790: 	    while len(l) > 0 and l[0] == '*':
  791: 	        l = l[1:]
  792: 	    l = string.strip(l)
  793: 	    desc = desc + " " + l
  794: 	    del lines[0]
  795: 
  796: 	desc = string.strip(desc)
  797: 
  798: 	if quiet == 0:
  799: 	    if desc == "":
  800: 	        self.warning("Macro comment for %s lacks a description of the macro" % (name))
  801: 
  802: 	return((args, desc))
  803: 
  804:      #
  805:      # Parse a comment block and merge the information found in the
  806:      # parameter descriptions, finally returning a block as complete
  807:      # as possible
  808:      #
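            #
            # The comment blocks this expects follow the libxml2 convention,
            # roughly (xmlFooBar and its arguments are made up):
            #   /**
            #    * xmlFooBar:
            #    * @doc:  the document
            #    * @name:  the element name
            #    *
            #    * Does something with @doc and @name.
            #    *
            #    * Returns 0 in case of success, -1 in case of error.
            #    */
            #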
  809:     def mergeFunctionComment(self, name, description, quiet = 0):
  810:         if name == 'main':
  811: 	    quiet = 1
  812:         if name[0:2] == '__':
  813: 	    quiet = 1
  814: 
  815: 	(ret, args) = description
  816: 	desc = ""
  817: 	retdesc = ""
  818: 
  819:         if self.comment == None:
  820: 	    if not quiet:
  821: 		self.warning("Missing comment for function %s" % (name))
  822: 	    return(((ret[0], retdesc), args, desc))
  823:         if self.comment[0] != '*':
  824: 	    if not quiet:
  825: 		self.warning("Missing * in function comment for %s" % (name))
  826: 	    return(((ret[0], retdesc), args, desc))
  827: 	lines = string.split(self.comment, '\n')
  828: 	if lines[0] == '*':
  829: 	    del lines[0]
  830: 	if lines[0] != "* %s:" % (name):
  831: 	    if not quiet:
  832: 		self.warning("Misformatted function comment for %s" % (name))
  833: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  834: 	    return(((ret[0], retdesc), args, desc))
  835: 	del lines[0]
  836: 	while lines[0] == '*':
  837: 	    del lines[0]
  838: 	nbargs = len(args)
  839: 	while len(lines) > 0 and lines[0][0:3] == '* @':
  840: 	    l = lines[0][3:]
  841: 	    try:
  842: 	        (arg, desc) = string.split(l, ':', 1)
  843: 		desc=string.strip(desc)
  844: 		arg=string.strip(arg)
  845:             except:
  846: 		if not quiet:
  847: 		    self.warning("Misformatted function comment for %s" % (name))
  848: 		    self.warning("  problem with '%s'" % (lines[0]))
  849: 		del lines[0]
  850: 		continue
  851: 	    del lines[0]
  852: 	    l = string.strip(lines[0])
  853: 	    while len(l) > 2 and l[0:3] != '* @':
  854: 	        while l[0] == '*':
  855: 		    l = l[1:]
  856: 		desc = desc + ' ' + string.strip(l)
  857: 		del lines[0]
  858: 		if len(lines) == 0:
  859: 		    break
  860: 		l = lines[0]
  861: 	    i = 0
  862: 	    while i < nbargs:
  863: 	        if args[i][1] == arg:
  864: 		    args[i] = (args[i][0], arg, desc)
  865: 		    break;
  866: 		i = i + 1
  867: 	    if i >= nbargs:
  868: 		if not quiet:
  869: 		    self.warning("Unable to find arg %s from function comment for %s" % (
  870: 		       arg, name))
  871: 	while len(lines) > 0 and lines[0] == '*':
  872: 	    del lines[0]
  873: 	desc = ""
  874: 	while len(lines) > 0:
  875: 	    l = lines[0]
  876: 	    while len(l) > 0 and l[0] == '*':
  877: 	        l = l[1:]
  878: 	    l = string.strip(l)
  879: 	    if len(l) >= 6 and (l[0:6] == "return" or l[0:6] == "Return"):
  880: 	        try:
  881: 		    l = string.split(l, ' ', 1)[1]
  882: 		except:
  883: 		    l = ""
  884: 		retdesc = string.strip(l)
  885: 		del lines[0]
  886: 		while len(lines) > 0:
  887: 		    l = lines[0]
  888: 		    while len(l) > 0 and l[0] == '*':
  889: 			l = l[1:]
  890: 		    l = string.strip(l)
  891: 		    retdesc = retdesc + " " + l
  892: 		    del lines[0]
  893: 	    else:
  894: 	        desc = desc + " " + l
  895: 		del lines[0]
  896: 
  897: 	retdesc = string.strip(retdesc)
  898: 	desc = string.strip(desc)
  899: 
  900: 	if quiet == 0:
  901: 	     #
  902: 	     # report missing comments
  903: 	     #
  904: 	    i = 0
  905: 	    while i < nbargs:
  906: 	        if args[i][2] == None and args[i][0] != "void" and \
  907: 		   ((args[i][1] != None) or (args[i][1] == '')):
  908: 		    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
  909: 		i = i + 1
  910: 	    if retdesc == "" and ret[0] != "void":
  911: 		self.warning("Function comment for %s lacks description of return value" % (name))
  912: 	    if desc == "":
  913: 	        self.warning("Function comment for %s lacks description of the function" % (name))
  914: 
  915: 	return(((ret[0], retdesc), args, desc))
  916: 
  917:     def parsePreproc(self, token):
  918: 	if debug:
  919: 	    print "=> preproc ", token, self.lexer.tokens
  920:         name = token[1]
  921: 	if name == "#include":
  922: 	    token = self.lexer.token()
  923: 	    if token == None:
  924: 	        return None
  925: 	    if token[0] == 'preproc':
  926: 		self.index_add(token[1], self.filename, not self.is_header,
  927: 		                "include")
  928: 		return self.lexer.token()
  929: 	    return token
  930: 	if name == "#define":
  931: 	    token = self.lexer.token()
  932: 	    if token == None:
  933: 	        return None
  934: 	    if token[0] == 'preproc':
  935: 	         # TODO macros with arguments
  936: 		name = token[1]
  937: 	        lst = []
  938: 		token = self.lexer.token()
  939: 		while token != None and token[0] == 'preproc' and \
  940: 		      token[1][0] != '#':
  941: 		    lst.append(token[1])
  942: 		    token = self.lexer.token()
  943:                 try:
  944: 		    name = string.split(name, '(') [0]
  945:                 except:
  946:                     pass
  947:                 info = self.parseMacroComment(name, not self.is_header)
  948: 		self.index_add(name, self.filename, not self.is_header,
  949: 		                "macro", info)
  950: 		return token
  951: 
  952: 	#
  953: 	# Processing of conditionals modified by Bill 1/1/05
  954: 	#
  955: 	# We process conditionals (i.e. tokens from #ifdef, #ifndef,
  956: 	# #if, #else and #endif) for headers and mainline code,
  957: 	# store the ones from the header in libxml2-api.xml, and later
  958: 	# (in the routine merge_public) verify that the two (header and
  959: 	# mainline code) agree.
  960: 	#
  961: 	# There is a small problem with processing the headers. Some of
  962: 	# the variables are not concerned with enabling / disabling of
  963: 	# library functions (e.g. '__XML_PARSER_H__'), and we don't want
  964: 	# them to be included in libxml2-api.xml, or involved in
  965: 	# the check between the header and the mainline code.  To
  966: 	# accomplish this, we ignore any conditional which doesn't include
  967: 	# the string 'ENABLED'
  968: 	#
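        # For example, "#ifdef LIBXML_XPATH_ENABLED" in a header adds
        # "defined(LIBXML_XPATH_ENABLED)" to self.conditionals, while the
        # header guard "#ifndef __XML_PARSER_H__" is only pushed on
        # self.defines and never becomes a conditional.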
  969: 	if name == "#ifdef":
  970: 	    apstr = self.lexer.tokens[0][1]
  971: 	    try:
  972: 	        self.defines.append(apstr)
  973: 		if string.find(apstr, 'ENABLED') != -1:
  974: 		    self.conditionals.append("defined(%s)" % apstr)
  975: 	    except:
  976: 	        pass
  977: 	elif name == "#ifndef":
  978: 	    apstr = self.lexer.tokens[0][1]
  979: 	    try:
  980: 	        self.defines.append(apstr)
  981: 		if string.find(apstr, 'ENABLED') != -1:
  982: 		    self.conditionals.append("!defined(%s)" % apstr)
  983: 	    except:
  984: 	        pass
  985: 	elif name == "#if":
  986: 	    apstr = ""
  987: 	    for tok in self.lexer.tokens:
  988: 	        if apstr != "":
  989: 		    apstr = apstr + " "
  990: 	        apstr = apstr + tok[1]
  991: 	    try:
  992: 	        self.defines.append(apstr)
  993: 		if string.find(apstr, 'ENABLED') != -1:
  994: 		    self.conditionals.append(apstr)
  995: 	    except:
  996: 	        pass
  997: 	elif name == "#else":
  998: 	    if self.conditionals != [] and \
  999: 	       string.find(self.defines[-1], 'ENABLED') != -1:
 1000: 	        self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
 1001: 	elif name == "#endif":
 1002: 	    if self.conditionals != [] and \
 1003: 	       string.find(self.defines[-1], 'ENABLED') != -1:
 1004: 	        self.conditionals = self.conditionals[:-1]
 1005: 	    self.defines = self.defines[:-1]
 1006: 	token = self.lexer.token()
 1007: 	while token != None and token[0] == 'preproc' and \
 1008: 	    token[1][0] != '#':
 1009: 	    token = self.lexer.token()
 1010: 	return token
 1011: 
 1012:      #
 1013:      # token acquisition on top of the lexer; it internally handles
 1014:      # preprocessor directives and comments since they are logically
 1015:      # not part of the program structure.
 1016:      #
 1017:     def token(self):
 1018:         global ignored_words
 1019: 
 1020:         token = self.lexer.token()
 1021: 	while token != None:
 1022: 	    if token[0] == 'comment':
 1023: 		token = self.parseComment(token)
 1024: 		continue
 1025: 	    elif token[0] == 'preproc':
 1026: 		token = self.parsePreproc(token)
 1027: 		continue
 1028: 	    elif token[0] == "name" and token[1] == "__const":
 1029: 	        token = ("name", "const")
 1030: 		return token
 1031: 	    elif token[0] == "name" and token[1] == "__attribute":
 1032: 		token = self.lexer.token()
 1033: 		while token != None and token[1] != ";":
 1034: 		    token = self.lexer.token()
 1035: 		return token
 1036: 	    elif token[0] == "name" and ignored_words.has_key(token[1]):
 1037: 	        (n, info) = ignored_words[token[1]]
 1038: 		i = 0
 1039: 		while i < n:
 1040: 		    token = self.lexer.token()
 1041: 		    i = i + 1
 1042: 		token = self.lexer.token()
 1043: 		continue
 1044: 	    else:
 1045: 	        if debug:
 1046: 		    print "=> ", token
 1047: 	        return token
 1048: 	return None
 1049: 
 1050:      #
 1051:      # Parse a typedef, it records the type and its name.
 1052:      #
 1053:     def parseTypedef(self, token):
 1054:         if token == None:
 1055: 	    return None
 1056: 	token = self.parseType(token)
 1057: 	if token == None:
 1058: 	    self.error("parsing typedef")
 1059: 	    return None
 1060: 	base_type = self.type
 1061: 	type = base_type
 1062: 	 #self.debug("end typedef type", token)
 1063: 	while token != None:
 1064: 	    if token[0] == "name":
 1065: 		name = token[1]
 1066: 		signature = self.signature
 1067: 		if signature != None:
 1068: 		    type = string.split(type, '(')[0]
 1069: 		    d = self.mergeFunctionComment(name,
 1070: 			    ((type, None), signature), 1)
 1071: 		    self.index_add(name, self.filename, not self.is_header,
 1072: 				    "functype", d)
 1073: 		else:
 1074: 		    if base_type == "struct":
 1075: 			self.index_add(name, self.filename, not self.is_header,
 1076: 					"struct", type)
 1077: 			base_type = "struct " + name
 1078: 	            else:
 1079: 			# TODO report missing or misformatted comments
 1080: 			info = self.parseTypeComment(name, 1)
 1081: 			self.index_add(name, self.filename, not self.is_header,
 1082: 		                    "typedef", type, info)
 1083: 		token = self.token()
 1084: 	    else:
 1085: 		self.error("parsing typedef: expecting a name")
 1086: 		return token
 1087: 	     #self.debug("end typedef", token)
 1088: 	    if token != None and token[0] == 'sep' and token[1] == ',':
 1089: 	        type = base_type
 1090: 	        token = self.token()
 1091: 		while token != None and token[0] == "op":
 1092: 		    type = type + token[1]
 1093: 		    token = self.token()
 1094: 	    elif token != None and token[0] == 'sep' and token[1] == ';':
 1095: 	        break;
 1096: 	    elif token != None and token[0] == 'name':
 1097: 	        type = base_type
 1098: 	        continue;
 1099: 	    else:
 1100: 		self.error("parsing typedef: expecting ';'", token)
 1101: 		return token
 1102: 	token = self.token()
 1103: 	return token
 1104: 
 1105:      #
 1106:      # Parse a C code block (used for function bodies); parses up to
 1107:      # and including the balancing }
 1108:      #
 1109:     def parseBlock(self, token):
 1110:         while token != None:
 1111: 	    if token[0] == "sep" and token[1] == "{":
 1112: 	        token = self.token()
 1113: 		token = self.parseBlock(token)
 1114: 	    elif token[0] == "sep" and token[1] == "}":
 1115: 	        self.comment = None
 1116: 	        token = self.token()
 1117: 		return token
 1118: 	    else:
 1119: 	        if self.collect_ref == 1:
 1120: 		    oldtok = token
 1121: 		    token = self.token()
 1122: 		    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
 1123: 		        if token[0] == "sep" and token[1] == "(":
 1124: 			    self.index_add_ref(oldtok[1], self.filename,
 1125: 			                        0, "function")
 1126: 			    token = self.token()
 1127: 			elif token[0] == "name":
 1128: 			    token = self.token()
 1129: 			    if token[0] == "sep" and (token[1] == ";" or
 1130: 			       token[1] == "," or token[1] == "="):
 1131: 				self.index_add_ref(oldtok[1], self.filename,
 1132: 						    0, "type")
 1133: 		    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
 1134: 			self.index_add_ref(oldtok[1], self.filename,
 1135: 					    0, "typedef")
 1136: 		    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
 1137: 			self.index_add_ref(oldtok[1], self.filename,
 1138: 					    0, "typedef")
 1139: 
 1140: 		else:
 1141: 		    token = self.token()
 1142: 	return token
 1143: 
 1144:      #
 1145:      # Parse a C struct definition till the balancing }
 1146:      #
 1147:     def parseStruct(self, token):
 1148:         fields = []
 1149: 	 #self.debug("start parseStruct", token)
 1150:         while token != None:
 1151: 	    if token[0] == "sep" and token[1] == "{":
 1152: 	        token = self.token()
 1153: 		token = self.parseTypeBlock(token)
 1154: 	    elif token[0] == "sep" and token[1] == "}":
 1155: 		self.struct_fields = fields
 1156: 		 #self.debug("end parseStruct", token)
 1157: 		 #print fields
 1158: 	        token = self.token()
 1159: 		return token
 1160: 	    else:
 1161: 	        base_type = self.type
 1162: 		 #self.debug("before parseType", token)
 1163: 		token = self.parseType(token)
 1164: 		 #self.debug("after parseType", token)
 1165: 		if token != None and token[0] == "name":
 1166: 		    fname = token[1]
 1167: 		    token = self.token()
 1168: 		    if token[0] == "sep" and token[1] == ";":
 1169: 		        self.comment = None
 1170: 		        token = self.token()
 1171: 			fields.append((self.type, fname, self.comment))
 1172: 			self.comment = None
 1173: 		    else:
 1174: 		        self.error("parseStruct: expecting ;", token)
 1175: 		elif token != None and token[0] == "sep" and token[1] == "{":
 1176: 		    token = self.token()
 1177: 		    token = self.parseTypeBlock(token)
 1178: 		    if token != None and token[0] == "name":
 1179: 			token = self.token()
 1180: 		    if token != None and token[0] == "sep" and token[1] == ";":
 1181: 			token = self.token()
 1182: 		    else:
 1183: 		        self.error("parseStruct: expecting ;", token)
 1184: 		else:
 1185: 		    self.error("parseStruct: name", token)
 1186: 		    token = self.token()
 1187: 		self.type = base_type;
 1188:         self.struct_fields = fields
 1189: 	 #self.debug("end parseStruct", token)
 1190: 	 #print fields
 1191: 	return token
 1192: 
 1193:      #
 1194:      # Parse a C enum block, parse till the balancing }
 1195:      #
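            # Each enumerator is recorded in self.enums as a (name, value,
            # comment) tuple; when no explicit value is given the previous
            # value plus one is used, so "enum { A = 1, B, C }" yields
            # ("A", "1", ...), ("B", "2", ...) and ("C", "3", ...).
            #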
 1196:     def parseEnumBlock(self, token):
 1197:         self.enums = []
 1198: 	name = None
 1199: 	self.comment = None
 1200: 	comment = ""
 1201: 	value = "0"
 1202:         while token != None:
 1203: 	    if token[0] == "sep" and token[1] == "{":
 1204: 	        token = self.token()
 1205: 		token = self.parseTypeBlock(token)
 1206: 	    elif token[0] == "sep" and token[1] == "}":
 1207: 		if name != None:
 1208: 		    if self.comment != None:
 1209: 			comment = self.comment
 1210: 			self.comment = None
 1211: 		    self.enums.append((name, value, comment))
 1212: 	        token = self.token()
 1213: 		return token
 1214: 	    elif token[0] == "name":
 1215: 		    if name != None:
 1216: 			if self.comment != None:
 1217: 			    comment = string.strip(self.comment)
 1218: 			    self.comment = None
 1219: 			self.enums.append((name, value, comment))
 1220: 		    name = token[1]
 1221: 		    comment = ""
 1222: 		    token = self.token()
 1223: 		    if token[0] == "op" and token[1][0] == "=":
 1224: 		        value = ""
 1225: 		        if len(token[1]) > 1:
 1226: 			    value = token[1][1:]
 1227: 		        token = self.token()
 1228: 		        while token[0] != "sep" or (token[1] != ',' and
 1229: 			      token[1] != '}'):
 1230: 			    value = value + token[1]
 1231: 			    token = self.token()
 1232: 		    else:
 1233: 		        try:
 1234: 			    value = "%d" % (int(value) + 1)
 1235: 			except:
 1236: 			    self.warning("Failed to compute value of enum %s" % (name))
 1237: 			    value=""
 1238: 		    if token[0] == "sep" and token[1] == ",":
 1239: 			token = self.token()
 1240: 	    else:
 1241: 	        token = self.token()
 1242: 	return token
 1243: 
 1244:      #
 1245:      # Parse a C definition block (used for structs); parses up to
 1246:      # the balancing }
 1247:      #
 1248:     def parseTypeBlock(self, token):
 1249:         while token != None:
 1250: 	    if token[0] == "sep" and token[1] == "{":
 1251: 	        token = self.token()
 1252: 		token = self.parseTypeBlock(token)
 1253: 	    elif token[0] == "sep" and token[1] == "}":
 1254: 	        token = self.token()
 1255: 		return token
 1256: 	    else:
 1257: 	        token = self.token()
 1258: 	return token
 1259: 
 1260:      #
 1261:      # Parse a type: the fact that the type name can either occur after
 1262:      #    the definition or within the definition makes it a little harder;
 1263:      #    if inside, the name token is pushed back before returning
 1264:      #
 1265:     def parseType(self, token):
 1266:         self.type = ""
 1267: 	self.struct_fields = []
 1268:         self.signature = None
 1269: 	if token == None:
 1270: 	    return token
 1271: 
 1272: 	while token[0] == "name" and (
 1273: 	      token[1] == "const" or \
 1274: 	      token[1] == "unsigned" or \
 1275: 	      token[1] == "signed"):
 1276: 	    if self.type == "":
 1277: 	        self.type = token[1]
 1278: 	    else:
 1279: 	        self.type = self.type + " " + token[1]
 1280: 	    token = self.token()
 1281: 
 1282:         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
 1283: 	    if self.type == "":
 1284: 	        self.type = token[1]
 1285: 	    else:
 1286: 	        self.type = self.type + " " + token[1]
 1287: 	    tmp = self.token()   # look ahead for an optional "int" ("long int", ...)
 1288: 	    if tmp != None and tmp[0] == "name" and tmp[1] == "int":
 1289: 		self.type = self.type + " " + tmp[1]
 1290: 	    elif tmp != None:
 1291: 		self.lexer.push(tmp)
 1292: 
 1293:         elif token[0] == "name" and token[1] == "struct":
 1294: 	    if self.type == "":
 1295: 	        self.type = token[1]
 1296: 	    else:
 1297: 	        self.type = self.type + " " + token[1]
 1298: 	    token = self.token()
 1299: 	    nametok = None
 1300: 	    if token[0] == "name":
 1301: 	        nametok = token
 1302: 		token = self.token()
 1303: 	    if token != None and token[0] == "sep" and token[1] == "{":
 1304: 		token = self.token()
 1305: 		token = self.parseStruct(token)
 1306: 	    elif token != None and token[0] == "op" and token[1] == "*":
 1307: 	        self.type = self.type + " " + nametok[1] + " *"
 1308: 		token = self.token()
 1309: 		while token != None and token[0] == "op" and token[1] == "*":
 1310: 		    self.type = self.type + " *"
 1311: 		    token = self.token()
 1312: 		if token[0] == "name":
 1313: 		    nametok = token
 1314: 		    token = self.token()
 1315: 		else:
 1316: 		    self.error("struct : expecting name", token)
 1317: 		    return token
 1318: 	    elif token != None and token[0] == "name" and nametok != None:
 1319: 	        self.type = self.type + " " + nametok[1]
 1320: 		return token
 1321: 
 1322: 	    if nametok != None:
 1323: 		self.lexer.push(token)
 1324: 		token = nametok
 1325: 	    return token
 1326: 
 1327:         elif token[0] == "name" and token[1] == "enum":
 1328: 	    if self.type == "":
 1329: 	        self.type = token[1]
 1330: 	    else:
 1331: 	        self.type = self.type + " " + token[1]
 1332: 	    self.enums = []
 1333: 	    token = self.token()
 1334: 	    if token != None and token[0] == "sep" and token[1] == "{":
 1335: 		token = self.token()
 1336: 		token = self.parseEnumBlock(token)
 1337: 	    else:
 1338: 		self.error("parsing enum: expecting '{'", token)
 1339: 	    enum_type = None
 1340: 	    if token != None and token[0] != "name":
 1341: 	        self.lexer.push(token)
 1342: 	        token = ("name", "enum")
 1343: 	    else:
 1344: 	        enum_type = token[1]
 1345: 	    for enum in self.enums:
 1346: 		self.index_add(enum[0], self.filename,
 1347: 			       not self.is_header, "enum",
 1348: 			       (enum[1], enum[2], enum_type))
 1349: 	    return token
 1350: 
 1351: 	elif token[0] == "name":
 1352: 	    if self.type == "":
 1353: 	        self.type = token[1]
 1354: 	    else:
 1355: 	        self.type = self.type + " " + token[1]
 1356: 	else:
 1357: 	    self.error("parsing type %s: expecting a name" % (self.type),
 1358: 	               token)
 1359: 	    return token
 1360: 	token = self.token()
 1361:         while token != None and (token[0] == "op" or
 1362: 	      token[0] == "name" and token[1] == "const"):
 1363: 	    self.type = self.type + " " + token[1]
 1364: 	    token = self.token()
 1365: 
 1366: 	 #
 1367: 	 # if there is a parenthesis here, this means a function type
 1368: 	 #
 1369: 	if token != None and token[0] == "sep" and token[1] == '(':
 1370: 	    self.type = self.type + token[1]
 1371: 	    token = self.token()
 1372: 	    while token != None and token[0] == "op" and token[1] == '*':
 1373: 	        self.type = self.type + token[1]
 1374: 		token = self.token()
 1375: 	    if token == None or token[0] != "name" :
 1376: 		self.error("parsing function type, name expected", token);
 1377: 	        return token
 1378: 	    self.type = self.type + token[1]
 1379: 	    nametok = token
 1380: 	    token = self.token()
 1381: 	    if token != None and token[0] == "sep" and token[1] == ')':
 1382: 		self.type = self.type + token[1]
 1383: 		token = self.token()
 1384: 		if token != None and token[0] == "sep" and token[1] == '(':
 1385: 		    token = self.token()
 1386: 		    type = self.type;
 1387: 		    token = self.parseSignature(token);
 1388: 		    self.type = type;
 1389: 		else:
 1390: 		    self.error("parsing function type, '(' expected", token);
 1391: 		    return token
 1392: 	    else:
 1393: 	        self.error("parsing function type, ')' expected", token);
 1394: 		return token
 1395: 	    self.lexer.push(token)
 1396: 	    token = nametok
 1397: 	    return token
 1398: 
 1399:          #
 1400: 	 # do some lookahead for arrays
 1401: 	 #
 1402: 	if token != None and token[0] == "name":
 1403: 	    nametok = token
 1404: 	    token = self.token()
 1405: 	    if token != None and token[0] == "sep" and token[1] == '[':
 1406: 	        self.type = self.type + nametok[1]
 1407: 		while token != None and token[0] == "sep" and token[1] == '[':
 1408: 		    self.type = self.type + token[1]
 1409: 		    token = self.token()
 1410: 		    while token != None and token[0] != 'sep' and \
 1411: 		          token[1] != ']' and token[1] != ';':
 1412: 			self.type = self.type + token[1]
 1413: 			token = self.token()
 1414: 		if token != None and token[0] == 'sep' and token[1] == ']':
 1415: 		    self.type = self.type + token[1]
 1416: 		    token = self.token()
 1417: 		else:
 1418: 		    self.error("parsing array type, ']' expected", token);
 1419: 		    return token
 1420: 	    elif token != None and token[0] == "sep" and token[1] == ':':
 1421: 	         # remove :12 in case it's a limited int size
 1422: 		token = self.token()
 1423: 		token = self.token()
 1424: 	    self.lexer.push(token)
 1425: 	    token = nametok
 1426: 
 1427: 	return token
 1428: 
 1429:      #
 1430:      # Parse a signature: '(' has been parsed and we scan the type definition
 1431:      #    up to and including the ')'
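            # The resulting self.signature is a list of (type, name, description)
            # tuples, e.g. "(xmlDocPtr doc, int options)" gives
            # [('xmlDocPtr', 'doc', None), ('int', 'options', None)]; the
            # description slot is filled in later by mergeFunctionComment().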
 1432:     def parseSignature(self, token):
 1433:         signature = []
 1434: 	if token != None and token[0] == "sep" and token[1] == ')':
 1435: 	    self.signature = []
 1436: 	    token = self.token()
 1437: 	    return token
 1438: 	while token != None:
 1439: 	    token = self.parseType(token)
 1440: 	    if token != None and token[0] == "name":
 1441: 	        signature.append((self.type, token[1], None))
 1442: 		token = self.token()
 1443: 	    elif token != None and token[0] == "sep" and token[1] == ',':
 1444: 		token = self.token()
 1445: 		continue
 1446: 	    elif token != None and token[0] == "sep" and token[1] == ')':
 1447: 	         # only the type was provided
 1448: 		if self.type == "...":
 1449: 		    signature.append((self.type, "...", None))
 1450: 		else:
 1451: 		    signature.append((self.type, None, None))
 1452: 	    if token != None and token[0] == "sep":
 1453: 	        if token[1] == ',':
 1454: 		    token = self.token()
 1455: 		    continue
 1456: 		elif token[1] == ')':
 1457: 		    token = self.token()
 1458: 		    break
 1459: 	self.signature = signature
 1460: 	return token
 1461: 
 1462:      #
 1463:      # Parse a global definition, be it a type, variable or function;
 1464:      # the extern "C" blocks are a bit nasty and require recursion.
 1465:      #
 1466:     def parseGlobal(self, token):
 1467:         static = 0
 1468:         if token[1] == 'extern':
 1469: 	    token = self.token()
 1470: 	    if token == None:
 1471: 	        return token
 1472: 	    if token[0] == 'string':
 1473: 	        if token[1] == 'C':
 1474: 		    token = self.token()
 1475: 		    if token == None:
 1476: 			return token
 1477: 		    if token[0] == 'sep' and token[1] == "{":
 1478: 		        token = self.token()
 1479: #			 print 'Entering extern "C line ', self.lineno()
 1480: 			while token != None and (token[0] != 'sep' or
 1481: 			      token[1] != "}"):
 1482: 			    if token[0] == 'name':
 1483: 				token = self.parseGlobal(token)
 1484: 			    else:
 1485: 				self.error(
 1486: 				 "token %s %s unexpected at the top level" % (
 1487: 					token[0], token[1]))
 1488: 				token = self.parseGlobal(token)
 1489: #			 print 'Exiting extern "C" line', self.lineno()
 1490: 			token = self.token()
 1491: 			return token
 1492: 		else:
 1493: 		    return token
 1494: 	elif token[1] == 'static':
 1495: 	    static = 1
 1496: 	    token = self.token()
 1497: 	    if token == None or  token[0] != 'name':
 1498: 	        return token
 1499: 
 1500: 	if token[1] == 'typedef':
 1501: 	    token = self.token()
 1502: 	    return self.parseTypedef(token)
 1503: 	else:
 1504: 	    token = self.parseType(token)
 1505: 	    type_orig = self.type
 1506: 	if token == None or token[0] != "name":
 1507: 	    return token
 1508: 	type = type_orig
 1509: 	self.name = token[1]
 1510: 	token = self.token()
 1511: 	while token != None and (token[0] == "sep" or token[0] == "op"):
 1512: 	    if token[0] == "sep":
 1513: 		if token[1] == "[":
 1514: 		    type = type + token[1]
 1515: 		    token = self.token()
 1516: 		    while token != None and (token[0] != "sep" or \
 1517: 		          token[1] != ";"):
 1518: 			type = type + token[1]
 1519: 			token = self.token()
 1520: 
 1521: 	    if token != None and token[0] == "op" and token[1] == "=":
 1522: 		 #
 1523: 		 # Skip the initialization of the variable
 1524: 		 #
 1525: 		token = self.token()
 1526: 		if token[0] == 'sep' and token[1] == '{':
 1527: 		    token = self.token()
 1528: 		    token = self.parseBlock(token)
 1529: 		else:
 1530: 		    self.comment = None
 1531: 		    while token != None and (token[0] != "sep" or \
 1532: 			  (token[1] != ';' and token[1] != ',')):
 1533: 			    token = self.token()
 1534: 		self.comment = None
 1535: 		if token == None or token[0] != "sep" or (token[1] != ';' and
 1536: 		   token[1] != ','):
 1537: 		    self.error("missing ';' or ',' after value")
 1538: 
 1539: 	    if token != None and token[0] == "sep":
 1540: 		if token[1] == ";":
 1541: 		    self.comment = None
 1542: 		    token = self.token()
 1543: 		    if type == "struct":
 1544: 		        self.index_add(self.name, self.filename,
 1545: 			     not self.is_header, "struct", self.struct_fields)
 1546: 		    else:
 1547: 			self.index_add(self.name, self.filename,
 1548: 			     not self.is_header, "variable", type)
 1549: 		    break
 1550: 		elif token[1] == "(":
 1551: 		    token = self.token()
 1552: 		    token = self.parseSignature(token)
 1553: 		    if token == None:
 1554: 			return None
 1555: 		    if token[0] == "sep" and token[1] == ";":
 1556: 		        d = self.mergeFunctionComment(self.name,
 1557: 				((type, None), self.signature), 1)
 1558: 			self.index_add(self.name, self.filename, static,
 1559: 			                "function", d)
 1560: 			token = self.token()
 1561: 		    elif token[0] == "sep" and token[1] == "{":
 1562: 		        d = self.mergeFunctionComment(self.name,
 1563: 				((type, None), self.signature), static)
 1564: 			self.index_add(self.name, self.filename, static,
 1565: 			                "function", d)
 1566: 			token = self.token()
 1567: 			token = self.parseBlock(token);
 1568: 		elif token[1] == ',':
 1569: 		    self.comment = None
 1570: 		    self.index_add(self.name, self.filename, static,
 1571: 		                    "variable", type)
 1572: 		    type = type_orig
 1573: 		    token = self.token()
 1574: 		    while token != None and token[0] == "sep":
 1575: 		        type = type + token[1]
 1576: 			token = self.token()
 1577: 		    if token != None and token[0] == "name":
 1578: 		        self.name = token[1]
 1579: 			token = self.token()
 1580: 		else:
 1581: 		    break
 1582: 
 1583: 	return token
 1584: 
 1585:     def parse(self):
 1586:         self.warning("Parsing %s" % (self.filename))
 1587:         token = self.token()
 1588: 	while token != None:
 1589:             if token[0] == 'name':
 1590: 	        token = self.parseGlobal(token)
 1591:             else:
 1592: 	        self.error("token %s %s unexpected at the top level" % (
 1593: 		       token[0], token[1]))
 1594: 		token = self.parseGlobal(token)
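       		# give up on the rest of this file after an unexpected construct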
 1595: 		return
 1596: 	self.parseTopComment(self.top_comment)
 1597:         return self.index
 1598: 
 1599: 
 1600: class docBuilder:
 1601:     """A documentation builder"""
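           #
           # Typical use, as driven by rebuild() at the end of this file:
           #   builder = docBuilder("libxml2", ["..", "../include/libxml"],
           #                        ["xmlwin32version.h", "tst.c"])
           #   builder.scan()       # locate and parse the .c and .h files
           #   builder.analyze()    # print a summary and analyze the index
           #   builder.serialize()  # write libxml2-api.xml and libxml2-refs.xml
           #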
 1602:     def __init__(self, name, directories=['.'], excludes=[]):
 1603:         self.name = name
 1604:         self.directories = directories
 1605: 	self.excludes = excludes + ignored_files.keys()
 1606: 	self.modules = {}
 1607: 	self.headers = {}
 1608: 	self.idx = index()
 1609:         self.xref = {}
 1610: 	self.index = {}
 1611: 	if name == 'libxml2':
 1612: 	    self.basename = 'libxml'
 1613: 	else:
 1614: 	    self.basename = name
 1615: 
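           # Record which identifiers mention which words: punctuation in the
           # description is blanked out, then every word of at least three
           # characters that starts with a letter (apart from a couple of stop
           # words) is mapped in self.xref to the list of ids whose
           # documentation contains it.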
 1616:     def indexString(self, id, str):
 1617: 	if str == None:
 1618: 	    return
 1619: 	str = string.replace(str, "'", ' ')
 1620: 	str = string.replace(str, '"', ' ')
 1621: 	str = string.replace(str, "/", ' ')
 1622: 	str = string.replace(str, '*', ' ')
 1623: 	str = string.replace(str, "[", ' ')
 1624: 	str = string.replace(str, "]", ' ')
 1625: 	str = string.replace(str, "(", ' ')
 1626: 	str = string.replace(str, ")", ' ')
 1627: 	str = string.replace(str, "<", ' ')
 1628: 	str = string.replace(str, '>', ' ')
 1629: 	str = string.replace(str, "&", ' ')
 1630: 	str = string.replace(str, '#', ' ')
 1631: 	str = string.replace(str, ",", ' ')
 1632: 	str = string.replace(str, '.', ' ')
 1633: 	str = string.replace(str, ';', ' ')
 1634: 	tokens = string.split(str)
 1635: 	for token in tokens:
 1636: 	    try:
 1637: 		c = token[0]
 1638: 		if string.find(string.letters, c) < 0:
 1639: 		    pass
 1640: 		elif len(token) < 3:
 1641: 		    pass
 1642: 		else:
 1643: 		    lower = string.lower(token)
 1644: 		    # TODO: generalize this a bit
 1645: 		    if lower == 'and' or lower == 'the':
 1646: 			pass
 1647: 		    elif self.xref.has_key(token):
 1648: 			self.xref[token].append(id)
 1649: 		    else:
 1650: 			self.xref[token] = [id]
 1651: 	    except:
 1652: 		pass
 1653: 
 1654:     def analyze(self):
 1655:         print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
 1656: 	self.idx.analyze()
 1657: 
 1658:     def scanHeaders(self):
 1659: 	for header in self.headers.keys():
 1660: 	    parser = CParser(header)
 1661: 	    idx = parser.parse()
 1662: 	    self.headers[header] = idx;
 1663: 	    self.idx.merge(idx)
 1664: 
 1665:     def scanModules(self):
 1666: 	for module in self.modules.keys():
 1667: 	    parser = CParser(module)
 1668: 	    idx = parser.parse()
 1669: 	    # idx.analyze()
 1670: 	    self.modules[module] = idx
 1671: 	    self.idx.merge_public(idx)
 1672: 
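           # Gather the .c files (modules) and .h files (headers) from the
           # configured directories, skipping any path that contains an
           # excluded name, then parse them all.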
 1673:     def scan(self):
 1674:         for directory in self.directories:
 1675: 	    files = glob.glob(directory + "/*.c")
 1676: 	    for file in files:
 1677: 	        skip = 0
 1678: 		for excl in self.excludes:
 1679: 		    if string.find(file, excl) != -1:
 1680: 		        skip = 1;
 1681: 			break
 1682: 		if skip == 0:
 1683: 		    self.modules[file] = None;
 1684: 	    files = glob.glob(directory + "/*.h")
 1685: 	    for file in files:
 1686: 	        skip = 0
 1687: 		for excl in self.excludes:
 1688: 		    if string.find(file, excl) != -1:
 1689: 		        skip = 1;
 1690: 			break
 1691: 		if skip == 0:
 1692: 		    self.headers[file] = None;
 1693: 	self.scanHeaders()
 1694: 	self.scanModules()
 1695: 
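           # Strip the directory and the .c/.h extension from a path, for
           # instance "include/libxml/tree.h" becomes "tree".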
 1696:     def modulename_file(self, file):
 1697:         module = os.path.basename(file)
 1698: 	if module[-2:] == '.h':
 1699: 	    module = module[:-2]
 1700: 	elif module[-2:] == '.c':
 1701: 	    module = module[:-2]
 1702: 	return module
 1703: 
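           # Emit a single enum value, of the form
           #   <enum name='...' file='...' value='...' type='...' info='...'/>
           # where value, type and info only appear when they are known.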
 1704:     def serialize_enum(self, output, name):
 1705:         id = self.idx.enums[name]
 1706:         output.write("    <enum name='%s' file='%s'" % (name,
 1707: 	             self.modulename_file(id.header)))
 1708: 	if id.info != None:
 1709: 	    info = id.info
 1710: 	    if info[0] != None and info[0] != '':
 1711: 	        try:
 1712: 		    val = eval(info[0])
 1713: 		except:
 1714: 		    val = info[0]
 1715: 		output.write(" value='%s'" % (val));
 1716: 	    if info[2] != None and info[2] != '':
 1717: 		output.write(" type='%s'" % info[2]);
 1718: 	    if info[1] != None and info[1] != '':
 1719: 		output.write(" info='%s'" % escape(info[1]));
 1720:         output.write("/>\n")
 1721: 
 1722:     def serialize_macro(self, output, name):
 1723:         id = self.idx.macros[name]
 1724:         output.write("    <macro name='%s' file='%s'>\n" % (name,
 1725: 	             self.modulename_file(id.header)))
 1726: 	if id.info != None:
 1727:             try:
 1728: 		(args, desc) = id.info
 1729: 		if desc != None and desc != "":
 1730: 		    output.write("      <info>%s</info>\n" % (escape(desc)))
 1731: 		    self.indexString(name, desc)
 1732: 		for arg in args:
 1733: 		    (name, desc) = arg
 1734: 		    if desc != None and desc != "":
 1735: 			output.write("      <arg name='%s' info='%s'/>\n" % (
 1736: 				     name, escape(desc)))
 1737: 			self.indexString(name, desc)
 1738: 		    else:
 1739: 			output.write("      <arg name='%s'/>\n" % (name))
 1740:             except:
 1741:                 pass
 1742:         output.write("    </macro>\n")
 1743: 
 1744:     def serialize_typedef(self, output, name):
 1745:         id = self.idx.typedefs[name]
 1746: 	if id.info[0:7] == 'struct ':
 1747: 	    output.write("    <struct name='%s' file='%s' type='%s'" % (
 1748: 	             name, self.modulename_file(id.header), id.info))
 1749: 	    name = id.info[7:]
 1750: 	    if self.idx.structs.has_key(name) and ( \
 1751: 	       type(self.idx.structs[name].info) == type(()) or
 1752: 		type(self.idx.structs[name].info) == type([])):
 1753: 	        output.write(">\n");
 1754: 		try:
 1755: 		    for field in self.idx.structs[name].info:
 1756: 			desc = field[2]
 1757: 			self.indexString(name, desc)
 1758: 			if desc == None:
 1759: 			    desc = ''
 1760: 			else:
 1761: 			    desc = escape(desc)
 1762: 			output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
 1763: 		except:
 1764: 		    print "Failed to serialize struct %s" % (name)
 1765: 		output.write("    </struct>\n")
 1766: 	    else:
 1767: 	        output.write("/>\n");
 1768: 	else :
 1769: 	    output.write("    <typedef name='%s' file='%s' type='%s'" % (
 1770: 	                 name, self.modulename_file(id.header), id.info))
 1771:             try:
 1772: 		desc = id.extra
 1773: 		if desc != None and desc != "":
 1774: 		    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
 1775: 		    output.write("    </typedef>\n")
 1776: 		else:
 1777: 		    output.write("/>\n")
 1778: 	    except:
 1779: 		output.write("/>\n")
 1780: 
 1781:     def serialize_variable(self, output, name):
 1782:         id = self.idx.variables[name]
 1783: 	if id.info != None:
 1784: 	    output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
 1785: 		    name, self.modulename_file(id.header), id.info))
 1786: 	else:
 1787: 	    output.write("    <variable name='%s' file='%s'/>\n" % (
 1788: 	            name, self.modulename_file(id.header)))
 1789: 
 1790:     def serialize_function(self, output, name):
 1791:         id = self.idx.functions[name]
 1792: 	if name == debugsym:
 1793: 	    print "=>", id
 1794: 
 1795:         output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
 1796: 	             name, self.modulename_file(id.header),
 1797: 		     self.modulename_file(id.module)))
 1798: 	#
 1799: 	# Processing of conditionals modified by Bill 1/1/05
 1800: 	#
 1801: 	if id.conditionals != None:
 1802: 	    apstr = ""
 1803: 	    for cond in id.conditionals:
 1804: 	        if apstr != "":
 1805: 		    apstr = apstr + " &amp;&amp; "
 1806: 		apstr = apstr + cond
 1807: 	    output.write("      <cond>%s</cond>\n"% (apstr));
 1808: 	try:
 1809: 	    (ret, params, desc) = id.info
 1810: 	    if (desc == None or desc == '') and \
 1811: 	       name[0:9] != "xmlThrDef" and name != "xmlDllMain":
 1812: 	        print "%s %s from %s has no description" % (id.type, name,
 1813: 		       self.modulename_file(id.module))
 1814: 
 1815: 	    output.write("      <info>%s</info>\n" % (escape(desc)))
 1816: 	    self.indexString(name, desc)
 1817: 	    if ret[0] != None:
 1818: 	        if ret[0] == "void":
 1819: 		    output.write("      <return type='void'/>\n")
 1820: 		else:
 1821: 		    output.write("      <return type='%s' info='%s'/>\n" % (
 1822: 			     ret[0], escape(ret[1])))
 1823: 		    self.indexString(name, ret[1])
 1824: 	    for param in params:
 1825: 	        if param[0] == 'void':
 1826: 		    continue
 1827: 	        if param[2] == None:
 1828: 		    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
 1829: 		else:
 1830: 		    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
 1831: 		    self.indexString(name, param[2])
 1832: 	except:
 1833: 	    print "Failed to save function %s info: " % name, `id.info`
 1834:         output.write("    </%s>\n" % (id.type))
 1835: 
 1836:     def serialize_exports(self, output, file):
 1837:         module = self.modulename_file(file)
 1838: 	output.write("    <file name='%s'>\n" % (module))
 1839: 	dict = self.headers[file]
 1840: 	if dict.info != None:
 1841: 	    for data in ('Summary', 'Description', 'Author'):
 1842: 		try:
 1843: 		    output.write("     <%s>%s</%s>\n" % (
 1844: 		                 string.lower(data),
 1845: 				 escape(dict.info[data]),
 1846: 				 string.lower(data)))
 1847: 		except:
 1848: 		    print "Header %s lacks a %s description" % (module, data)
 1849: 	    if dict.info.has_key('Description'):
 1850: 	        desc = dict.info['Description']
 1851: 		if string.find(desc, "DEPRECATED") != -1:
 1852: 		    output.write("     <deprecated/>\n")
 1853: 
 1854:         ids = dict.macros.keys()
 1855: 	ids.sort()
 1856: 	for id in uniq(ids):
 1857: 	    # Macros are sometimes used to masquerade as other types.
 1858: 	    if dict.functions.has_key(id):
 1859: 	        continue
 1860: 	    if dict.variables.has_key(id):
 1861: 	        continue
 1862: 	    if dict.typedefs.has_key(id):
 1863: 	        continue
 1864: 	    if dict.structs.has_key(id):
 1865: 	        continue
 1866: 	    if dict.enums.has_key(id):
 1867: 	        continue
 1868: 	    output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
 1869:         ids = dict.enums.keys()
 1870: 	ids.sort()
 1871: 	for id in uniq(ids):
 1872: 	    output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
 1873:         ids = dict.typedefs.keys()
 1874: 	ids.sort()
 1875: 	for id in uniq(ids):
 1876: 	    output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
 1877:         ids = dict.structs.keys()
 1878: 	ids.sort()
 1879: 	for id in uniq(ids):
 1880: 	    output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
 1881:         ids = dict.variables.keys()
 1882: 	ids.sort()
 1883: 	for id in uniq(ids):
 1884: 	    output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
 1885:         ids = dict.functions.keys()
 1886: 	ids.sort()
 1887: 	for id in uniq(ids):
 1888: 	    output.write("     <exports symbol='%s' type='function'/>\n" % (id))
 1889: 	output.write("    </file>\n")
 1890: 
 1891:     def serialize_xrefs_files(self, output):
 1892:         headers = self.headers.keys()
 1893:         headers.sort()
 1894:         for file in headers:
 1895: 	    module = self.modulename_file(file)
 1896: 	    output.write("    <file name='%s'>\n" % (module))
 1897: 	    dict = self.headers[file]
 1898: 	    ids = uniq(dict.functions.keys() + dict.variables.keys() + \
 1899: 		  dict.macros.keys() + dict.typedefs.keys() + \
 1900: 		  dict.structs.keys() + dict.enums.keys())
 1901: 	    ids.sort()
 1902: 	    for id in ids:
 1903: 		output.write("      <ref name='%s'/>\n" % (id))
 1904: 	    output.write("    </file>\n")
 1905:         pass
 1906: 
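           # Cross-reference the functions by the type of their arguments,
           # ignoring trivial types (void, int, char *, const char *).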
 1907:     def serialize_xrefs_functions(self, output):
 1908:         funcs = {}
 1909: 	for name in self.idx.functions.keys():
 1910: 	    id = self.idx.functions[name]
 1911: 	    try:
 1912: 		(ret, params, desc) = id.info
 1913: 		for param in params:
 1914: 		    if param[0] == 'void':
 1915: 			continue
 1916: 		    if funcs.has_key(param[0]):
 1917: 		        funcs[param[0]].append(name)
 1918: 		    else:
 1919: 		        funcs[param[0]] = [name]
 1920: 	    except:
 1921: 	        pass
 1922: 	typ = funcs.keys()
 1923: 	typ.sort()
 1924: 	for type in typ:
 1925: 	    if type == '' or type == 'void' or type == "int" or \
 1926: 	       type == "char *" or type == "const char *" :
 1927: 	        continue
 1928: 	    output.write("    <type name='%s'>\n" % (type))
 1929: 	    ids = funcs[type]
 1930: 	    ids.sort()
 1931: 	    pid = ''	# not sure why we have dups, but get rid of them!
 1932: 	    for id in ids:
 1933: 	        if id != pid:
 1934: 	            output.write("      <ref name='%s'/>\n" % (id))
 1935: 		    pid = id
 1936: 	    output.write("    </type>\n")
 1937: 
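           # Cross-reference the functions by their return type, i.e. list
           # for each type the functions able to produce it.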
 1938:     def serialize_xrefs_constructors(self, output):
 1939:         funcs = {}
 1940: 	for name in self.idx.functions.keys():
 1941: 	    id = self.idx.functions[name]
 1942: 	    try:
 1943: 		(ret, params, desc) = id.info
 1944: 		if ret[0] == "void":
 1945: 		    continue
 1946: 		if funcs.has_key(ret[0]):
 1947: 		    funcs[ret[0]].append(name)
 1948: 		else:
 1949: 		    funcs[ret[0]] = [name]
 1950: 	    except:
 1951: 	        pass
 1952: 	typ = funcs.keys()
 1953: 	typ.sort()
 1954: 	for type in typ:
 1955: 	    if type == '' or type == 'void' or type == "int" or \
 1956: 	       type == "char *" or type == "const char *" :
 1957: 	        continue
 1958: 	    output.write("    <type name='%s'>\n" % (type))
 1959: 	    ids = funcs[type]
 1960: 	    ids.sort()
 1961: 	    for id in ids:
 1962: 	        output.write("      <ref name='%s'/>\n" % (id))
 1963: 	    output.write("    </type>\n")
 1964: 
 1965:     def serialize_xrefs_alpha(self, output):
 1966: 	letter = None
 1967: 	ids = self.idx.identifiers.keys()
 1968: 	ids.sort()
 1969: 	for id in ids:
 1970: 	    if id[0] != letter:
 1971: 		if letter != None:
 1972: 		    output.write("    </letter>\n")
 1973: 		letter = id[0]
 1974: 		output.write("    <letter name='%s'>\n" % (letter))
 1975: 	    output.write("      <ref name='%s'/>\n" % (id))
 1976: 	if letter != None:
 1977: 	    output.write("    </letter>\n")
 1978: 
 1979:     def serialize_xrefs_references(self, output):
 1980:         typ = self.idx.identifiers.keys()
 1981: 	typ.sort()
 1982: 	for id in typ:
 1983: 	    idf = self.idx.identifiers[id]
 1984: 	    module = idf.header
 1985: 	    output.write("    <reference name='%s' href='%s'/>\n" % (id,
 1986: 	                 'html/' + self.basename + '-' +
 1987: 		         self.modulename_file(module) + '.html#' +
 1988: 			 id))
 1989: 
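           # Emit the word index built by indexString(), grouped by first
           # letter and split into <chunk> elements of roughly 200 references
           # each; words referenced by more than 30 symbols are considered
           # too common and are dropped.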
 1990:     def serialize_xrefs_index(self, output):
 1991:         index = self.xref
 1992: 	typ = index.keys()
 1993: 	typ.sort()
 1994: 	letter = None
 1995: 	count = 0
 1996: 	chunk = 0
 1997: 	chunks = []
 1998: 	for id in typ:
 1999: 	    if len(index[id]) > 30:
 2000: 		continue
 2001: 	    if id[0] != letter:
 2002: 		if letter == None or count > 200:
 2003: 		    if letter != None:
 2004: 			output.write("      </letter>\n")
 2005: 			output.write("    </chunk>\n")
 2006: 			count = 0
 2007: 			chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
 2008: 		    output.write("    <chunk name='chunk%s'>\n" % (chunk))
 2009: 		    first_letter = id[0]
 2010: 		    chunk = chunk + 1
 2011: 		elif letter != None:
 2012: 		    output.write("      </letter>\n")
 2013: 		letter = id[0]
 2014: 		output.write("      <letter name='%s'>\n" % (letter))
 2015: 	    output.write("        <word name='%s'>\n" % (id))
 2016: 	    tokens = index[id];
 2017: 	    tokens.sort()
 2018: 	    tok = None
 2019: 	    for token in tokens:
 2020: 		if tok == token:
 2021: 		    continue
 2022: 		tok = token
 2023: 		output.write("          <ref name='%s'/>\n" % (token))
 2024: 		count = count + 1
 2025: 	    output.write("        </word>\n")
 2026: 	if letter != None:
 2027: 	    output.write("      </letter>\n")
 2028: 	    output.write("    </chunk>\n")
 2029: 	    if count != 0:
 2030: 	        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
 2031: 	    output.write("    <chunks>\n")
 2032: 	    for ch in chunks:
 2033: 		output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
 2034: 			     ch[0], ch[1], ch[2]))
 2035: 	    output.write("    </chunks>\n")
 2036: 
 2037:     def serialize_xrefs(self, output):
 2038: 	output.write("  <references>\n")
 2039: 	self.serialize_xrefs_references(output)
 2040: 	output.write("  </references>\n")
 2041: 	output.write("  <alpha>\n")
 2042: 	self.serialize_xrefs_alpha(output)
 2043: 	output.write("  </alpha>\n")
 2044: 	output.write("  <constructors>\n")
 2045: 	self.serialize_xrefs_constructors(output)
 2046: 	output.write("  </constructors>\n")
 2047: 	output.write("  <functions>\n")
 2048: 	self.serialize_xrefs_functions(output)
 2049: 	output.write("  </functions>\n")
 2050: 	output.write("  <files>\n")
 2051: 	self.serialize_xrefs_files(output)
 2052: 	output.write("  </files>\n")
 2053: 	output.write("  <index>\n")
 2054: 	self.serialize_xrefs_index(output)
 2055: 	output.write("  </index>\n")
 2056: 
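           # Write the two output files: <name>-api.xml describing the files
           # and symbols of the API, and <name>-refs.xml with the
           # cross-reference tables built above.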
 2057:     def serialize(self):
 2058:         filename = "%s-api.xml" % self.name
 2059:         print "Saving XML description %s" % (filename)
 2060:         output = open(filename, "w")
 2061:         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
 2062:         output.write("<api name='%s'>\n" % self.name)
 2063:         output.write("  <files>\n")
 2064:         headers = self.headers.keys()
 2065:         headers.sort()
 2066:         for file in headers:
 2067:             self.serialize_exports(output, file)
 2068:         output.write("  </files>\n")
 2069:         output.write("  <symbols>\n")
 2070:         macros = self.idx.macros.keys()
 2071:         macros.sort()
 2072:         for macro in macros:
 2073:             self.serialize_macro(output, macro)
 2074:         enums = self.idx.enums.keys()
 2075:         enums.sort()
 2076:         for enum in enums:
 2077:             self.serialize_enum(output, enum)
 2078:         typedefs = self.idx.typedefs.keys()
 2079:         typedefs.sort()
 2080:         for typedef in typedefs:
 2081:             self.serialize_typedef(output, typedef)
 2082:         variables = self.idx.variables.keys()
 2083:         variables.sort()
 2084:         for variable in variables:
 2085:             self.serialize_variable(output, variable)
 2086:         functions = self.idx.functions.keys()
 2087:         functions.sort()
 2088:         for function in functions:
 2089:             self.serialize_function(output, function)
 2090:         output.write("  </symbols>\n")
 2091:         output.write("</api>\n")
 2092:         output.close()
 2093: 
 2094:         filename = "%s-refs.xml" % self.name
 2095:         print "Saving XML Cross References %s" % (filename)
 2096:         output = open(filename, "w")
 2097:         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
 2098:         output.write("<apirefs name='%s'>\n" % self.name)
 2099:         self.serialize_xrefs(output)
 2100:         output.write("</apirefs>\n")
 2101:         output.close()
 2102: 
 2103: 
 2104: def rebuild():
 2105:     builder = None
 2106:     if glob.glob("parser.c") != [] :
 2107:         print "Rebuilding API description for libxml2"
 2108: 	builder = docBuilder("libxml2", [".", "."],
 2109: 	                     ["xmlwin32version.h", "tst.c"])
 2110:     elif glob.glob("../parser.c") != [] :
 2111:         print "Rebuilding API description for libxml2"
 2112: 	builder = docBuilder("libxml2", ["..", "../include/libxml"],
 2113: 	                     ["xmlwin32version.h", "tst.c"])
 2114:     elif glob.glob("../libxslt/transform.c") != [] :
 2115:         print "Rebuilding API description for libxslt"
 2116: 	builder = docBuilder("libxslt", ["../libxslt"],
 2117: 	                     ["win32config.h", "libxslt.h", "tst.c"])
 2118:     else:
 2119:         print "rebuild() failed, unable to guess the module"
 2120: 	return None
 2121:     builder.scan()
 2122:     builder.analyze()
 2123:     builder.serialize()
 2124:     if glob.glob("../libexslt/exslt.c") != [] :
 2125:         extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
 2126: 	extra.scan()
 2127: 	extra.analyze()
 2128: 	extra.serialize()
 2129:     return builder
 2130: 
 2131: #
 2132: # for debugging the parser
 2133: #
 2134: def parse(filename):
 2135:     parser = CParser(filename)
 2136:     idx = parser.parse()
 2137:     return idx
 2138: 
 2139: if __name__ == "__main__":
 2140:     if len(sys.argv) > 1:
 2141:         debug = 1
 2142:         parse(sys.argv[1])
 2143:     else:
 2144: 	rebuild()
