File: [ELWIX - Embedded LightWeight unIX] / embedaddon / libxml2 / doc / apibuild.py
Revision 1.1.1.2 (vendor branch)
Mon Jul 22 01:22:24 2013 UTC by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, HEAD
2.8.0

    1: #!/usr/bin/python -u
    2: #
    3: # This is the API builder: it parses the C sources and builds the
    4: # formal API description in XML.
    5: #
    6: # See Copyright for the status of this software.
    7: #
    8: # daniel@veillard.com
    9: #
   10: import os, sys
   11: import string
   12: import glob
   13: 
   14: debug=0
   15: #debugsym='ignorableWhitespaceSAXFunc'
   16: debugsym=None
   17: 
   18: #
   19: # C parser analysis code
   20: #
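       # Files skipped entirely when building the API description, each mapped
       # to a short human-readable reason (docBuilder below adds these names to
       # its exclusion list).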
   21: ignored_files = {
   22:   "trio": "too many non standard macros",
   23:   "trio.c": "too many non standard macros",
   24:   "trionan.c": "too many non standard macros",
   25:   "triostr.c": "too many non standard macros",
   26:   "acconfig.h": "generated portability layer",
   27:   "config.h": "generated portability layer",
   28:   "libxml.h": "internal only",
   29:   "testOOM.c": "out of memory tester",
   30:   "testOOMlib.h": "out of memory tester",
   31:   "testOOMlib.c": "out of memory tester",
   32:   "rngparser.c": "not yet integrated",
   33:   "rngparser.h": "not yet integrated",
   34:   "elfgcchack.h": "not a normal header",
   35:   "testHTML.c": "test tool",
   36:   "testReader.c": "test tool",
   37:   "testSchemas.c": "test tool",
   38:   "testXPath.c": "test tool",
   39:   "testAutomata.c": "test tool",
   40:   "testModule.c": "test tool",
   41:   "testRegexp.c": "test tool",
   42:   "testThreads.c": "test tool",
   43:   "testC14N.c": "test tool",
   44:   "testRelax.c": "test tool",
   45:   "testThreadsWin32.c": "test tool",
   46:   "testSAX.c": "test tool",
   47:   "testURI.c": "test tool",
   48:   "testapi.c": "generated regression tests",
   49:   "runtest.c": "regression tests program",
   50:   "runsuite.c": "regression tests program",
   51:   "tst.c": "not part of the library",
   52:   "test.c": "not part of the library",
   53:   "testdso.c": "test for dynamic shared libraries",
   54:   "testrecurse.c": "test for entity recursion",
   55:   "xzlib.h": "Internal API only",
   56: }
   57: 
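       # Tokens the parser silently ignores: the value is (number of following
       # tokens to also discard, reason).  See CParser.token() below, which
       # consults this table.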
   58: ignored_words = {
   59:   "WINAPI": (0, "Windows keyword"),
   60:   "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
   61:   "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
   62:   "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
   63:   "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
   64:   "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
   65:   "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   66:   "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   67:   "XMLCALL": (0, "Special macro for win32 calls"),
   68:   "XSLTCALL": (0, "Special macro for win32 calls"),
   69:   "XMLCDECL": (0, "Special macro for win32 calls"),
   70:   "EXSLTCALL": (0, "Special macro for win32 calls"),
   71:   "__declspec": (3, "Windows keyword"),
   72:   "__stdcall": (0, "Windows keyword"),
   73:   "ATTRIBUTE_UNUSED": (0, "macro keyword"),
   74:   "LIBEXSLT_PUBLIC": (0, "macro keyword"),
   75:   "X_IN_Y": (5, "macro function builder"),
   76:   "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   77:   "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
   78:   "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
   79:   "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   80: }
   81: 
   82: def escape(raw):
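           # Escape a string for inclusion in the XML output.  Illustrative:
           # escape('a < b & "c"') -> 'a &lt; b &amp; &quot;c&quot;'
           # ('&' is replaced first so the entities added below are not re-escaped).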
    83:     raw = string.replace(raw, '&', '&amp;')
   84:     raw = string.replace(raw, '<', '&lt;')
   85:     raw = string.replace(raw, '>', '&gt;')
   86:     raw = string.replace(raw, "'", '&apos;')
   87:     raw = string.replace(raw, '"', '&quot;')
   88:     return raw
   89: 
   90: def uniq(items):
   91:     d = {}
   92:     for item in items:
   93:         d[item]=1
   94:     return d.keys()
   95: 
   96: class identifier:
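           # Describes a single API symbol (function, functype, variable, struct,
           # enum, typedef, macro or include) together with the header/module it
           # came from, its line number, staticness, documentation info and the
           # preprocessor conditionals guarding it.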
   97:     def __init__(self, name, header=None, module=None, type=None, lineno = 0,
   98:                  info=None, extra=None, conditionals = None):
   99:         self.name = name
  100: 	self.header = header
  101: 	self.module = module
  102: 	self.type = type
  103: 	self.info = info
  104: 	self.extra = extra
  105: 	self.lineno = lineno
  106: 	self.static = 0
  107: 	if conditionals == None or len(conditionals) == 0:
  108: 	    self.conditionals = None
  109: 	else:
  110: 	    self.conditionals = conditionals[:]
  111: 	if self.name == debugsym:
  112: 	    print "=> define %s : %s" % (debugsym, (module, type, info,
  113: 	                                 extra, conditionals))
  114: 
  115:     def __repr__(self):
  116:         r = "%s %s:" % (self.type, self.name)
  117: 	if self.static:
  118: 	    r = r + " static"
  119: 	if self.module != None:
  120: 	    r = r + " from %s" % (self.module)
  121: 	if self.info != None:
  122: 	    r = r + " " +  `self.info`
  123: 	if self.extra != None:
  124: 	    r = r + " " + `self.extra`
  125: 	if self.conditionals != None:
  126: 	    r = r + " " + `self.conditionals`
  127: 	return r
  128: 
  129: 
  130:     def set_header(self, header):
  131:         self.header = header
  132:     def set_module(self, module):
  133:         self.module = module
  134:     def set_type(self, type):
  135:         self.type = type
  136:     def set_info(self, info):
  137:         self.info = info
  138:     def set_extra(self, extra):
  139:         self.extra = extra
  140:     def set_lineno(self, lineno):
  141:         self.lineno = lineno
  142:     def set_static(self, static):
  143:         self.static = static
  144:     def set_conditionals(self, conditionals):
  145: 	if conditionals == None or len(conditionals) == 0:
  146: 	    self.conditionals = None
  147: 	else:
  148: 	    self.conditionals = conditionals[:]
  149: 
  150:     def get_name(self):
  151:         return self.name
  152:     def get_header(self):
   153:         return self.header
  154:     def get_module(self):
  155:         return self.module
  156:     def get_type(self):
  157:         return self.type
  158:     def get_info(self):
  159:         return self.info
  160:     def get_lineno(self):
  161:         return self.lineno
  162:     def get_extra(self):
  163:         return self.extra
  164:     def get_static(self):
  165:         return self.static
  166:     def get_conditionals(self):
  167:         return self.conditionals
  168: 
  169:     def update(self, header, module, type = None, info = None, extra=None,
  170:                conditionals=None):
  171: 	if self.name == debugsym:
  172: 	    print "=> update %s : %s" % (debugsym, (module, type, info,
  173: 	                                 extra, conditionals))
  174:         if header != None and self.header == None:
  175: 	    self.set_header(module)
  176:         if module != None and (self.module == None or self.header == self.module):
  177: 	    self.set_module(module)
  178:         if type != None and self.type == None:
  179: 	    self.set_type(type)
  180:         if info != None:
  181: 	    self.set_info(info)
  182:         if extra != None:
  183: 	    self.set_extra(extra)
  184:         if conditionals != None:
  185: 	    self.set_conditionals(conditionals)
  186: 
  187: class index:
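           # Aggregates identifier objects by category (functions, variables,
           # includes, structs, enums, typedefs, macros) plus the cross-references
           # collected while scanning function bodies.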
  188:     def __init__(self, name = "noname"):
  189:         self.name = name
  190:         self.identifiers = {}
  191:         self.functions = {}
  192: 	self.variables = {}
  193: 	self.includes = {}
  194: 	self.structs = {}
  195: 	self.enums = {}
  196: 	self.typedefs = {}
  197: 	self.macros = {}
  198: 	self.references = {}
  199: 	self.info = {}
  200: 
  201:     def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
  202:         if name[0:2] == '__':
  203: 	    return None
  204:         d = None
  205:         try:
  206: 	   d = self.identifiers[name]
  207: 	   d.update(header, module, type, info, extra, conditionals)
  208: 	except:
  209: 	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
  210: 	   self.identifiers[name] = d
  211: 
  212: 	if d != None and static == 1:
  213: 	    d.set_static(1)
  214: 
  215: 	if d != None and name != None and type != None:
  216: 	    self.references[name] = d
  217: 
  218: 	if name == debugsym:
  219: 	    print "New ref: %s" % (d)
  220: 
  221: 	return d
  222: 
  223:     def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
  224:         if name[0:2] == '__':
  225: 	    return None
  226:         d = None
  227:         try:
  228: 	   d = self.identifiers[name]
  229: 	   d.update(header, module, type, info, extra, conditionals)
  230: 	except:
  231: 	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
  232: 	   self.identifiers[name] = d
  233: 
  234: 	if d != None and static == 1:
  235: 	    d.set_static(1)
  236: 
  237: 	if d != None and name != None and type != None:
  238: 	    if type == "function":
  239: 	        self.functions[name] = d
  240: 	    elif type == "functype":
  241: 	        self.functions[name] = d
  242: 	    elif type == "variable":
  243: 	        self.variables[name] = d
  244: 	    elif type == "include":
  245: 	        self.includes[name] = d
  246: 	    elif type == "struct":
  247: 	        self.structs[name] = d
  248: 	    elif type == "enum":
  249: 	        self.enums[name] = d
  250: 	    elif type == "typedef":
  251: 	        self.typedefs[name] = d
  252: 	    elif type == "macro":
  253: 	        self.macros[name] = d
  254: 	    else:
  255: 	        print "Unable to register type ", type
  256: 
  257: 	if name == debugsym:
  258: 	    print "New symbol: %s" % (d)
  259: 
  260: 	return d
  261: 
  262:     def merge(self, idx):
  263:         for id in idx.functions.keys():
  264:               #
  265:               # macro might be used to override functions or variables
  266:               # definitions
  267:               #
  268: 	     if self.macros.has_key(id):
  269: 	         del self.macros[id]
  270: 	     if self.functions.has_key(id):
  271: 	         print "function %s from %s redeclared in %s" % (
  272: 		    id, self.functions[id].header, idx.functions[id].header)
  273: 	     else:
  274: 	         self.functions[id] = idx.functions[id]
  275: 		 self.identifiers[id] = idx.functions[id]
  276:         for id in idx.variables.keys():
  277:               #
  278:               # macro might be used to override functions or variables
  279:               # definitions
  280:               #
  281: 	     if self.macros.has_key(id):
  282: 	         del self.macros[id]
  283: 	     if self.variables.has_key(id):
  284: 	         print "variable %s from %s redeclared in %s" % (
  285: 		    id, self.variables[id].header, idx.variables[id].header)
  286: 	     else:
  287: 	         self.variables[id] = idx.variables[id]
  288: 		 self.identifiers[id] = idx.variables[id]
  289:         for id in idx.structs.keys():
  290: 	     if self.structs.has_key(id):
  291: 	         print "struct %s from %s redeclared in %s" % (
  292: 		    id, self.structs[id].header, idx.structs[id].header)
  293: 	     else:
  294: 	         self.structs[id] = idx.structs[id]
  295: 		 self.identifiers[id] = idx.structs[id]
  296:         for id in idx.typedefs.keys():
  297: 	     if self.typedefs.has_key(id):
  298: 	         print "typedef %s from %s redeclared in %s" % (
  299: 		    id, self.typedefs[id].header, idx.typedefs[id].header)
  300: 	     else:
  301: 	         self.typedefs[id] = idx.typedefs[id]
  302: 		 self.identifiers[id] = idx.typedefs[id]
  303:         for id in idx.macros.keys():
  304:               #
  305:               # macro might be used to override functions or variables
  306:               # definitions
  307:               #
  308:              if self.variables.has_key(id):
  309:                  continue
  310:              if self.functions.has_key(id):
  311:                  continue
  312:              if self.enums.has_key(id):
  313:                  continue
  314: 	     if self.macros.has_key(id):
  315: 	         print "macro %s from %s redeclared in %s" % (
  316: 		    id, self.macros[id].header, idx.macros[id].header)
  317: 	     else:
  318: 	         self.macros[id] = idx.macros[id]
  319: 		 self.identifiers[id] = idx.macros[id]
  320:         for id in idx.enums.keys():
  321: 	     if self.enums.has_key(id):
  322: 	         print "enum %s from %s redeclared in %s" % (
  323: 		    id, self.enums[id].header, idx.enums[id].header)
  324: 	     else:
  325: 	         self.enums[id] = idx.enums[id]
  326: 		 self.identifiers[id] = idx.enums[id]
  327: 
  328:     def merge_public(self, idx):
  329:         for id in idx.functions.keys():
  330: 	     if self.functions.has_key(id):
  331: 	         # check that function condition agrees with header
  332: 	         if idx.functions[id].conditionals != \
  333: 		    self.functions[id].conditionals:
  334: 		     print "Header condition differs from Function for %s:" \
  335: 		        % id
  336: 		     print "  H: %s" % self.functions[id].conditionals
  337: 		     print "  C: %s" % idx.functions[id].conditionals
  338: 	         up = idx.functions[id]
  339: 	         self.functions[id].update(None, up.module, up.type, up.info, up.extra)
  340: 	 #     else:
  341: 	 #         print "Function %s from %s is not declared in headers" % (
  342: 	 #	        id, idx.functions[id].module)
  343: 	 # TODO: do the same for variables.
  344: 
  345:     def analyze_dict(self, type, dict):
  346:         count = 0
  347: 	public = 0
  348:         for name in dict.keys():
  349: 	    id = dict[name]
  350: 	    count = count + 1
  351: 	    if id.static == 0:
  352: 	        public = public + 1
  353:         if count != public:
  354: 	    print "  %d %s , %d public" % (count, type, public)
  355: 	elif count != 0:
  356: 	    print "  %d public %s" % (count, type)
  357: 
  358: 
  359:     def analyze(self):
  360: 	self.analyze_dict("functions", self.functions)
  361: 	self.analyze_dict("variables", self.variables)
  362: 	self.analyze_dict("structs", self.structs)
  363: 	self.analyze_dict("typedefs", self.typedefs)
  364: 	self.analyze_dict("macros", self.macros)
  365: 
  366: class CLexer:
  367:     """A lexer for the C language: it tokenizes the input by reading and
  368:        analyzing it line by line."""
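           # Tokens are (kind, value) tuples; kind is one of 'name', 'sep', 'op',
           # 'string', 'comment' or 'preproc' (the categories produced by token()
           # below).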
  369:     def __init__(self, input):
  370:         self.input = input
  371: 	self.tokens = []
  372: 	self.line = ""
  373: 	self.lineno = 0
  374: 
  375:     def getline(self):
  376:         line = ''
  377: 	while line == '':
  378: 	    line = self.input.readline()
  379: 	    if not line:
  380: 		return None
  381: 	    self.lineno = self.lineno + 1
  382: 	    line = string.lstrip(line)
  383: 	    line = string.rstrip(line)
  384: 	    if line == '':
  385: 	        continue
  386: 	    while line[-1] == '\\':
  387: 	        line = line[:-1]
  388: 		n = self.input.readline()
  389: 		self.lineno = self.lineno + 1
  390: 		n = string.lstrip(n)
  391: 		n = string.rstrip(n)
  392: 		if not n:
  393: 		    break
  394: 		else:
  395: 		    line = line + n
  396:         return line
  397: 
  398:     def getlineno(self):
  399:         return self.lineno
  400: 
  401:     def push(self, token):
  402:         self.tokens.insert(0, token);
  403: 
  404:     def debug(self):
  405:         print "Last token: ", self.last
  406: 	print "Token queue: ", self.tokens
  407: 	print "Line %d end: " % (self.lineno), self.line
  408: 
  409:     def token(self):
  410:         while self.tokens == []:
  411: 	    if self.line == "":
  412: 		line = self.getline()
  413: 	    else:
  414: 	        line = self.line
  415: 		self.line = ""
  416: 	    if line == None:
  417: 	        return None
  418: 
  419: 	    if line[0] == '#':
  420: 	        self.tokens = map((lambda x: ('preproc', x)),
  421: 		                  string.split(line))
  422: 		break;
  423: 	    l = len(line)
  424: 	    if line[0] == '"' or line[0] == "'":
  425: 	        end = line[0]
  426: 	        line = line[1:]
  427: 		found = 0
  428: 		tok = ""
  429: 		while found == 0:
  430: 		    i = 0
  431: 		    l = len(line)
  432: 		    while i < l:
  433: 			if line[i] == end:
  434: 			    self.line = line[i+1:]
  435: 			    line = line[:i]
  436: 			    l = i
  437: 			    found = 1
  438: 			    break
  439: 			if line[i] == '\\':
  440: 			    i = i + 1
  441: 			i = i + 1
  442: 		    tok = tok + line
  443: 		    if found == 0:
  444: 		        line = self.getline()
  445: 			if line == None:
  446: 			    return None
  447: 		self.last = ('string', tok)
  448: 		return self.last
  449: 
  450: 	    if l >= 2 and line[0] == '/' and line[1] == '*':
  451: 	        line = line[2:]
  452: 		found = 0
  453: 		tok = ""
  454: 		while found == 0:
  455: 		    i = 0
  456: 		    l = len(line)
  457: 		    while i < l:
  458: 			if line[i] == '*' and i+1 < l and line[i+1] == '/':
  459: 			    self.line = line[i+2:]
  460: 			    line = line[:i-1]
  461: 			    l = i
  462: 			    found = 1
  463: 			    break
  464: 			i = i + 1
  465: 	            if tok != "":
  466: 		        tok = tok + "\n"
  467: 		    tok = tok + line
  468: 		    if found == 0:
  469: 		        line = self.getline()
  470: 			if line == None:
  471: 			    return None
  472: 		self.last = ('comment', tok)
  473: 		return self.last
  474: 	    if l >= 2 and line[0] == '/' and line[1] == '/':
  475: 	        line = line[2:]
  476: 		self.last = ('comment', line)
  477: 		return self.last
  478: 	    i = 0
  479: 	    while i < l:
  480: 	        if line[i] == '/' and i+1 < l and line[i+1] == '/':
  481: 		    self.line = line[i:]
  482: 		    line = line[:i]
  483: 		    break
  484: 	        if line[i] == '/' and i+1 < l and line[i+1] == '*':
  485: 		    self.line = line[i:]
  486: 		    line = line[:i]
  487: 		    break
  488: 		if line[i] == '"' or line[i] == "'":
  489: 		    self.line = line[i:]
  490: 		    line = line[:i]
  491: 		    break
  492: 		i = i + 1
  493: 	    l = len(line)
  494: 	    i = 0
  495: 	    while i < l:
  496: 	        if line[i] == ' ' or line[i] == '\t':
  497: 		    i = i + 1
  498: 		    continue
  499: 		o = ord(line[i])
  500: 		if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  501: 		   (o >= 48 and o <= 57):
  502: 		    s = i
  503: 		    while i < l:
  504: 			o = ord(line[i])
  505: 			if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  506: 			   (o >= 48 and o <= 57) or string.find(
  507: 			       " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
  508: 			    i = i + 1
  509: 			else:
  510: 			    break
  511: 		    self.tokens.append(('name', line[s:i]))
  512: 		    continue
  513: 		if string.find("(){}:;,[]", line[i]) != -1:
  514: #                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
  515: #		    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
  516: #		    line[i] == ',' or line[i] == '[' or line[i] == ']':
  517: 		    self.tokens.append(('sep', line[i]))
  518: 		    i = i + 1
  519: 		    continue
  520: 		if string.find("+-*><=/%&!|.", line[i]) != -1:
  521: #                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
  522: #		    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
  523: #		    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
  524: #		    line[i] == '!' or line[i] == '|' or line[i] == '.':
  525: 		    if line[i] == '.' and  i + 2 < l and \
  526: 		       line[i+1] == '.' and line[i+2] == '.':
  527: 			self.tokens.append(('name', '...'))
  528: 			i = i + 3
  529: 			continue
  530: 
  531: 		    j = i + 1
  532: 		    if j < l and (
  533: 		       string.find("+-*><=/%&!|", line[j]) != -1):
  534: #		        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
  535: #			line[j] == '>' or line[j] == '<' or line[j] == '=' or \
  536: #			line[j] == '/' or line[j] == '%' or line[j] == '&' or \
  537: #			line[j] == '!' or line[j] == '|'):
  538: 			self.tokens.append(('op', line[i:j+1]))
  539: 			i = j + 1
  540: 		    else:
  541: 			self.tokens.append(('op', line[i]))
  542: 			i = i + 1
  543: 		    continue
  544: 		s = i
  545: 		while i < l:
  546: 		    o = ord(line[i])
  547: 		    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
  548: 		       (o >= 48 and o <= 57) or (
  549: 		        string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
  550: #		         line[i] != ' ' and line[i] != '\t' and
  551: #			 line[i] != '(' and line[i] != ')' and
  552: #			 line[i] != '{'  and line[i] != '}' and
  553: #			 line[i] != ':' and line[i] != ';' and
  554: #			 line[i] != ',' and line[i] != '+' and
  555: #			 line[i] != '-' and line[i] != '*' and
  556: #			 line[i] != '/' and line[i] != '%' and
  557: #			 line[i] != '&' and line[i] != '!' and
  558: #			 line[i] != '|' and line[i] != '[' and
  559: #			 line[i] != ']' and line[i] != '=' and
  560: #			 line[i] != '*' and line[i] != '>' and
  561: #			 line[i] != '<'):
  562: 			i = i + 1
  563: 		    else:
  564: 		        break
  565: 		self.tokens.append(('name', line[s:i]))
  566: 
  567: 	tok = self.tokens[0]
  568: 	self.tokens = self.tokens[1:]
  569: 	self.last = tok
  570: 	return tok
  571: 
  572: class CParser:
  573:     """The C module parser"""
  574:     def __init__(self, filename, idx = None):
  575:         self.filename = filename
  576: 	if len(filename) > 2 and filename[-2:] == '.h':
  577: 	    self.is_header = 1
  578: 	else:
  579: 	    self.is_header = 0
  580:         self.input = open(filename)
  581: 	self.lexer = CLexer(self.input)
  582: 	if idx == None:
  583: 	    self.index = index()
  584: 	else:
  585: 	    self.index = idx
  586: 	self.top_comment = ""
  587: 	self.last_comment = ""
  588: 	self.comment = None
  589: 	self.collect_ref = 0
  590: 	self.no_error = 0
  591: 	self.conditionals = []
  592: 	self.defines = []
  593: 
  594:     def collect_references(self):
  595:         self.collect_ref = 1
  596: 
  597:     def stop_error(self):
  598:         self.no_error = 1
  599: 
  600:     def start_error(self):
  601:         self.no_error = 0
  602: 
  603:     def lineno(self):
  604:         return self.lexer.getlineno()
  605: 
  606:     def index_add(self, name, module, static, type, info=None, extra = None):
  607: 	if self.is_header == 1:
  608: 	    self.index.add(name, module, module, static, type, self.lineno(),
  609: 			   info, extra, self.conditionals)
  610: 	else:
  611: 	    self.index.add(name, None, module, static, type, self.lineno(),
  612: 			   info, extra, self.conditionals)
  613: 
  614:     def index_add_ref(self, name, module, static, type, info=None,
  615:                       extra = None):
  616: 	if self.is_header == 1:
  617: 	    self.index.add_ref(name, module, module, static, type,
  618: 	                       self.lineno(), info, extra, self.conditionals)
  619: 	else:
  620: 	    self.index.add_ref(name, None, module, static, type, self.lineno(),
  621: 			       info, extra, self.conditionals)
  622: 
  623:     def warning(self, msg):
  624:         if self.no_error:
  625: 	    return
  626: 	print msg
  627: 
  628:     def error(self, msg, token=-1):
  629:         if self.no_error:
  630: 	    return
  631: 
  632:         print "Parse Error: " + msg
  633: 	if token != -1:
  634: 	    print "Got token ", token
  635: 	self.lexer.debug()
  636: 	sys.exit(1)
  637: 
  638:     def debug(self, msg, token=-1):
  639:         print "Debug: " + msg
  640: 	if token != -1:
  641: 	    print "Got token ", token
  642: 	self.lexer.debug()
  643: 
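           # Illustrative input for parseTopComment(): the leading block comment
           # of a libxml2 module, whose "Item: value" lines (for instance
           # "Summary:" and "Description:") are split on the first ':' and
           # collected into self.index.info.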
  644:     def parseTopComment(self, comment):
  645: 	res = {}
  646: 	lines = string.split(comment, "\n")
  647: 	item = None
  648: 	for line in lines:
  649: 	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
  650: 		line = line[1:]
  651: 	    while line != "" and line[0] == '*':
  652: 		line = line[1:]
  653: 	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
  654: 		line = line[1:]
  655: 	    try:
  656: 		(it, line) = string.split(line, ":", 1)
  657: 		item = it
  658: 		while line != "" and (line[0] == ' ' or line[0] == '\t'):
  659: 		    line = line[1:]
  660: 		if res.has_key(item):
  661: 		    res[item] = res[item] + " " + line
  662: 		else:
  663: 		    res[item] = line
  664: 	    except:
  665: 		if item != None:
  666: 		    if res.has_key(item):
  667: 			res[item] = res[item] + " " + line
  668: 		    else:
  669: 			res[item] = line
  670: 	self.index.info = res
  671: 
  672:     def parseComment(self, token):
  673:         if self.top_comment == "":
  674: 	    self.top_comment = token[1]
  675: 	if self.comment == None or token[1][0] == '*':
  676: 	    self.comment = token[1];
  677: 	else:
  678: 	    self.comment = self.comment + token[1]
  679: 	token = self.lexer.token()
  680: 
  681:         if string.find(self.comment, "DOC_DISABLE") != -1:
  682: 	    self.stop_error()
  683: 
  684:         if string.find(self.comment, "DOC_ENABLE") != -1:
  685: 	    self.start_error()
  686: 
  687: 	return token
  688: 
  689:     #
  690:     # Parse a comment block associated with a typedef
  691:     #
  692:     def parseTypeComment(self, name, quiet = 0):
  693:         if name[0:2] == '__':
  694: 	    quiet = 1
  695: 
  696:         args = []
  697: 	desc = ""
  698: 
  699:         if self.comment == None:
  700: 	    if not quiet:
  701: 		self.warning("Missing comment for type %s" % (name))
  702: 	    return((args, desc))
  703:         if self.comment[0] != '*':
  704: 	    if not quiet:
  705: 		self.warning("Missing * in type comment for %s" % (name))
  706: 	    return((args, desc))
  707: 	lines = string.split(self.comment, '\n')
  708: 	if lines[0] == '*':
  709: 	    del lines[0]
  710: 	if lines[0] != "* %s:" % (name):
  711: 	    if not quiet:
  712: 		self.warning("Misformatted type comment for %s" % (name))
  713: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  714: 	    return((args, desc))
  715: 	del lines[0]
  716: 	while len(lines) > 0 and lines[0] == '*':
  717: 	    del lines[0]
  718: 	desc = ""
  719: 	while len(lines) > 0:
  720: 	    l = lines[0]
  721: 	    while len(l) > 0 and l[0] == '*':
  722: 	        l = l[1:]
  723: 	    l = string.strip(l)
  724: 	    desc = desc + " " + l
  725: 	    del lines[0]
  726: 
  727: 	desc = string.strip(desc)
  728: 
  729: 	if quiet == 0:
  730: 	    if desc == "":
  731: 	        self.warning("Type comment for %s lacks description of the type" % (name))
  732: 
  733: 	return(desc)
  734:     #
  735:     # Parse a comment block associated with a macro
  736:     #
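           # Hypothetical example of the comment layout this expects:
           #  * SOME_MACRO:
           #  * @arg:  what the argument is
           #  *
           #  * What the macro does.
           # which would yield ([('arg', 'what the argument is')],
           # 'What the macro does.').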
  737:     def parseMacroComment(self, name, quiet = 0):
  738:         if name[0:2] == '__':
  739: 	    quiet = 1
  740: 
  741:         args = []
  742: 	desc = ""
  743: 
  744:         if self.comment == None:
  745: 	    if not quiet:
  746: 		self.warning("Missing comment for macro %s" % (name))
  747: 	    return((args, desc))
  748:         if self.comment[0] != '*':
  749: 	    if not quiet:
  750: 		self.warning("Missing * in macro comment for %s" % (name))
  751: 	    return((args, desc))
  752: 	lines = string.split(self.comment, '\n')
  753: 	if lines[0] == '*':
  754: 	    del lines[0]
  755: 	if lines[0] != "* %s:" % (name):
  756: 	    if not quiet:
  757: 		self.warning("Misformatted macro comment for %s" % (name))
  758: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  759: 	    return((args, desc))
  760: 	del lines[0]
  761: 	while lines[0] == '*':
  762: 	    del lines[0]
  763: 	while len(lines) > 0 and lines[0][0:3] == '* @':
  764: 	    l = lines[0][3:]
  765: 	    try:
  766: 	        (arg, desc) = string.split(l, ':', 1)
  767: 		desc=string.strip(desc)
  768: 		arg=string.strip(arg)
  769:             except:
  770: 		if not quiet:
  771: 		    self.warning("Misformatted macro comment for %s" % (name))
  772: 		    self.warning("  problem with '%s'" % (lines[0]))
  773: 		del lines[0]
  774: 		continue
  775: 	    del lines[0]
  776: 	    l = string.strip(lines[0])
  777: 	    while len(l) > 2 and l[0:3] != '* @':
  778: 	        while l[0] == '*':
  779: 		    l = l[1:]
  780: 		desc = desc + ' ' + string.strip(l)
  781: 		del lines[0]
  782: 		if len(lines) == 0:
  783: 		    break
  784: 		l = lines[0]
  785:             args.append((arg, desc))
  786: 	while len(lines) > 0 and lines[0] == '*':
  787: 	    del lines[0]
  788: 	desc = ""
  789: 	while len(lines) > 0:
  790: 	    l = lines[0]
  791: 	    while len(l) > 0 and l[0] == '*':
  792: 	        l = l[1:]
  793: 	    l = string.strip(l)
  794: 	    desc = desc + " " + l
  795: 	    del lines[0]
  796: 
  797: 	desc = string.strip(desc)
  798: 
  799: 	if quiet == 0:
  800: 	    if desc == "":
  801: 	        self.warning("Macro comment for %s lacks description of the macro" % (name))
  802: 
  803: 	return((args, desc))
  804: 
  805:      #
  806:      # Parse a comment block and merge the information found in the
  807:      # parameter descriptions; finally returns a block as complete
  808:      # as possible.
  809:      #
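            # Hypothetical example of the comment this merges with the parsed
            # signature:
            #  * someFunction:
            #  * @doc:  the document
            #  *
            #  * Does something with the document.
            #  *
            #  * Returns 0 in case of success, -1 otherwise.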
  810:     def mergeFunctionComment(self, name, description, quiet = 0):
  811:         if name == 'main':
  812: 	    quiet = 1
  813:         if name[0:2] == '__':
  814: 	    quiet = 1
  815: 
  816: 	(ret, args) = description
  817: 	desc = ""
  818: 	retdesc = ""
  819: 
  820:         if self.comment == None:
  821: 	    if not quiet:
  822: 		self.warning("Missing comment for function %s" % (name))
  823: 	    return(((ret[0], retdesc), args, desc))
  824:         if self.comment[0] != '*':
  825: 	    if not quiet:
  826: 		self.warning("Missing * in function comment for %s" % (name))
  827: 	    return(((ret[0], retdesc), args, desc))
  828: 	lines = string.split(self.comment, '\n')
  829: 	if lines[0] == '*':
  830: 	    del lines[0]
  831: 	if lines[0] != "* %s:" % (name):
  832: 	    if not quiet:
  833: 		self.warning("Misformatted function comment for %s" % (name))
  834: 		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
  835: 	    return(((ret[0], retdesc), args, desc))
  836: 	del lines[0]
  837: 	while lines[0] == '*':
  838: 	    del lines[0]
  839: 	nbargs = len(args)
  840: 	while len(lines) > 0 and lines[0][0:3] == '* @':
  841: 	    l = lines[0][3:]
  842: 	    try:
  843: 	        (arg, desc) = string.split(l, ':', 1)
  844: 		desc=string.strip(desc)
  845: 		arg=string.strip(arg)
  846:             except:
  847: 		if not quiet:
  848: 		    self.warning("Misformatted function comment for %s" % (name))
  849: 		    self.warning("  problem with '%s'" % (lines[0]))
  850: 		del lines[0]
  851: 		continue
  852: 	    del lines[0]
  853: 	    l = string.strip(lines[0])
  854: 	    while len(l) > 2 and l[0:3] != '* @':
  855: 	        while l[0] == '*':
  856: 		    l = l[1:]
  857: 		desc = desc + ' ' + string.strip(l)
  858: 		del lines[0]
  859: 		if len(lines) == 0:
  860: 		    break
  861: 		l = lines[0]
  862: 	    i = 0
  863: 	    while i < nbargs:
  864: 	        if args[i][1] == arg:
  865: 		    args[i] = (args[i][0], arg, desc)
  866: 		    break;
  867: 		i = i + 1
  868: 	    if i >= nbargs:
  869: 		if not quiet:
  870: 		    self.warning("Unable to find arg %s from function comment for %s" % (
  871: 		       arg, name))
  872: 	while len(lines) > 0 and lines[0] == '*':
  873: 	    del lines[0]
  874: 	desc = ""
  875: 	while len(lines) > 0:
  876: 	    l = lines[0]
  877: 	    while len(l) > 0 and l[0] == '*':
  878: 	        l = l[1:]
  879: 	    l = string.strip(l)
  880: 	    if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
  881: 	        try:
  882: 		    l = string.split(l, ' ', 1)[1]
  883: 		except:
  884: 		    l = ""
  885: 		retdesc = string.strip(l)
  886: 		del lines[0]
  887: 		while len(lines) > 0:
  888: 		    l = lines[0]
  889: 		    while len(l) > 0 and l[0] == '*':
  890: 			l = l[1:]
  891: 		    l = string.strip(l)
  892: 		    retdesc = retdesc + " " + l
  893: 		    del lines[0]
  894: 	    else:
  895: 	        desc = desc + " " + l
  896: 		del lines[0]
  897: 
  898: 	retdesc = string.strip(retdesc)
  899: 	desc = string.strip(desc)
  900: 
  901: 	if quiet == 0:
  902: 	     #
  903: 	     # report missing comments
  904: 	     #
  905: 	    i = 0
  906: 	    while i < nbargs:
  907: 	        if args[i][2] == None and args[i][0] != "void" and \
  908: 		   ((args[i][1] != None) or (args[i][1] == '')):
  909: 		    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
  910: 		i = i + 1
  911: 	    if retdesc == "" and ret[0] != "void":
  912: 		self.warning("Function comment for %s lacks description of return value" % (name))
  913: 	    if desc == "":
  914: 	        self.warning("Function comment for %s lacks description of the function" % (name))
  915: 
  916: 	return(((ret[0], retdesc), args, desc))
  917: 
  918:     def parsePreproc(self, token):
  919: 	if debug:
  920: 	    print "=> preproc ", token, self.lexer.tokens
  921:         name = token[1]
  922: 	if name == "#include":
  923: 	    token = self.lexer.token()
  924: 	    if token == None:
  925: 	        return None
  926: 	    if token[0] == 'preproc':
  927: 		self.index_add(token[1], self.filename, not self.is_header,
  928: 		                "include")
  929: 		return self.lexer.token()
  930: 	    return token
  931: 	if name == "#define":
  932: 	    token = self.lexer.token()
  933: 	    if token == None:
  934: 	        return None
  935: 	    if token[0] == 'preproc':
  936: 	         # TODO macros with arguments
  937: 		name = token[1]
  938: 	        lst = []
  939: 		token = self.lexer.token()
  940: 		while token != None and token[0] == 'preproc' and \
  941: 		      token[1][0] != '#':
  942: 		    lst.append(token[1])
  943: 		    token = self.lexer.token()
  944:                 try:
  945: 		    name = string.split(name, '(') [0]
  946:                 except:
  947:                     pass
  948:                 info = self.parseMacroComment(name, not self.is_header)
  949: 		self.index_add(name, self.filename, not self.is_header,
  950: 		                "macro", info)
  951: 		return token
  952: 
  953: 	#
  954: 	# Processing of conditionals modified by Bill 1/1/05
  955: 	#
  956: 	# We process conditionals (i.e. tokens from #ifdef, #ifndef,
  957: 	# #if, #else and #endif) for headers and mainline code,
  958: 	# store the ones from the header in libxml2-api.xml, and later
  959: 	# (in the routine merge_public) verify that the two (header and
  960: 	# mainline code) agree.
  961: 	#
  962: 	# There is a small problem with processing the headers. Some of
  963: 	# the variables are not concerned with enabling / disabling of
  964: 	# library functions (e.g. '__XML_PARSER_H__'), and we don't want
  965: 	# them to be included in libxml2-api.xml, or involved in
  966: 	# the check between the header and the mainline code.  To
  967: 	# accomplish this, we ignore any conditional which doesn't include
  968: 	# the string 'ENABLED'
  969: 	#
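       	# Illustrative mapping performed below:
       	#   #ifdef LIBXML_XPATH_ENABLED    -> conditional "defined(LIBXML_XPATH_ENABLED)"
       	#   #ifndef LIBXML_THREAD_ENABLED  -> conditional "!defined(LIBXML_THREAD_ENABLED)"
       	#   #ifndef __XML_PARSER_H__       -> recorded as a define only (no 'ENABLED')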
  970: 	if name == "#ifdef":
  971: 	    apstr = self.lexer.tokens[0][1]
  972: 	    try:
  973: 	        self.defines.append(apstr)
  974: 		if string.find(apstr, 'ENABLED') != -1:
  975: 		    self.conditionals.append("defined(%s)" % apstr)
  976: 	    except:
  977: 	        pass
  978: 	elif name == "#ifndef":
  979: 	    apstr = self.lexer.tokens[0][1]
  980: 	    try:
  981: 	        self.defines.append(apstr)
  982: 		if string.find(apstr, 'ENABLED') != -1:
  983: 		    self.conditionals.append("!defined(%s)" % apstr)
  984: 	    except:
  985: 	        pass
  986: 	elif name == "#if":
  987: 	    apstr = ""
  988: 	    for tok in self.lexer.tokens:
  989: 	        if apstr != "":
  990: 		    apstr = apstr + " "
  991: 	        apstr = apstr + tok[1]
  992: 	    try:
  993: 	        self.defines.append(apstr)
  994: 		if string.find(apstr, 'ENABLED') != -1:
  995: 		    self.conditionals.append(apstr)
  996: 	    except:
  997: 	        pass
  998: 	elif name == "#else":
  999: 	    if self.conditionals != [] and \
 1000: 	       string.find(self.defines[-1], 'ENABLED') != -1:
 1001: 	        self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
 1002: 	elif name == "#endif":
 1003: 	    if self.conditionals != [] and \
 1004: 	       string.find(self.defines[-1], 'ENABLED') != -1:
 1005: 	        self.conditionals = self.conditionals[:-1]
 1006: 	    self.defines = self.defines[:-1]
 1007: 	token = self.lexer.token()
 1008: 	while token != None and token[0] == 'preproc' and \
 1009: 	    token[1][0] != '#':
 1010: 	    token = self.lexer.token()
 1011: 	return token
 1012: 
 1013:      #
 1014:      # token acquisition on top of the lexer, it handle internally
 1015:      # Token acquisition on top of the lexer; it handles preprocessor
 1016:      # directives and comments internally since they are logically not
 1017:      # part of the program structure.
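            # For example, an 'XMLPUBFUN' or 'XMLCALL' name token is dropped and
            # '__const' is rewritten to plain 'const', as driven by the
            # ignored_words table above.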
 1018:     def token(self):
 1019:         global ignored_words
 1020: 
 1021:         token = self.lexer.token()
 1022: 	while token != None:
 1023: 	    if token[0] == 'comment':
 1024: 		token = self.parseComment(token)
 1025: 		continue
 1026: 	    elif token[0] == 'preproc':
 1027: 		token = self.parsePreproc(token)
 1028: 		continue
 1029: 	    elif token[0] == "name" and token[1] == "__const":
 1030: 	        token = ("name", "const")
 1031: 		return token
 1032: 	    elif token[0] == "name" and token[1] == "__attribute":
 1033: 		token = self.lexer.token()
 1034: 		while token != None and token[1] != ";":
 1035: 		    token = self.lexer.token()
 1036: 		return token
 1037: 	    elif token[0] == "name" and ignored_words.has_key(token[1]):
 1038: 	        (n, info) = ignored_words[token[1]]
 1039: 		i = 0
 1040: 		while i < n:
 1041: 		    token = self.lexer.token()
 1042: 		    i = i + 1
 1043: 		token = self.lexer.token()
 1044: 		continue
 1045: 	    else:
 1046: 	        if debug:
 1047: 		    print "=> ", token
 1048: 	        return token
 1049: 	return None
 1050: 
 1051:      #
 1052:      # Parse a typedef; it records the type and its name.
 1053:      #
 1054:     def parseTypedef(self, token):
 1055:         if token == None:
 1056: 	    return None
 1057: 	token = self.parseType(token)
 1058: 	if token == None:
 1059: 	    self.error("parsing typedef")
 1060: 	    return None
 1061: 	base_type = self.type
 1062: 	type = base_type
 1063: 	 #self.debug("end typedef type", token)
 1064: 	while token != None:
 1065: 	    if token[0] == "name":
 1066: 		name = token[1]
 1067: 		signature = self.signature
 1068: 		if signature != None:
 1069: 		    type = string.split(type, '(')[0]
 1070: 		    d = self.mergeFunctionComment(name,
 1071: 			    ((type, None), signature), 1)
 1072: 		    self.index_add(name, self.filename, not self.is_header,
 1073: 				    "functype", d)
 1074: 		else:
 1075: 		    if base_type == "struct":
 1076: 			self.index_add(name, self.filename, not self.is_header,
 1077: 					"struct", type)
 1078: 			base_type = "struct " + name
 1079: 	            else:
 1080: 			# TODO report missing or misformatted comments
 1081: 			info = self.parseTypeComment(name, 1)
 1082: 			self.index_add(name, self.filename, not self.is_header,
 1083: 		                    "typedef", type, info)
 1084: 		token = self.token()
 1085: 	    else:
 1086: 		self.error("parsing typedef: expecting a name")
 1087: 		return token
 1088: 	     #self.debug("end typedef", token)
 1089: 	    if token != None and token[0] == 'sep' and token[1] == ',':
 1090: 	        type = base_type
 1091: 	        token = self.token()
 1092: 		while token != None and token[0] == "op":
 1093: 		    type = type + token[1]
 1094: 		    token = self.token()
 1095: 	    elif token != None and token[0] == 'sep' and token[1] == ';':
 1096: 	        break;
 1097: 	    elif token != None and token[0] == 'name':
 1098: 	        type = base_type
 1099: 	        continue;
 1100: 	    else:
 1101: 		self.error("parsing typedef: expecting ';'", token)
 1102: 		return token
 1103: 	token = self.token()
 1104: 	return token
 1105: 
 1106:      #
 1107:      # Parse a C code block (used for function bodies); it parses up to
 1108:      # and including the balancing }
 1109:      #
 1110:     def parseBlock(self, token):
 1111:         while token != None:
 1112: 	    if token[0] == "sep" and token[1] == "{":
 1113: 	        token = self.token()
 1114: 		token = self.parseBlock(token)
 1115: 	    elif token[0] == "sep" and token[1] == "}":
 1116: 	        self.comment = None
 1117: 	        token = self.token()
 1118: 		return token
 1119: 	    else:
 1120: 	        if self.collect_ref == 1:
 1121: 		    oldtok = token
 1122: 		    token = self.token()
 1123: 		    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
 1124: 		        if token[0] == "sep" and token[1] == "(":
 1125: 			    self.index_add_ref(oldtok[1], self.filename,
 1126: 			                        0, "function")
 1127: 			    token = self.token()
 1128: 			elif token[0] == "name":
 1129: 			    token = self.token()
 1130: 			    if token[0] == "sep" and (token[1] == ";" or
 1131: 			       token[1] == "," or token[1] == "="):
 1132: 				self.index_add_ref(oldtok[1], self.filename,
 1133: 						    0, "type")
 1134: 		    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
 1135: 			self.index_add_ref(oldtok[1], self.filename,
 1136: 					    0, "typedef")
 1137: 		    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
 1138: 			self.index_add_ref(oldtok[1], self.filename,
 1139: 					    0, "typedef")
 1140: 
 1141: 		else:
 1142: 		    token = self.token()
 1143: 	return token
 1144: 
 1145:      #
 1146:      # Parse a C struct definition till the balancing }
 1147:      #
 1148:     def parseStruct(self, token):
 1149:         fields = []
 1150: 	 #self.debug("start parseStruct", token)
 1151:         while token != None:
 1152: 	    if token[0] == "sep" and token[1] == "{":
 1153: 	        token = self.token()
 1154: 		token = self.parseTypeBlock(token)
 1155: 	    elif token[0] == "sep" and token[1] == "}":
 1156: 		self.struct_fields = fields
 1157: 		 #self.debug("end parseStruct", token)
 1158: 		 #print fields
 1159: 	        token = self.token()
 1160: 		return token
 1161: 	    else:
 1162: 	        base_type = self.type
 1163: 		 #self.debug("before parseType", token)
 1164: 		token = self.parseType(token)
 1165: 		 #self.debug("after parseType", token)
 1166: 		if token != None and token[0] == "name":
 1167: 		    fname = token[1]
 1168: 		    token = self.token()
 1169: 		    if token[0] == "sep" and token[1] == ";":
 1170: 		        self.comment = None
 1171: 		        token = self.token()
 1172: 			fields.append((self.type, fname, self.comment))
 1173: 			self.comment = None
 1174: 		    else:
 1175: 		        self.error("parseStruct: expecting ;", token)
 1176: 		elif token != None and token[0] == "sep" and token[1] == "{":
 1177: 		    token = self.token()
 1178: 		    token = self.parseTypeBlock(token)
 1179: 		    if token != None and token[0] == "name":
 1180: 			token = self.token()
 1181: 		    if token != None and token[0] == "sep" and token[1] == ";":
 1182: 			token = self.token()
 1183: 		    else:
 1184: 		        self.error("parseStruct: expecting ;", token)
 1185: 		else:
 1186: 		    self.error("parseStruct: name", token)
 1187: 		    token = self.token()
 1188: 		self.type = base_type;
 1189:         self.struct_fields = fields
 1190: 	 #self.debug("end parseStruct", token)
 1191: 	 #print fields
 1192: 	return token
 1193: 
 1194:      #
 1195:      # Parse a C enum block, parsing up to the balancing }
 1196:      #
 1197:     def parseEnumBlock(self, token):
 1198:         self.enums = []
 1199: 	name = None
 1200: 	self.comment = None
 1201: 	comment = ""
 1202: 	value = "0"
 1203:         while token != None:
 1204: 	    if token[0] == "sep" and token[1] == "{":
 1205: 	        token = self.token()
 1206: 		token = self.parseTypeBlock(token)
 1207: 	    elif token[0] == "sep" and token[1] == "}":
 1208: 		if name != None:
 1209: 		    if self.comment != None:
 1210: 			comment = self.comment
 1211: 			self.comment = None
 1212: 		    self.enums.append((name, value, comment))
 1213: 	        token = self.token()
 1214: 		return token
 1215: 	    elif token[0] == "name":
 1216: 		    if name != None:
 1217: 			if self.comment != None:
 1218: 			    comment = string.strip(self.comment)
 1219: 			    self.comment = None
 1220: 			self.enums.append((name, value, comment))
 1221: 		    name = token[1]
 1222: 		    comment = ""
 1223: 		    token = self.token()
 1224: 		    if token[0] == "op" and token[1][0] == "=":
 1225: 		        value = ""
 1226: 		        if len(token[1]) > 1:
 1227: 			    value = token[1][1:]
 1228: 		        token = self.token()
 1229: 		        while token[0] != "sep" or (token[1] != ',' and
 1230: 			      token[1] != '}'):
 1231: 			    value = value + token[1]
 1232: 			    token = self.token()
 1233: 		    else:
 1234: 		        try:
 1235: 			    value = "%d" % (int(value) + 1)
 1236: 			except:
 1237: 			    self.warning("Failed to compute value of enum %s" % (name))
 1238: 			    value=""
 1239: 		    if token[0] == "sep" and token[1] == ",":
 1240: 			token = self.token()
 1241: 	    else:
 1242: 	        token = self.token()
 1243: 	return token
 1244: 
 1245:      #
 1246:      # Parse a C definition block (used for structs); it parses up to
 1247:      # and including the balancing }
 1248:      #
 1249:     def parseTypeBlock(self, token):
 1250:         while token != None:
 1251: 	    if token[0] == "sep" and token[1] == "{":
 1252: 	        token = self.token()
 1253: 		token = self.parseTypeBlock(token)
 1254: 	    elif token[0] == "sep" and token[1] == "}":
 1255: 	        token = self.token()
 1256: 		return token
 1257: 	    else:
 1258: 	        token = self.token()
 1259: 	return token
 1260: 
 1261:      #
 1262:      # Parse a type: the fact that the type name can either occur after
 1263:      #    the definition or within the definition makes it a little harder;
 1264:      #    if inside, the name token is pushed back before returning
 1265:      #
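            # Illustrative: for the C fragment "const xmlChar * name;" this
            # leaves self.type == "const xmlChar *" and returns the
            # ('name', 'name') token, with the trailing ';' pushed back onto
            # the lexer.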
 1266:     def parseType(self, token):
 1267:         self.type = ""
 1268: 	self.struct_fields = []
 1269:         self.signature = None
 1270: 	if token == None:
 1271: 	    return token
 1272: 
 1273: 	while token[0] == "name" and (
 1274: 	      token[1] == "const" or \
 1275: 	      token[1] == "unsigned" or \
 1276: 	      token[1] == "signed"):
 1277: 	    if self.type == "":
 1278: 	        self.type = token[1]
 1279: 	    else:
 1280: 	        self.type = self.type + " " + token[1]
 1281: 	    token = self.token()
 1282: 
 1283:         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
 1284: 	    if self.type == "":
 1285: 	        self.type = token[1]
 1286: 	    else:
 1287: 	        self.type = self.type + " " + token[1]
 1288: 	    if token[0] == "name" and token[1] == "int":
 1289: 		if self.type == "":
 1290: 		    self.type = token[1]
 1291: 		else:
 1292: 		    self.type = self.type + " " + token[1]
 1293: 
 1294:         elif token[0] == "name" and token[1] == "struct":
 1295: 	    if self.type == "":
 1296: 	        self.type = token[1]
 1297: 	    else:
 1298: 	        self.type = self.type + " " + token[1]
 1299: 	    token = self.token()
 1300: 	    nametok = None
 1301: 	    if token[0] == "name":
 1302: 	        nametok = token
 1303: 		token = self.token()
 1304: 	    if token != None and token[0] == "sep" and token[1] == "{":
 1305: 		token = self.token()
 1306: 		token = self.parseStruct(token)
 1307: 	    elif token != None and token[0] == "op" and token[1] == "*":
 1308: 	        self.type = self.type + " " + nametok[1] + " *"
 1309: 		token = self.token()
 1310: 		while token != None and token[0] == "op" and token[1] == "*":
 1311: 		    self.type = self.type + " *"
 1312: 		    token = self.token()
 1313: 		if token[0] == "name":
 1314: 		    nametok = token
 1315: 		    token = self.token()
 1316: 		else:
 1317: 		    self.error("struct : expecting name", token)
 1318: 		    return token
 1319: 	    elif token != None and token[0] == "name" and nametok != None:
 1320: 	        self.type = self.type + " " + nametok[1]
 1321: 		return token
 1322: 
 1323: 	    if nametok != None:
 1324: 		self.lexer.push(token)
 1325: 		token = nametok
 1326: 	    return token
 1327: 
 1328:         elif token[0] == "name" and token[1] == "enum":
 1329: 	    if self.type == "":
 1330: 	        self.type = token[1]
 1331: 	    else:
 1332: 	        self.type = self.type + " " + token[1]
 1333: 	    self.enums = []
 1334: 	    token = self.token()
 1335: 	    if token != None and token[0] == "sep" and token[1] == "{":
 1336: 		token = self.token()
 1337: 		token = self.parseEnumBlock(token)
 1338: 	    else:
 1339: 		self.error("parsing enum: expecting '{'", token)
 1340: 	    enum_type = None
 1341: 	    if token != None and token[0] != "name":
 1342: 	        self.lexer.push(token)
 1343: 	        token = ("name", "enum")
 1344: 	    else:
 1345: 	        enum_type = token[1]
 1346: 	    for enum in self.enums:
 1347: 		self.index_add(enum[0], self.filename,
 1348: 			       not self.is_header, "enum",
 1349: 			       (enum[1], enum[2], enum_type))
 1350: 	    return token
 1351: 
 1352: 	elif token[0] == "name":
 1353: 	    if self.type == "":
 1354: 	        self.type = token[1]
 1355: 	    else:
 1356: 	        self.type = self.type + " " + token[1]
 1357: 	else:
 1358: 	    self.error("parsing type %s: expecting a name" % (self.type),
 1359: 	               token)
 1360: 	    return token
 1361: 	token = self.token()
 1362:         while token != None and (token[0] == "op" or
 1363: 	      token[0] == "name" and token[1] == "const"):
 1364: 	    self.type = self.type + " " + token[1]
 1365: 	    token = self.token()
 1366: 
 1367: 	 #
 1368: 	 # if there is a parenthesis here, this means a function type
 1369: 	 #
 1370: 	if token != None and token[0] == "sep" and token[1] == '(':
 1371: 	    self.type = self.type + token[1]
 1372: 	    token = self.token()
 1373: 	    while token != None and token[0] == "op" and token[1] == '*':
 1374: 	        self.type = self.type + token[1]
 1375: 		token = self.token()
 1376: 	    if token == None or token[0] != "name" :
 1377: 		self.error("parsing function type, name expected", token);
 1378: 	        return token
 1379: 	    self.type = self.type + token[1]
 1380: 	    nametok = token
 1381: 	    token = self.token()
 1382: 	    if token != None and token[0] == "sep" and token[1] == ')':
 1383: 		self.type = self.type + token[1]
 1384: 		token = self.token()
 1385: 		if token != None and token[0] == "sep" and token[1] == '(':
 1386: 		    token = self.token()
 1387: 		    type = self.type;
 1388: 		    token = self.parseSignature(token);
 1389: 		    self.type = type;
 1390: 		else:
 1391: 		    self.error("parsing function type, '(' expected", token);
 1392: 		    return token
 1393: 	    else:
 1394: 	        self.error("parsing function type, ')' expected", token);
 1395: 		return token
 1396: 	    self.lexer.push(token)
 1397: 	    token = nametok
 1398: 	    return token
 1399: 
 1400:          #
 1401: 	 # do some lookahead for arrays
 1402: 	 #
 1403: 	if token != None and token[0] == "name":
 1404: 	    nametok = token
 1405: 	    token = self.token()
 1406: 	    if token != None and token[0] == "sep" and token[1] == '[':
 1407: 	        self.type = self.type + nametok[1]
 1408: 		while token != None and token[0] == "sep" and token[1] == '[':
 1409: 		    self.type = self.type + token[1]
 1410: 		    token = self.token()
 1411: 		    while token != None and token[0] != 'sep' and \
 1412: 		          token[1] != ']' and token[1] != ';':
 1413: 			self.type = self.type + token[1]
 1414: 			token = self.token()
 1415: 		if token != None and token[0] == 'sep' and token[1] == ']':
 1416: 		    self.type = self.type + token[1]
 1417: 		    token = self.token()
 1418: 		else:
 1419: 		    self.error("parsing array type, ']' expected", token);
 1420: 		    return token
 1421: 	    elif token != None and token[0] == "sep" and token[1] == ':':
 1422: 	         # remove :12 in case it's a limited int size
 1423: 		token = self.token()
 1424: 		token = self.token()
 1425: 	    self.lexer.push(token)
 1426: 	    token = nametok
 1427: 
 1428: 	return token
 1429: 
 1430:      #
 1431:      # Parse a signature: '(' has been parsed and we scan the type definition
 1432:      #    up to and including the ')'
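            #    Illustrative: for "xmlDocPtr doc, int options)" the stored
            #    self.signature is [('xmlDocPtr', 'doc', None), ('int', 'options', None)].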
 1433:     def parseSignature(self, token):
 1434:         signature = []
 1435: 	if token != None and token[0] == "sep" and token[1] == ')':
 1436: 	    self.signature = []
 1437: 	    token = self.token()
 1438: 	    return token
 1439: 	while token != None:
 1440: 	    token = self.parseType(token)
 1441: 	    if token != None and token[0] == "name":
 1442: 	        signature.append((self.type, token[1], None))
 1443: 		token = self.token()
 1444: 	    elif token != None and token[0] == "sep" and token[1] == ',':
 1445: 		token = self.token()
 1446: 		continue
 1447: 	    elif token != None and token[0] == "sep" and token[1] == ')':
 1448: 	         # only the type was provided
 1449: 		if self.type == "...":
 1450: 		    signature.append((self.type, "...", None))
 1451: 		else:
 1452: 		    signature.append((self.type, None, None))
 1453: 	    if token != None and token[0] == "sep":
 1454: 	        if token[1] == ',':
 1455: 		    token = self.token()
 1456: 		    continue
 1457: 		elif token[1] == ')':
 1458: 		    token = self.token()
 1459: 		    break
 1460: 	self.signature = signature
 1461: 	return token
 1462: 
 1463:      #
 1464:      # Parse a global definition, be it a type, variable or function.
 1465:      # The extern "C" blocks are a bit nasty and require it to recurse.
 1466:      #
 1467:     def parseGlobal(self, token):
 1468:         static = 0
 1469:         if token[1] == 'extern':
 1470: 	    token = self.token()
 1471: 	    if token == None:
 1472: 	        return token
 1473: 	    if token[0] == 'string':
 1474: 	        if token[1] == 'C':
 1475: 		    token = self.token()
 1476: 		    if token == None:
 1477: 			return token
 1478: 		    if token[0] == 'sep' and token[1] == "{":
 1479: 		        token = self.token()
 1480: #			 print 'Entering extern "C line ', self.lineno()
 1481: 			while token != None and (token[0] != 'sep' or
 1482: 			      token[1] != "}"):
 1483: 			    if token[0] == 'name':
 1484: 				token = self.parseGlobal(token)
 1485: 			    else:
 1486: 				self.error(
 1487: 				 "token %s %s unexpected at the top level" % (
 1488: 					token[0], token[1]))
 1489: 				token = self.parseGlobal(token)
 1490: #			 print 'Exiting extern "C" line', self.lineno()
 1491: 			token = self.token()
 1492: 			return token
 1493: 		else:
 1494: 		    return token
 1495: 	elif token[1] == 'static':
 1496: 	    static = 1
 1497: 	    token = self.token()
 1498: 	    if token == None or  token[0] != 'name':
 1499: 	        return token
 1500: 
 1501: 	if token[1] == 'typedef':
 1502: 	    token = self.token()
 1503: 	    return self.parseTypedef(token)
 1504: 	else:
 1505: 	    token = self.parseType(token)
 1506: 	    type_orig = self.type
 1507: 	if token == None or token[0] != "name":
 1508: 	    return token
 1509: 	type = type_orig
 1510: 	self.name = token[1]
 1511: 	token = self.token()
 1512: 	while token != None and (token[0] == "sep" or token[0] == "op"):
 1513: 	    if token[0] == "sep":
 1514: 		if token[1] == "[":
 1515: 		    type = type + token[1]
 1516: 		    token = self.token()
 1517: 		    while token != None and (token[0] != "sep" or \
 1518: 		          token[1] != ";"):
 1519: 			type = type + token[1]
 1520: 			token = self.token()
 1521: 
 1522: 	    if token != None and token[0] == "op" and token[1] == "=":
 1523: 		 #
 1524: 		 # Skip the initialization of the variable
 1525: 		 #
 1526: 		token = self.token()
 1527: 		if token[0] == 'sep' and token[1] == '{':
 1528: 		    token = self.token()
 1529: 		    token = self.parseBlock(token)
 1530: 		else:
 1531: 		    self.comment = None
 1532: 		    while token != None and (token[0] != "sep" or \
 1533: 			  (token[1] != ';' and token[1] != ',')):
 1534: 			    token = self.token()
 1535: 		self.comment = None
 1536: 		if token == None or token[0] != "sep" or (token[1] != ';' and
 1537: 		   token[1] != ','):
 1538: 		    self.error("missing ';' or ',' after value")
 1539: 
 1540: 	    if token != None and token[0] == "sep":
 1541: 		if token[1] == ";":
 1542: 		    self.comment = None
 1543: 		    token = self.token()
 1544: 		    if type == "struct":
 1545: 		        self.index_add(self.name, self.filename,
 1546: 			     not self.is_header, "struct", self.struct_fields)
 1547: 		    else:
 1548: 			self.index_add(self.name, self.filename,
 1549: 			     not self.is_header, "variable", type)
 1550: 		    break
 1551: 		elif token[1] == "(":
 1552: 		    token = self.token()
 1553: 		    token = self.parseSignature(token)
 1554: 		    if token == None:
 1555: 			return None
 1556: 		    if token[0] == "sep" and token[1] == ";":
 1557: 		        d = self.mergeFunctionComment(self.name,
 1558: 				((type, None), self.signature), 1)
 1559: 			self.index_add(self.name, self.filename, static,
 1560: 			                "function", d)
 1561: 			token = self.token()
 1562: 		    elif token[0] == "sep" and token[1] == "{":
 1563: 		        d = self.mergeFunctionComment(self.name,
 1564: 				((type, None), self.signature), static)
 1565: 			self.index_add(self.name, self.filename, static,
 1566: 			                "function", d)
 1567: 			token = self.token()
 1568: 			token = self.parseBlock(token);
 1569: 		elif token[1] == ',':
 1570: 		    self.comment = None
 1571: 		    self.index_add(self.name, self.filename, static,
 1572: 		                    "variable", type)
 1573: 		    type = type_orig
 1574: 		    token = self.token()
 1575: 		    while token != None and token[0] == "sep":
 1576: 		        type = type + token[1]
 1577: 			token = self.token()
 1578: 		    if token != None and token[0] == "name":
 1579: 		        self.name = token[1]
 1580: 			token = self.token()
 1581: 		else:
 1582: 		    break
 1583: 
 1584: 	return token
 1585: 
 1586:     def parse(self):
 1587:         self.warning("Parsing %s" % (self.filename))
 1588:         token = self.token()
 1589: 	while token != None:
 1590:             if token[0] == 'name':
 1591: 	        token = self.parseGlobal(token)
 1592:             else:
 1593: 	        self.error("token %s %s unexpected at the top level" % (
 1594: 		       token[0], token[1]))
 1595: 		token = self.parseGlobal(token)
 1596: 		return
 1597: 	self.parseTopComment(self.top_comment)
 1598:         return self.index
 1599: 
 1600: 
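#
# A rough sketch of how docBuilder is driven (mirroring rebuild() at the
# bottom of this file): construct it with a module name, the directories to
# scan and a list of file-name substrings to exclude, then call scan(),
# analyze() and serialize() in that order, e.g.
#
#   builder = docBuilder("libxml2", ["..", "../include/libxml"], ["tst.c"])
#   builder.scan()        # collect and parse the *.c and *.h files
#   builder.analyze()     # cross-reference the merged index
#   builder.serialize()   # write libxml2-api.xml and libxml2-refs.xml
#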
 1601: class docBuilder:
 1602:     """A documentation builder"""
 1603:     def __init__(self, name, directories=['.'], excludes=[]):
 1604:         self.name = name
 1605:         self.directories = directories
 1606: 	self.excludes = excludes + ignored_files.keys()
 1607: 	self.modules = {}
 1608: 	self.headers = {}
 1609: 	self.idx = index()
 1610:         self.xref = {}
 1611: 	self.index = {}
 1612: 	if name == 'libxml2':
 1613: 	    self.basename = 'libxml'
 1614: 	else:
 1615: 	    self.basename = name
 1616: 
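    # indexString() feeds the full-text cross-reference: punctuation in the
    # description is blanked out, the rest is split into words, and every
    # word that starts with a letter, is at least three characters long and
    # is not a stop word ('and', 'the') is recorded in self.xref as
    # word -> [identifiers whose documentation mentions it].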
 1617:     def indexString(self, id, str):
 1618: 	if str == None:
 1619: 	    return
 1620: 	str = string.replace(str, "'", ' ')
 1621: 	str = string.replace(str, '"', ' ')
 1622: 	str = string.replace(str, "/", ' ')
 1623: 	str = string.replace(str, '*', ' ')
 1624: 	str = string.replace(str, "[", ' ')
 1625: 	str = string.replace(str, "]", ' ')
 1626: 	str = string.replace(str, "(", ' ')
 1627: 	str = string.replace(str, ")", ' ')
 1628: 	str = string.replace(str, "<", ' ')
 1629: 	str = string.replace(str, '>', ' ')
 1630: 	str = string.replace(str, "&", ' ')
 1631: 	str = string.replace(str, '#', ' ')
 1632: 	str = string.replace(str, ",", ' ')
 1633: 	str = string.replace(str, '.', ' ')
 1634: 	str = string.replace(str, ';', ' ')
 1635: 	tokens = string.split(str)
 1636: 	for token in tokens:
 1637: 	    try:
 1638: 		c = token[0]
 1639: 		if string.find(string.letters, c) < 0:
 1640: 		    pass
 1641: 		elif len(token) < 3:
 1642: 		    pass
 1643: 		else:
 1644: 		    lower = string.lower(token)
 1645: 		    # TODO: generalize this a bit
 1646: 		    if lower == 'and' or lower == 'the':
 1647: 			pass
 1648: 		    elif self.xref.has_key(token):
 1649: 			self.xref[token].append(id)
 1650: 		    else:
 1651: 			self.xref[token] = [id]
 1652: 	    except:
 1653: 		pass
 1654: 
 1655:     def analyze(self):
 1656:         print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
 1657: 	self.idx.analyze()
 1658: 
 1659:     def scanHeaders(self):
 1660: 	for header in self.headers.keys():
 1661: 	    parser = CParser(header)
 1662: 	    idx = parser.parse()
 1663: 	    self.headers[header] = idx;
 1664: 	    self.idx.merge(idx)
 1665: 
 1666:     def scanModules(self):
 1667: 	for module in self.modules.keys():
 1668: 	    parser = CParser(module)
 1669: 	    idx = parser.parse()
 1670: 	    # idx.analyze()
 1671: 	    self.modules[module] = idx
 1672: 	    self.idx.merge_public(idx)
 1673: 
 1674:     def scan(self):
 1675:         for directory in self.directories:
 1676: 	    files = glob.glob(directory + "/*.c")
 1677: 	    for file in files:
 1678: 	        skip = 0
 1679: 		for excl in self.excludes:
 1680: 		    if string.find(file, excl) != -1:
 1681: 		        skip = 1;
 1682: 			break
 1683: 		if skip == 0:
 1684: 		    self.modules[file] = None;
 1685: 	    files = glob.glob(directory + "/*.h")
 1686: 	    for file in files:
 1687: 	        skip = 0
 1688: 		for excl in self.excludes:
 1689: 		    if string.find(file, excl) != -1:
 1690: 		        skip = 1;
 1691: 			break
 1692: 		if skip == 0:
 1693: 		    self.headers[file] = None;
 1694: 	self.scanHeaders()
 1695: 	self.scanModules()
 1696: 
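    # modulename_file() below reduces a path to the module name used in the
    # XML output, e.g. "../include/libxml/tree.h" and "../tree.c" both map
    # to "tree".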
 1697:     def modulename_file(self, file):
 1698:         module = os.path.basename(file)
 1699: 	if module[-2:] == '.h':
 1700: 	    module = module[:-2]
 1701: 	elif module[-2:] == '.c':
 1702: 	    module = module[:-2]
 1703: 	return module
 1704: 
 1705:     def serialize_enum(self, output, name):
 1706:         id = self.idx.enums[name]
 1707:         output.write("    <enum name='%s' file='%s'" % (name,
 1708: 	             self.modulename_file(id.header)))
 1709: 	if id.info != None:
 1710: 	    info = id.info
 1711: 	    if info[0] != None and info[0] != '':
 1712: 	        try:
 1713: 		    val = eval(info[0])
 1714: 		except:
 1715: 		    val = info[0]
 1716: 		output.write(" value='%s'" % (val));
 1717: 	    if info[2] != None and info[2] != '':
 1718: 		output.write(" type='%s'" % info[2]);
 1719: 	    if info[1] != None and info[1] != '':
 1720: 		output.write(" info='%s'" % escape(info[1]));
 1721:         output.write("/>\n")
 1722: 
 1723:     def serialize_macro(self, output, name):
 1724:         id = self.idx.macros[name]
 1725:         output.write("    <macro name='%s' file='%s'>\n" % (name,
 1726: 	             self.modulename_file(id.header)))
 1727: 	if id.info != None:
 1728:             try:
 1729: 		(args, desc) = id.info
 1730: 		if desc != None and desc != "":
 1731: 		    output.write("      <info>%s</info>\n" % (escape(desc)))
 1732: 		    self.indexString(name, desc)
 1733: 		for arg in args:
 1734: 		    (name, desc) = arg
 1735: 		    if desc != None and desc != "":
 1736: 			output.write("      <arg name='%s' info='%s'/>\n" % (
 1737: 				     name, escape(desc)))
 1738: 			self.indexString(name, desc)
 1739: 		    else:
 1740: 			output.write("      <arg name='%s'/>\n" % (name))
 1741:             except:
 1742:                 pass
 1743:         output.write("    </macro>\n")
 1744: 
 1745:     def serialize_typedef(self, output, name):
 1746:         id = self.idx.typedefs[name]
 1747: 	if id.info[0:7] == 'struct ':
 1748: 	    output.write("    <struct name='%s' file='%s' type='%s'" % (
 1749: 	             name, self.modulename_file(id.header), id.info))
 1750: 	    name = id.info[7:]
 1751: 	    if self.idx.structs.has_key(name) and ( \
 1752: 	       type(self.idx.structs[name].info) == type(()) or
 1753: 		type(self.idx.structs[name].info) == type([])):
 1754: 	        output.write(">\n");
 1755: 		try:
 1756: 		    for field in self.idx.structs[name].info:
 1757: 			desc = field[2]
 1758: 			self.indexString(name, desc)
 1759: 			if desc == None:
 1760: 			    desc = ''
 1761: 			else:
 1762: 			    desc = escape(desc)
 1763: 			output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
 1764: 		except:
 1765: 		    print "Failed to serialize struct %s" % (name)
 1766: 		output.write("    </struct>\n")
 1767: 	    else:
 1768: 	        output.write("/>\n");
 1769: 	else :
 1770: 	    output.write("    <typedef name='%s' file='%s' type='%s'" % (
 1771: 	                 name, self.modulename_file(id.header), id.info))
 1772:             try:
 1773: 		desc = id.extra
 1774: 		if desc != None and desc != "":
 1775: 		    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
 1776: 		    output.write("    </typedef>\n")
 1777: 		else:
 1778: 		    output.write("/>\n")
 1779: 	    except:
 1780: 		output.write("/>\n")
 1781: 
 1782:     def serialize_variable(self, output, name):
 1783:         id = self.idx.variables[name]
 1784: 	if id.info != None:
 1785: 	    output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
 1786: 		    name, self.modulename_file(id.header), id.info))
 1787: 	else:
 1788: 	    output.write("    <variable name='%s' file='%s'/>\n" % (
 1789: 	            name, self.modulename_file(id.header)))
 1790: 
 1791:     def serialize_function(self, output, name):
 1792:         id = self.idx.functions[name]
 1793: 	if name == debugsym:
 1794: 	    print "=>", id
 1795: 
 1796:         output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
 1797: 	             name, self.modulename_file(id.header),
 1798: 		     self.modulename_file(id.module)))
 1799: 	#
 1800: 	# Processing of conditionals modified by Bill 1/1/05
 1801: 	#
 1802: 	if id.conditionals != None:
 1803: 	    apstr = ""
 1804: 	    for cond in id.conditionals:
 1805: 	        if apstr != "":
 1806: 		    apstr = apstr + " &amp;&amp; "
 1807: 		apstr = apstr + cond
 1808: 	    output.write("      <cond>%s</cond>\n"% (apstr));
 1809: 	try:
 1810: 	    (ret, params, desc) = id.info
 1811: 	    if (desc == None or desc == '') and \
 1812: 	       name[0:9] != "xmlThrDef" and name != "xmlDllMain":
 1813: 	        print "%s %s from %s has no description" % (id.type, name,
 1814: 		       self.modulename_file(id.module))
 1815: 
 1816: 	    output.write("      <info>%s</info>\n" % (escape(desc)))
 1817: 	    self.indexString(name, desc)
 1818: 	    if ret[0] != None:
 1819: 	        if ret[0] == "void":
 1820: 		    output.write("      <return type='void'/>\n")
 1821: 		else:
 1822: 		    output.write("      <return type='%s' info='%s'/>\n" % (
 1823: 			     ret[0], escape(ret[1])))
 1824: 		    self.indexString(name, ret[1])
 1825: 	    for param in params:
 1826: 	        if param[0] == 'void':
 1827: 		    continue
 1828: 	        if param[2] == None:
 1829: 		    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
 1830: 		else:
 1831: 		    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
 1832: 		    self.indexString(name, param[2])
 1833: 	except:
 1834: 	    print "Failed to save function %s info: " % name, `id.info`
 1835:         output.write("    </%s>\n" % (id.type))
 1836: 
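    # The element written by serialize_function() above has roughly this
    # shape (id.type is normally 'function'; attribute values elided):
    #
    #   <function name='...' file='...' module='...'>
    #     <cond>...</cond>
    #     <info>...</info>
    #     <return type='...' info='...'/>
    #     <arg name='...' type='...' info='...'/>
    #   </function>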
 1837:     def serialize_exports(self, output, file):
 1838:         module = self.modulename_file(file)
 1839: 	output.write("    <file name='%s'>\n" % (module))
 1840: 	dict = self.headers[file]
 1841: 	if dict.info != None:
 1842: 	    for data in ('Summary', 'Description', 'Author'):
 1843: 		try:
 1844: 		    output.write("     <%s>%s</%s>\n" % (
 1845: 		                 string.lower(data),
 1846: 				 escape(dict.info[data]),
 1847: 				 string.lower(data)))
 1848: 		except:
 1849: 		    print "Header %s lacks a %s description" % (module, data)
 1850: 	    if dict.info.has_key('Description'):
 1851: 	        desc = dict.info['Description']
 1852: 		if string.find(desc, "DEPRECATED") != -1:
 1853: 		    output.write("     <deprecated/>\n")
 1854: 
 1855:         ids = dict.macros.keys()
 1856: 	ids.sort()
 1857: 	for id in uniq(ids):
 1858: 	    # Macros are sometimes used to masquerade as other types.
 1859: 	    if dict.functions.has_key(id):
 1860: 	        continue
 1861: 	    if dict.variables.has_key(id):
 1862: 	        continue
 1863: 	    if dict.typedefs.has_key(id):
 1864: 	        continue
 1865: 	    if dict.structs.has_key(id):
 1866: 	        continue
 1867: 	    if dict.enums.has_key(id):
 1868: 	        continue
 1869: 	    output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
 1870:         ids = dict.enums.keys()
 1871: 	ids.sort()
 1872: 	for id in uniq(ids):
 1873: 	    output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
 1874:         ids = dict.typedefs.keys()
 1875: 	ids.sort()
 1876: 	for id in uniq(ids):
 1877: 	    output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
 1878:         ids = dict.structs.keys()
 1879: 	ids.sort()
 1880: 	for id in uniq(ids):
 1881: 	    output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
 1882:         ids = dict.variables.keys()
 1883: 	ids.sort()
 1884: 	for id in uniq(ids):
 1885: 	    output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
 1886:         ids = dict.functions.keys()
 1887: 	ids.sort()
 1888: 	for id in uniq(ids):
 1889: 	    output.write("     <exports symbol='%s' type='function'/>\n" % (id))
 1890: 	output.write("    </file>\n")
 1891: 
 1892:     def serialize_xrefs_files(self, output):
 1893:         headers = self.headers.keys()
 1894:         headers.sort()
 1895:         for file in headers:
 1896: 	    module = self.modulename_file(file)
 1897: 	    output.write("    <file name='%s'>\n" % (module))
 1898: 	    dict = self.headers[file]
 1899: 	    ids = uniq(dict.functions.keys() + dict.variables.keys() + \
 1900: 		  dict.macros.keys() + dict.typedefs.keys() + \
 1901: 		  dict.structs.keys() + dict.enums.keys())
 1902: 	    ids.sort()
 1903: 	    for id in ids:
 1904: 		output.write("      <ref name='%s'/>\n" % (id))
 1905: 	    output.write("    </file>\n")
 1906:         pass
 1907: 
 1908:     def serialize_xrefs_functions(self, output):
 1909:         funcs = {}
 1910: 	for name in self.idx.functions.keys():
 1911: 	    id = self.idx.functions[name]
 1912: 	    try:
 1913: 		(ret, params, desc) = id.info
 1914: 		for param in params:
 1915: 		    if param[0] == 'void':
 1916: 			continue
 1917: 		    if funcs.has_key(param[0]):
 1918: 		        funcs[param[0]].append(name)
 1919: 		    else:
 1920: 		        funcs[param[0]] = [name]
 1921: 	    except:
 1922: 	        pass
 1923: 	typ = funcs.keys()
 1924: 	typ.sort()
 1925: 	for type in typ:
 1926: 	    if type == '' or type == 'void' or type == "int" or \
 1927: 	       type == "char *" or type == "const char *" :
 1928: 	        continue
 1929: 	    output.write("    <type name='%s'>\n" % (type))
 1930: 	    ids = funcs[type]
 1931: 	    ids.sort()
 1932: 	    pid = ''	# not sure why we have dups, but get rid of them!
 1933: 	    for id in ids:
 1934: 	        if id != pid:
 1935: 	            output.write("      <ref name='%s'/>\n" % (id))
 1936: 		    pid = id
 1937: 	    output.write("    </type>\n")
 1938: 
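    # The two xref tables around this point are symmetric: the <functions>
    # section produced above groups functions by the types of their
    # parameters, while the <constructors> section produced below groups them
    # by return type; trivial types (void, int, char *, const char *) are
    # skipped in both cases.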
 1939:     def serialize_xrefs_constructors(self, output):
 1940:         funcs = {}
 1941: 	for name in self.idx.functions.keys():
 1942: 	    id = self.idx.functions[name]
 1943: 	    try:
 1944: 		(ret, params, desc) = id.info
 1945: 		if ret[0] == "void":
 1946: 		    continue
 1947: 		if funcs.has_key(ret[0]):
 1948: 		    funcs[ret[0]].append(name)
 1949: 		else:
 1950: 		    funcs[ret[0]] = [name]
 1951: 	    except:
 1952: 	        pass
 1953: 	typ = funcs.keys()
 1954: 	typ.sort()
 1955: 	for type in typ:
 1956: 	    if type == '' or type == 'void' or type == "int" or \
 1957: 	       type == "char *" or type == "const char *" :
 1958: 	        continue
 1959: 	    output.write("    <type name='%s'>\n" % (type))
 1960: 	    ids = funcs[type]
 1961: 	    ids.sort()
 1962: 	    for id in ids:
 1963: 	        output.write("      <ref name='%s'/>\n" % (id))
 1964: 	    output.write("    </type>\n")
 1965: 
 1966:     def serialize_xrefs_alpha(self, output):
 1967: 	letter = None
 1968: 	ids = self.idx.identifiers.keys()
 1969: 	ids.sort()
 1970: 	for id in ids:
 1971: 	    if id[0] != letter:
 1972: 		if letter != None:
 1973: 		    output.write("    </letter>\n")
 1974: 		letter = id[0]
 1975: 		output.write("    <letter name='%s'>\n" % (letter))
 1976: 	    output.write("      <ref name='%s'/>\n" % (id))
 1977: 	if letter != None:
 1978: 	    output.write("    </letter>\n")
 1979: 
 1980:     def serialize_xrefs_references(self, output):
 1981:         typ = self.idx.identifiers.keys()
 1982: 	typ.sort()
 1983: 	for id in typ:
 1984: 	    idf = self.idx.identifiers[id]
 1985: 	    module = idf.header
 1986: 	    output.write("    <reference name='%s' href='%s'/>\n" % (id,
 1987: 	                 'html/' + self.basename + '-' +
 1988: 		         self.modulename_file(module) + '.html#' +
 1989: 			 id))
 1990: 
 1991:     def serialize_xrefs_index(self, output):
 1992:         index = self.xref
 1993: 	typ = index.keys()
 1994: 	typ.sort()
 1995: 	letter = None
 1996: 	count = 0
 1997: 	chunk = 0
 1998: 	chunks = []
 1999: 	for id in typ:
 2000: 	    if len(index[id]) > 30:
 2001: 		continue
 2002: 	    if id[0] != letter:
 2003: 		if letter == None or count > 200:
 2004: 		    if letter != None:
 2005: 			output.write("      </letter>\n")
 2006: 			output.write("    </chunk>\n")
 2007: 			count = 0
 2008: 			chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
 2009: 		    output.write("    <chunk name='chunk%s'>\n" % (chunk))
 2010: 		    first_letter = id[0]
 2011: 		    chunk = chunk + 1
 2012: 		elif letter != None:
 2013: 		    output.write("      </letter>\n")
 2014: 		letter = id[0]
 2015: 		output.write("      <letter name='%s'>\n" % (letter))
 2016: 	    output.write("        <word name='%s'>\n" % (id))
 2017: 	    tokens = index[id];
 2018: 	    tokens.sort()
 2019: 	    tok = None
 2020: 	    for token in tokens:
 2021: 		if tok == token:
 2022: 		    continue
 2023: 		tok = token
 2024: 		output.write("          <ref name='%s'/>\n" % (token))
 2025: 		count = count + 1
 2026: 	    output.write("        </word>\n")
 2027: 	if letter != None:
 2028: 	    output.write("      </letter>\n")
 2029: 	    output.write("    </chunk>\n")
 2030: 	    if count != 0:
 2031: 	        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
 2032: 	    output.write("    <chunks>\n")
 2033: 	    for ch in chunks:
 2034: 		output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
 2035: 			     ch[0], ch[1], ch[2]))
 2036: 	    output.write("    </chunks>\n")
 2037: 
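    # The <index> built above is paginated: words referenced from more than
    # 30 identifiers are dropped as too common, a new <chunk> is started when
    # more than 200 references have accumulated at a letter boundary, and the
    # trailing <chunks> summary records the first and last letter covered by
    # each chunk.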
 2038:     def serialize_xrefs(self, output):
 2039: 	output.write("  <references>\n")
 2040: 	self.serialize_xrefs_references(output)
 2041: 	output.write("  </references>\n")
 2042: 	output.write("  <alpha>\n")
 2043: 	self.serialize_xrefs_alpha(output)
 2044: 	output.write("  </alpha>\n")
 2045: 	output.write("  <constructors>\n")
 2046: 	self.serialize_xrefs_constructors(output)
 2047: 	output.write("  </constructors>\n")
 2048: 	output.write("  <functions>\n")
 2049: 	self.serialize_xrefs_functions(output)
 2050: 	output.write("  </functions>\n")
 2051: 	output.write("  <files>\n")
 2052: 	self.serialize_xrefs_files(output)
 2053: 	output.write("  </files>\n")
 2054: 	output.write("  <index>\n")
 2055: 	self.serialize_xrefs_index(output)
 2056: 	output.write("  </index>\n")
 2057: 
 2058:     def serialize(self):
 2059:         filename = "%s-api.xml" % self.name
 2060:         print "Saving XML description %s" % (filename)
 2061:         output = open(filename, "w")
 2062:         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
 2063:         output.write("<api name='%s'>\n" % self.name)
 2064:         output.write("  <files>\n")
 2065:         headers = self.headers.keys()
 2066:         headers.sort()
 2067:         for file in headers:
 2068:             self.serialize_exports(output, file)
 2069:         output.write("  </files>\n")
 2070:         output.write("  <symbols>\n")
 2071:         macros = self.idx.macros.keys()
 2072:         macros.sort()
 2073:         for macro in macros:
 2074:             self.serialize_macro(output, macro)
 2075:         enums = self.idx.enums.keys()
 2076:         enums.sort()
 2077:         for enum in enums:
 2078:             self.serialize_enum(output, enum)
 2079:         typedefs = self.idx.typedefs.keys()
 2080:         typedefs.sort()
 2081:         for typedef in typedefs:
 2082:             self.serialize_typedef(output, typedef)
 2083:         variables = self.idx.variables.keys()
 2084:         variables.sort()
 2085:         for variable in variables:
 2086:             self.serialize_variable(output, variable)
 2087:         functions = self.idx.functions.keys()
 2088:         functions.sort()
 2089:         for function in functions:
 2090:             self.serialize_function(output, function)
 2091:         output.write("  </symbols>\n")
 2092:         output.write("</api>\n")
 2093:         output.close()
 2094: 
 2095:         filename = "%s-refs.xml" % self.name
 2096:         print "Saving XML Cross References %s" % (filename)
 2097:         output = open(filename, "w")
 2098:         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
 2099:         output.write("<apirefs name='%s'>\n" % self.name)
 2100:         self.serialize_xrefs(output)
 2101:         output.write("</apirefs>\n")
 2102:         output.close()
 2103: 
 2104: 
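#
# serialize() lays out <module>-api.xml roughly as:
#
#   <api name='...'>
#     <files>    one <file> element with <exports> entries per public header
#     <symbols>  <macro>, <enum>, <typedef>/<struct>, <variable> and
#                <function> descriptions, in that order
#   </api>
#
# plus a companion <module>-refs.xml holding the cross references built by
# serialize_xrefs().
#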
 2105: def rebuild():
 2106:     builder = None
 2107:     if glob.glob("parser.c") != [] :
 2108:         print "Rebuilding API description for libxml2"
 2109: 	builder = docBuilder("libxml2", [".", "."],
 2110: 	                     ["xmlwin32version.h", "tst.c"])
 2111:     elif glob.glob("../parser.c") != [] :
 2112:         print "Rebuilding API description for libxml2"
 2113: 	builder = docBuilder("libxml2", ["..", "../include/libxml"],
 2114: 	                     ["xmlwin32version.h", "tst.c"])
 2115:     elif glob.glob("../libxslt/transform.c") != [] :
 2116:         print "Rebuilding API description for libxslt"
 2117: 	builder = docBuilder("libxslt", ["../libxslt"],
 2118: 	                     ["win32config.h", "libxslt.h", "tst.c"])
 2119:     else:
 2120:         print "rebuild() failed, unable to guess the module"
 2121: 	return None
 2122:     builder.scan()
 2123:     builder.analyze()
 2124:     builder.serialize()
 2125:     if glob.glob("../libexslt/exslt.c") != [] :
 2126:         extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
 2127: 	extra.scan()
 2128: 	extra.analyze()
 2129: 	extra.serialize()
 2130:     return builder
 2131: 
 2132: #
 2133: # for debugging the parser
 2134: #
 2135: def parse(filename):
 2136:     parser = CParser(filename)
 2137:     idx = parser.parse()
 2138:     return idx
 2139: 
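#
# Typical invocations, judging from the entry point below: run the script
# with no argument to regenerate the API description for whichever module it
# detects (libxml2, libxslt, and optionally libexslt), or pass a single C
# file to parse just that file with debugging enabled, e.g.
#
#   python apibuild.py              # rebuild <module>-api.xml / <module>-refs.xml
#   python apibuild.py ../tree.c    # debug-parse one source file
#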
 2140: if __name__ == "__main__":
 2141:     if len(sys.argv) > 1:
 2142:         debug = 1
 2143:         parse(sys.argv[1])
 2144:     else:
 2145: 	rebuild()
