Annotation of embedaddon/libxml2/python/libxml.py, revision 1.1.1.1
1.1 misho 1: import libxml2mod
2: import types
3: import sys
4:
5: # The root of all libxml2 errors.
class libxmlError(Exception):
    """Root of all the libxml2 wrapper errors."""
7:
8: #
9: # id() is sometimes negative ...
10: #
def pos_id(o):
    """Return a non-negative identifier derived from id(o).

    id() is sometimes negative; in that case the value is mapped to
    (largest native int - id), which is positive.
    """
    i = id(o)
    if i < 0:
        # sys.maxint does not exist on Python 3; keep using it when it is
        # available so existing results are unchanged, else use maxsize.
        limit = getattr(sys, "maxint", None)
        if limit is None:
            limit = sys.maxsize
        return limit - i
    return i
16:
17: #
18: # Errors raised by the wrappers when some tree handling failed.
19: #
class treeError(libxmlError):
    """Error raised by the wrappers when some tree handling failed."""

    def __init__(self, msg):
        # The message is kept on the instance and handed back by __str__.
        self.msg = msg

    def __str__(self):
        return self.msg
25:
class parserError(libxmlError):
    """Parser error carrying a message; str() returns that message."""

    def __init__(self, msg):
        # The message is kept on the instance and handed back by __str__.
        self.msg = msg

    def __str__(self):
        return self.msg
31:
class uriError(libxmlError):
    """URI related error carrying a message; str() returns that message."""

    def __init__(self, msg):
        # The message is kept on the instance and handed back by __str__.
        self.msg = msg

    def __str__(self):
        return self.msg
37:
class xpathError(libxmlError):
    """XPath related error carrying a message; str() returns that message."""

    def __init__(self, msg):
        # The message is kept on the instance and handed back by __str__.
        self.msg = msg

    def __str__(self):
        return self.msg
43:
class ioWrapper:
    """Adapter exposing io_close/io_flush/io_read/io_write on an I/O object.

    The wrapped object is released by io_close(); after that every io_*
    method returns -1 instead of touching it.
    """

    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close and drop the wrapped object.

        Returns 0 on success, -1 when there is no wrapped object.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 when there is no wrapped object.
        """
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len = -1):
        """Read from the wrapped object.

        With a negative len everything is read, otherwise at most len
        units are requested.  Returns what the wrapped read() returns,
        or -1 when there is no wrapped object.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.read()
        return self.__io.read(len)

    def io_write(self, str, len = -1):
        """Write str to the wrapped object.

        With a negative len the whole string is written, otherwise only
        its first len items.  Returns what the wrapped write() returns,
        or -1 when there is no wrapped object.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # Standard file objects only accept write(data); passing the length
        # as a second argument raises TypeError, so slice instead.
        return self.__io.write(str[:len])
75:
class ioReadWrapper(ioWrapper):
    """ioWrapper that also owns a libxml2 parser input buffer.

    The buffer is created over this wrapper at construction time and is
    released, together with the wrapped I/O object, by close() or when
    the instance is collected.
    """

    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same cleanup as close(); the stray debugging print that used to
        # write "__del__" on stdout for every collected reader is gone.
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

    def close(self):
        """Close the wrapped I/O object and free the input buffer."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
93:
class ioWriteWrapper(ioWrapper):
    """ioWrapper that also tracks a libxml2 output buffer in ``_o``."""

    def __init__(self, _obj, enc = ""):
        if type(_obj) == type(''):
            # A plain string gives neither an I/O object nor a buffer.
            # Initialise the base class anyway so that __del__/flush/close
            # find their attributes (the old code set a misspelled
            # ``self.o`` and left the instance half-built).
            ioWrapper.__init__(self, None)
        elif type(_obj) == types.InstanceType:
            # Old-style class instance: wrap it and create a new output
            # buffer that calls back into this wrapper.
            ioWrapper.__init__(self, _obj)
            self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)
        else:
            # Otherwise _obj is used as the output buffer itself; wrap the
            # Python file the binding reports for it, or _obj if none.
            pyfile = libxml2mod.outputBufferGetPythonFile(_obj)
            if pyfile is not None:
                ioWrapper.__init__(self, pyfile)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def __del__(self):
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def flush(self):
        """Flush the wrapped I/O object and close the output buffer.

        NOTE(review): this closes the buffer exactly like close() does;
        confirm whether a plain buffer flush was intended here.
        """
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Flush the wrapped I/O object and close the output buffer."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
130:
131: #
132: # Example of a class to handle SAX events
133: #
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op except error() and fatalError(), which
    raise parserError.  Subclasses override the events they care about.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be a second definition of entityDecl(), which
    # silently replaced the five-argument version above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        pass

    def error(self, msg):
        """called with an error message; raises parserError"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError"""
        raise parserError(msg)
232:
233: #
234: # This class is the ancestor of all the Node classes. It provides
235: # the basic functionality shared by all nodes (and gracefully
236: # handles exceptions), such as name, navigation in the tree,
237: # doc reference, content access and serializing to a string or URI
238: #
class xmlCore:
    """Ancestor of all the node wrapper classes.

    Holds the low-level object in ``_o`` and provides the operations
    shared by all nodes: comparison and hashing, navigation in the tree,
    name/type/content access, serialization, canonicalization, XPath
    evaluation and iteration.
    """

    def __init__(self, _obj=None):
        self._o = None
        if _obj is not None:
            self._o = _obj

    def __eq__(self, other):
        """Compare the wrapped nodes; anything that is not a wrapper
        (no ``_o`` attribute) is never equal."""
        if other is None:
            return False
        try:
            other_obj = other._o
        except AttributeError:
            # Comparing with an arbitrary object used to raise here.
            return False
        ret = libxml2mod.compareNodesEqual(self._o, other_obj)
        if ret is None:
            return False
        return ret == True

    def __ne__(self, other):
        """Negation of the node comparison; anything that is not a
        wrapper (no ``_o`` attribute) is always different."""
        if other is None:
            return True
        try:
            other_obj = other._o
        except AttributeError:
            return True
        ret = libxml2mod.compareNodesEqual(self._o, other_obj)
        return not ret

    def __hash__(self):
        ret = libxml2mod.nodeHash(self._o)
        return ret

    def __str__(self):
        return self.serialize()

    #
    # Accessors: each returns None when the binding returns None,
    # otherwise the result wrapped in the matching Python class.
    #
    def get_parent(self):
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_children(self):
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_last(self):
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_next(self):
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_properties(self):
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # why is this duplicate naming needed ?

    def get_name(self):
        return libxml2mod.name(self._o)

    def get_type(self):
        return libxml2mod.type(self._o)

    def get_doc(self):
        ret = libxml2mod.doc(self._o)
        if ret is None:
            # A document node has no owner document: it is its own.
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            else:
                return None
        return xmlDoc(_obj=ret)

    #
    # Those are common attributes to nearly all type of nodes
    # defined as python2 properties
    #
    # sys.version_info is compared as a tuple instead of parsing the
    # first characters of sys.version as a float.
    if sys.version_info < (2, 2):
        def __getattr__(self, attr):
            # No property() before Python 2.2: route the attribute names
            # to the get_*() accessors defined above.
            if attr in ("parent", "properties", "children", "last", "next",
                        "prev", "content", "name", "type", "doc"):
                return getattr(self, "get_" + attr)()
            raise AttributeError(attr)
    else:
        parent = property(get_parent, None, None, "Parent node")
        children = property(get_children, None, None, "First child node")
        last = property(get_last, None, None, "Last sibling node")
        next = property(get_next, None, None, "Next sibling node")
        prev = property(get_prev, None, None, "Previous sibling node")
        properties = property(get_properties, None, None, "List of properies")
        content = property(get_content, None, None, "Content of this node")
        name = property(get_name, None, None, "Node name")
        type = property(get_type, None, None, "Node type")
        doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # Pass the low-level objects as a real list (map() would give
            # a lazy iterator on Python 3).
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)

    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            res = ctxt.xpathEval(expr)
        finally:
            # Free the context even when the evaluation raises, otherwise
            # it would be leaked.
            ctxt.xpathFreeContext()
        return res

    #
    # A faster variant keeping one XPath context per xmlDoc
    # (doc._ctxt) used to live here.
    # Removed: DV memleaks c.f. #126735
    # xpathEval2 is kept as a plain alias of xpathEval.
    #
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """
        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for python2 iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)
    __iter__ = walk_depth_first

    def free(self):
        """Free the cached XPath context of the document, if there is
        one, then pass the wrapped object to xmlFreeDoc()."""
        try:
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            # Best effort: usually there is no cached context at all.
            pass
        libxml2mod.xmlFreeDoc(self._o)
494:
495:
496: #
497: # implements the depth-first iterator for libxml2 DOM tree
498: #
class xmlCoreDepthFirstItertor:
    """Depth-first iterator over a libxml2 DOM tree.

    Yields a node, then walks down through its ``children``; when there
    is nothing left below, it resumes at the ``next`` sibling of the most
    recently stacked node.  Iteration ends when the stack is empty.
    """

    def __init__(self, node):
        self.node = node
        self.parents = []

    def __iter__(self):
        return self

    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.children
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.next

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next
517:
518: #
519: # implements the breadth-first iterator for libxml2 DOM tree
520: #
class xmlCoreBreadthFirstItertor:
    """Sibling-first iterator over a libxml2 DOM tree.

    Yields a node, then walks along its ``next`` siblings; when the
    sibling chain ends, it resumes at the ``children`` of the most
    recently stacked node.  Iteration ends when the stack is empty.
    """

    def __init__(self, node):
        self.node = node
        self.parents = []

    def __iter__(self):
        return self

    def next(self):
        while 1:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.next
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.children

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next
539:
540: #
541: # converters to present a nicer view of the XPath returns
542: #
def nodeWrap(o):
    """Wrap a low-level node object in the Python class matching the
    type name reported by the binding; unknown types become xmlNode."""
    # TODO try to cast to the most appropriate node class
    kind = libxml2mod.type(o)
    if kind in ("element", "text"):
        return xmlNode(_obj=o)
    elif kind == "attribute":
        return xmlAttr(_obj=o)
    elif kind[:8] == "document":
        return xmlDoc(_obj=o)
    elif kind == "namespace":
        return xmlNs(_obj=o)
    elif kind == "elem_decl":
        return xmlElement(_obj=o)
    elif kind == "attribute_decl":
        return xmlAttribute(_obj=o)
    elif kind == "entity_decl":
        return xmlEntity(_obj=o)
    elif kind == "dtd":
        return xmlDtd(_obj=o)
    else:
        return xmlNode(_obj=o)
563:
def xpathObjectRet(o):
    """Convert an XPath result into a nicer Python value.

    Lists and tuples are converted element by element (recursively) and
    keep their container type; plain strings, integers and floats are
    returned unchanged; anything else is wrapped with nodeWrap().
    """
    otype = type(o)
    if otype == type([]):
        # Build a real list: map() is a lazy iterator on Python 3.
        return [xpathObjectRet(item) for item in o]
    elif otype == type(()):
        return tuple([xpathObjectRet(item) for item in o])
    elif otype == type('') or otype == type(0) or otype == type(0.0):
        return o
    else:
        return nodeWrap(o)
576:
577: #
578: # register an XPath function
579: #
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python callable f as the XPath function name (in
    namespace ns_uri) on ctxt.

    Returns whatever the binding reports; the result used to be stored
    in a local and dropped, so callers could not see it.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
582:
583: #
584: # For the xmlTextReader parser configuration
585: #
# Integer codes naming the xmlTextReader parser configuration options.
# NOTE(review): presumably passed to the reader's parser-property
# getter/setter -- confirm against the generated xmlTextReader class.
PARSER_LOADDTD=1
PARSER_DEFAULTATTRS=2
PARSER_VALIDATE=3
PARSER_SUBST_ENTITIES=4
590:
591: #
592: # For the error callback severities
593: #
# Integer codes for the severity argument handed to error callbacks.
# NOTE(review): presumably these match the severity values the binding
# passes to the handlers registered below -- confirm.
PARSER_SEVERITY_VALIDITY_WARNING=1
PARSER_SEVERITY_VALIDITY_ERROR=2
PARSER_SEVERITY_WARNING=3
PARSER_SEVERITY_ERROR=4
598:
599: #
600: # register the libxml2 error handler
601: #
def registerErrorHandler(f, ctx):
    """Register a Python written function for error reporting.
       The function is called back as f(ctx, error).

       Returns the result of the underlying registration call."""
    import sys
    # dict.has_key() no longer exists on Python 3; "in" works on both.
    if 'libxslt' not in sys.modules:
        # normal behaviour when libxslt is not imported
        ret = libxml2mod.xmlRegisterErrorHandler(f, ctx)
    else:
        # when libxslt is already imported, one must
        # use libxslt's error handler instead
        import libxslt
        ret = libxslt.registerErrorHandler(f, ctx)
    return ret
615:
class parserCtxtCore:
    """Core of the parser context wrappers: owns the low-level context
    stored in ``_o`` and frees it when the instance is collected."""

    def __init__(self, _obj=None):
        if _obj is None:
            self._o = None
        else:
            self._o = _obj

    def __del__(self):
        handle = self._o
        if handle is not None:
            libxml2mod.xmlFreeParserCtxt(handle)
        self._o = None

    def setErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        registered = libxml2mod.xmlParserCtxtGetErrorHandler(self._o)
        return registered

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        status = libxml2mod.addLocalCatalog(self._o, uri)
        return status
644:
645:
class ValidCtxtCore:
    """Core of the DTD validation context wrapper; the constructor
    accepts and ignores any arguments."""

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for DTD validation.
        These will be called back as f(msg,arg)
        """
        ctxt = self._o
        libxml2mod.xmlSetValidErrors(ctxt, err_func, warn_func, arg)
657:
658:
class SchemaValidCtxtCore:
    """Core of the Schema validation context wrapper; the constructor
    accepts and ignores any arguments."""

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for Schema validation.
        These will be called back as f(msg,arg)
        """
        ctxt = self._o
        libxml2mod.xmlSchemaSetValidErrors(ctxt, err_func, warn_func, arg)
670:
671:
class relaxNgValidCtxtCore:
    """Core of the RelaxNG validation context wrapper; the constructor
    accepts and ignores any arguments."""

    def __init__(self, *args, **kw):
        return None

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for RelaxNG validation.
        These will be called back as f(msg,arg)
        """
        ctxt = self._o
        libxml2mod.xmlRelaxNGSetValidErrors(ctxt, err_func, warn_func, arg)
683:
684:
def _xmlTextReaderErrorFunc(handler, msg, severity, locator):
    """Intermediate callback to wrap the locator.

    handler is the (f, arg) pair stored by SetErrorHandler; f is called
    as f(arg, msg, severity, xmlTextReaderLocator(locator)) and its
    result is returned.
    """
    # Tuple parameters in the signature are Python 2-only syntax, so the
    # pair is unpacked here; positional callers are unaffected.
    f, arg = handler
    return f(arg, msg, severity, xmlTextReaderLocator(locator))
688:
class xmlTextReaderCore:
    """Core of the xmlTextReader wrapper: owns the low-level reader
    stored in ``_o`` and frees it when the instance is collected."""

    def __init__(self, _obj=None):
        self.input = None
        if _obj is None:
            self._o = None
        else:
            self._o = _obj

    def __del__(self):
        reader = self._o
        if reader is not None:
            libxml2mod.xmlFreeTextReader(reader)
        self._o = None

    def SetErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is not None:
            # The user callback and its argument travel as one pair; the
            # intermediate function wraps the locator before calling f.
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o, _xmlTextReaderErrorFunc, (f, arg))
        else:
            libxml2mod.xmlTextReaderSetErrorHandler(self._o, None, None)

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        callback, payload = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if callback is not None:
            # assert callback is _xmlTextReaderErrorFunc
            # payload is the (f, arg) pair stored by SetErrorHandler.
            return payload
        return None, None
720:
721: #
722: # The cleanup now goes through a wrapper in libxml.c
723: #
def cleanupParser():
    """Run the parser cleanup through the binding's
    xmlPythonCleanupParser() wrapper."""
    libxml2mod.xmlPythonCleanupParser()
726:
727: # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
728: #
729: # Everything before this line comes from libxml.py
730: # Everything after this line is automatically generated
731: #
732: # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
733:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>