PyXR

c:\python24\lib\xml\dom\pulldom.py



0001 import xml.sax
0002 import xml.sax.handler
0003 import types
0004 
0005 try:
0006     _StringTypes = [types.StringType, types.UnicodeType]
0007 except AttributeError:
0008     _StringTypes = [types.StringType]
0009 
0010 START_ELEMENT = "START_ELEMENT"
0011 END_ELEMENT = "END_ELEMENT"
0012 COMMENT = "COMMENT"
0013 START_DOCUMENT = "START_DOCUMENT"
0014 END_DOCUMENT = "END_DOCUMENT"
0015 PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
0016 IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
0017 CHARACTERS = "CHARACTERS"
0018 
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that records parser callbacks as DOM events.

    Events live in a singly linked list of two-item lists of the form
    ``[(token, node), next_event]``.  ``firstEvent`` is a sentinel whose
    second slot points at the oldest queued event; ``lastEvent`` is the
    event most recently linked in (or the sentinel while the queue is
    empty).  ``elementStack`` holds the document node and the chain of
    currently open elements.
    """
    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        """Set up an empty event queue, element stack and namespace context.

        documentFactory -- object whose ``createDocument(uri, tagname, None)``
        builds the document; when None, startDocument() installs minidom's.
        """
        from xml.dom import XML_NAMESPACE
        self.documentFactory = documentFactory
        sentinel = [None, None]
        self.firstEvent = sentinel
        self.lastEvent = sentinel
        stack = []
        self.elementStack = stack
        self.push = stack.append
        try:
            self.pop = stack.pop
        except AttributeError:
            # No list.pop available: the pop() method of the class is used.
            pass
        # Each context is a dict mapping namespace uri -> prefix.
        base_context = {XML_NAMESPACE: 'xml'}
        self._ns_contexts = [base_context]
        self._current_context = base_context
        self.pending_events = []

    def _link_event(self, event):
        """Append *event* to the linked list and make it the last event."""
        self.lastEvent[1] = event
        self.lastEvent = event

    def pop(self):
        """Remove and return the top of the element stack."""
        top = self.elementStack[-1]
        del self.elementStack[-1]
        return top

    def setDocumentLocator(self, locator):
        """Remember the locator object handed over by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration and open a new prefix context."""
        try:
            declared = self._xmlns_attrs
        except AttributeError:
            declared = self._xmlns_attrs = []
        declared.append((prefix or 'xmlns', uri))
        # Save a snapshot of the context as it was; endPrefixMapping()
        # restores it.  The live context is then updated in place.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create a namespace-aware element and queue a START_ELEMENT event.

        name -- ``(uri, localname)`` pair; tagName -- qualified name or None;
        attrs -- attribute collection keyed by ``(uri, localname)``.
        """
        xmlns_uri = 'http://www.w3.org/2000/xmlns/'
        # Turn the namespace declarations collected since the previous
        # element into xmlns attributes of this element.
        declared = getattr(self, '_xmlns_attrs', None)
        if declared is not None:
            for aname, value in declared:
                attrs._attrs[(xmlns_uri, aname)] = value
            self._xmlns_attrs = []

        uri, localname = name
        if not uri:
            # No namespace: the tag name is just the local name.
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)
        else:
            # The reader may not supply the original qualified name; if it
            # did not, derive *a* valid one from the current context.
            if tagName is None:
                prefix = self._current_context[uri]
                if prefix:
                    tagName = prefix + ":" + localname
                else:
                    tagName = localname
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.buildDocument(uri, tagName)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri == xmlns_uri:
                # Namespace declaration: "xmlns" or "xmlns:<prefix>".
                if a_localname == 'xmlns':
                    qname = a_localname
                else:
                    qname = 'xmlns:' + a_localname
            elif a_uri:
                prefix = self._current_context[a_uri]
                if prefix:
                    qname = prefix + ":" + a_localname
                else:
                    qname = a_localname
            else:
                # Attribute without a namespace.
                attr = self.document.createAttribute(a_localname)
                node.setAttributeNode(attr)
                attr.value = value
                continue
            attr = self.document.createAttributeNS(a_uri, qname)
            node.setAttributeNodeNS(attr)
            attr.value = value

        self._link_event([(START_ELEMENT, node), None])
        self.push(node)

    def endElementNS(self, name, tagName):
        """Queue an END_ELEMENT event for the element popped off the stack."""
        self._link_event([(END_ELEMENT, self.pop()), None])

    def startElement(self, name, attrs):
        """Create a plain element and queue a START_ELEMENT event for it."""
        if not self.document:
            # First element: the document is created around it.
            node = self.buildDocument(None, name)
        else:
            node = self.document.createElement(name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self._link_event([(START_ELEMENT, node), None])
        self.push(node)

    def endElement(self, name):
        """Queue an END_ELEMENT event for the element popped off the stack."""
        self._link_event([(END_ELEMENT, self.pop()), None])

    def comment(self, s):
        """Queue a COMMENT event, deferring it if no document exists yet."""
        if not self.document:
            # No document to create nodes with: keep the raw text for
            # buildDocument() to convert.
            self.pending_events.append([(COMMENT, s), None])
            return
        node = self.document.createComment(s)
        self._link_event([(COMMENT, node), None])

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, deferring it if needed."""
        if not self.document:
            # Same deferral as comment(): keep target and data for later.
            self.pending_events.append(
                [(PROCESSING_INSTRUCTION, target, data), None])
            return
        node = self.document.createProcessingInstruction(target, data)
        self._link_event([(PROCESSING_INSTRUCTION, node), None])

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event carrying a new text node."""
        text = self.document.createTextNode(chars)
        self._link_event([(IGNORABLE_WHITESPACE, text), None])

    def characters(self, chars):
        """Queue a CHARACTERS event carrying a new text node."""
        text = self.document.createTextNode(chars)
        self._link_event([(CHARACTERS, text), None])

    def startDocument(self):
        """Install the minidom implementation if no factory was supplied."""
        if self.documentFactory is not None:
            return
        from xml.dom.minidom import Document
        self.documentFactory = Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document and return its first child.

        This cannot happen in startDocument() because the tag name of the
        first element is needed.  A START_DOCUMENT event is queued, then
        every deferred comment / processing instruction is converted to a
        node and queued after it.
        """
        # XXX: obtain DocumentType
        document = self.documentFactory.createDocument(uri, tagname, None)
        self.document = document
        self._link_event([(START_DOCUMENT, document), None])
        self.push(document)
        # Everything seen before the first element goes into the queue now.
        for event in self.pending_events:
            kind = event[0][0]
            if kind == PROCESSING_INSTRUCTION:
                _, target, data = event[0]
                created = self.document.createProcessingInstruction(
                    target, data)
                event[0] = (PROCESSING_INSTRUCTION, created)
            elif kind == COMMENT:
                created = self.document.createComment(event[0][1])
                event[0] = (COMMENT, created)
            else:
                raise AssertionError("Unknown pending event ", kind)
            self._link_event(event)
        self.pending_events = None
        return document.firstChild

    def endDocument(self):
        """Link an END_DOCUMENT event and pop the element stack."""
        # The event is linked in, but lastEvent is left where it was.
        closing = [(END_DOCUMENT, self.document), None]
        self.lastEvent[1] = closing
        self.pop()

    def clear(self):
        """Explicitly drop the reference to the document."""
        self.document = None
0201 
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        """Print *exception* to standard output and carry on."""
        # Function-call form: a single parenthesised argument prints the
        # same text under the print statement and the print function.
        print(exception)

    def error(self, exception):
        """Raise *exception*."""
        raise exception

    def fatalError(self, exception):
        """Raise *exception*."""
        raise exception
0209 
class DOMEventStream:
    """Iterable stream of ``(token, node)`` events read from a PullDOM.

    stream  -- object whose ``read(size)`` returns the next chunk of XML
    parser  -- SAX parser; if it has a ``feed`` method it is driven
               incrementally, otherwise the whole stream is parsed in one
               go on the first request for an event
    bufsize -- number of bytes/characters requested from ``stream.read``
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        if not hasattr(self.parser, 'feed'):
            # Not an incremental parser: shadow getEvent() on the instance
            # with the slurp-everything fallback.
            self.getEvent = self._slurp
        self.reset()

    def reset(self):
        """Attach a fresh PullDOM handler to the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event regardless of *pos*; IndexError at the end."""
        rc = self.getEvent()
        if rc:
            return rc
        raise IndexError

    def next(self):
        """Return the next event; raise StopIteration when none is left."""
        rc = self.getEvent()
        if rc:
            return rc
        raise StopIteration

    # Same method under the name used by the newer iterator protocol.
    __next__ = next

    def __iter__(self):
        return self

    def expandNode(self, node):
        """Consume events up to the end of *node*, building its subtree.

        Each node that arrives is appended to the innermost open element;
        the method returns once an event for *node* itself shows up.
        """
        event = self.getEvent()
        parents = [node]
        while event:
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                parents[-1].appendChild(cur_node)
            if token == START_ELEMENT:
                parents.append(cur_node)
            elif token == END_ELEMENT:
                del parents[-1]
            event = self.getEvent()

    def getEvent(self):
        """Return the next ``(token, node)`` event, or None at end of input."""
        # use IncrementalParser interface, so we get the desired
        # pull effect
        if not self.pulldom.firstEvent[1]:
            # Queue is empty: point lastEvent back at the sentinel so new
            # events are linked where the consumer will find them.
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while not self.pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                self.parser.close()
                return None
            self.parser.feed(buf)
        rc = self.pulldom.firstEvent[1][0]
        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
        return rc

    def _slurp(self):
        """ Fallback replacement for getEvent() using the
            standard SAX2 interface, which means we slurp the
            SAX events into memory (no performance gain, but
            we are compatible to all SAX parsers).
        """
        self.parser.parse(self.stream)
        self.getEvent = self._emit
        return self._emit()

    def _emit(self):
        """ Fallback replacement for getEvent() that emits
            the events that _slurp() read previously.
            Returns None once they have all been handed out.
        """
        pending = self.pulldom.firstEvent[1]
        if not pending:
            # Queue exhausted.  Report it the way getEvent() does instead
            # of subscripting None, so next() can raise StopIteration.
            return None
        self.pulldom.firstEvent[1] = pending[1]
        return pending[0]

    def clear(self):
        """clear(): Explicitly release parsing objects"""
        self.pulldom.clear()
        del self.pulldom
        self.parser = None
        self.stream = None
0294 
class SAX2DOM(PullDOM):
    """PullDOM variant that also links the created nodes into a DOM tree.

    After the inherited handler has created a node and queued its event,
    the node is appended to the node currently on top of the element stack.
    """

    def buildDocument(self, uri, tagname):
        """Create the document and attach deferred processing instructions.

        Processing instructions reported before the first element only
        become nodes here, so this is the first point at which they can be
        put into the tree; they are inserted ahead of the node returned by
        the inherited method.
        """
        deferred = self.pending_events
        first_child = PullDOM.buildDocument(self, uri, tagname)
        for event in deferred or ():
            token, node = event[0]
            if token == PROCESSING_INSTRUCTION:
                self.document.insertBefore(node, first_child)
        return first_child

    def startElementNS(self, name, tagName, attrs):
        """Create the element, then append it to its parent on the stack."""
        PullDOM.startElementNS(self, name, tagName, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def startElement(self, name, attrs):
        """Create the element, then append it to its parent on the stack."""
        PullDOM.startElement(self, name, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def processingInstruction(self, target, data):
        """Create the processing instruction and append it to the open node.

        Before the first element there is no document yet: the inherited
        handler only stores the instruction in pending_events, no node
        exists, and lastEvent still holds no event.  In that case nothing
        is appended here; buildDocument() attaches the node later.
        """
        # Same truth test the inherited handler uses to decide on deferral.
        deferred = not self.document
        PullDOM.processingInstruction(self, target, data)
        if deferred:
            return
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def ignorableWhitespace(self, chars):
        """Create the whitespace text node and append it to the open node."""
        PullDOM.ignorableWhitespace(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def characters(self, chars):
        """Create the text node and append it to the open node."""
        PullDOM.characters(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)
0326 
0327 
# Chunk size parse() falls back to when the caller gives no bufsize:
# twenty short of 2 ** 14.
default_bufsize = 2 ** 14 - 20
0329 
def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a string is taken to be a file name and opened;
                        anything else is used as the stream directly
    parser           -- SAX parser to drive; xml.sax.make_parser() if omitted
    bufsize          -- chunk size for reading; default_bufsize if None
    """
    if bufsize is None:
        bufsize = default_bufsize
    # isinstance() also accepts subclasses of the string types, which an
    # exact type() comparison would mistake for a stream object.
    if isinstance(stream_or_string, tuple(_StringTypes)):
        # Binary mode: hand the parser the file's bytes untouched so that no
        # newline translation or text decoding happens before parsing.
        stream = open(stream_or_string, 'rb')
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
0340 
def parseString(string, parser=None):
    """Return a DOMEventStream over XML text held in memory.

    string -- the XML document; it is wrapped in an in-memory stream and
              read in a single chunk of len(string)
    parser -- SAX parser to drive; xml.sax.make_parser() if omitted
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        try:
            from StringIO import StringIO
        except ImportError:
            # Neither legacy module is available: fall back to io, picking
            # the buffer class that matches the type of the input.
            import io
            if isinstance(string, bytes):
                StringIO = io.BytesIO
            else:
                StringIO = io.StringIO

    bufsize = len(string)
    buf = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(buf, parser, bufsize)
0352 

Generated by PyXR 0.9.4
SourceForge.net Logo