PyXR

c:\python24\lib\xml\sax\expatreader.py



0001 """
0002 SAX driver for the pyexpat C module.  This driver works with
0003 pyexpat.__version__ == '2.22'.
0004 """
0005 
0006 version = "0.20"
0007 
0008 from xml.sax._exceptions import *
0009 from xml.sax.handler import feature_validation, feature_namespaces
0010 from xml.sax.handler import feature_namespace_prefixes
0011 from xml.sax.handler import feature_external_ges, feature_external_pes
0012 from xml.sax.handler import feature_string_interning
0013 from xml.sax.handler import property_xml_string, property_interning_dict
0014 
# xml.parsers.expat does not raise ImportError in Jython, so detect the
# Java platform explicitly and refuse to load this driver there.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
# sys was only needed for the platform check; keep the namespace clean.
del sys
0020 
# Import expat, translating every "not usable" condition into the SAX
# exception that tells the caller this driver is unavailable.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)

# Only reached when the import succeeded: a module without ParserCreate
# cannot be used by this driver either.
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
0028 from xml.sax import xmlreader, saxutils, handler
0029 
# Module-level shorthands for the two attribute container classes that
# the element callbacks instantiate.
AttributesImpl, AttributesNSImpl = (xmlreader.AttributesImpl,
                                    xmlreader.AttributesNSImpl)
0032 
# Weak references let the locator point at the parser without creating
# a cycle between the parser and the content handler.  When the
# interpreter lacks the _weakref module, _mkproxy degrades to returning
# the object itself, i.e. a plain strong reference.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        """Fallback used without weakref support: hand back o unchanged."""
        return o
else:
    from weakref import proxy as _mkproxy
    # The probe module is not needed beyond this point.
    del _weakref
0045 
0046 # --- ExpatLocator
0047 
class ExpatLocator(xmlreader.Locator):
    """Locator that reports positions on behalf of an ExpatParser.

    The reader is held through _mkproxy() instead of a plain attribute so
    that handing this locator to the content handler does not create a
    circular reference between the parser and the content handler.
    """

    def __init__(self, parser):
        # Stored via _mkproxy: a weak proxy when weak references are
        # available, otherwise the reader object itself.
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        expat_parser = self._ref._parser
        if expat_parser is not None:
            return expat_parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        expat_parser = self._ref._parser
        if expat_parser is not None:
            return expat_parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        """Return the public identifier of the reader's current source."""
        reader = self._ref
        # NOTE(review): a weak proxy is never identical to None, so this
        # guard looks like it can only trigger if _ref is set to None by
        # other code -- confirm the intent.
        if reader is not None:
            return reader._source.getPublicId()
        return None

    def getSystemId(self):
        """Return the system identifier of the reader's current source."""
        reader = self._ref
        # NOTE(review): same guard as in getPublicId -- confirm the intent.
        if reader is not None:
            return reader._source.getSystemId()
        return None
0080 
0081 
0082 # --- ExpatParser
0083 
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module.

    The reader owns an expat parser object (created afresh by reset()),
    installs its own methods -- or the content handler's, where no
    translation is needed -- as expat callbacks, and forwards the events
    to the registered SAX handlers.  It also implements the Locator
    interface itself, reading positions from the expat parser in use.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create an idle reader.

        namespaceHandling -- initial value of the namespaces feature; when
                             true, reset() installs the namespace-aware
                             element callbacks.
        bufsize           -- handed unchanged to
                             IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None             # expat parser; None while idle
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None   # lexical handler property value
        self._parsing = 0               # true between first feed and close
        self._entity_stack = []         # (parser, source) of outer entities
        self._external_ges = 1
        self._interning = None          # dict when string interning is on

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        # The locator only holds the reader through _mkproxy, see
        # ExpatLocator.
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Give expat the source's system identifier as its base, if any."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Register the content handler; re-bind expat callbacks if parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            # expat holds bound methods of the previous handler.
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the current state of the named feature.

        Validation, external parameter entities and namespace prefixes
        always report 0.  Raises SAXNotRecognizedException for any other
        unknown name.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the named feature to state.

        Raises SAXNotSupportedException while a parse is in progress or
        when asked to enable validation, external parameter entities or
        namespace prefixes; raises SAXNotRecognizedException for unknown
        names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an already installed interning dict.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of the named property.

        The XML string property is only available while an expat parser
        exists (SAXNotSupportedException otherwise) and only if that
        parser offers GetInputContext (SAXNotRecognizedException
        otherwise).  Unknown names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the named property to value.

        The XML string property is read-only (SAXNotSupportedException);
        unknown names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                # Apply to the live expat parser immediately.
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Pass a chunk of the document to expat.

        The first call after construction, reset() or close() creates a
        fresh expat parser and emits startDocument().  An expat error is
        wrapped in a SAXParseException (with this reader as locator) and
        given to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            # Fetch the exception through sys.exc_info() instead of binding
            # it in the except clause: "except X, e" is a syntax error on
            # Python 3 and "except X as e" is one before Python 2.6.
            # The import is local because the module-level name is deleted
            # right after the platform check.
            import sys
            e = sys.exc_info()[1]
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document: final feed, endDocument(), drop the parser.

        Does nothing while an external entity is being processed.
        """
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
        finally:
            # Leave the parsing state even when the final feed raises, so
            # that a later feed() starts a fresh document instead of
            # pushing data into a parser that has already failed.
            self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        # (only on success, so the position stays readable after an error)
        self._parser = None

    def _reset_cont_handler(self):
        """Point expat's PI and character callbacks at the content handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Install or clear the expat callbacks served by the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through this class because the argument order differs,
            # see start_doctype_decl.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a new expat parser and wire up all callbacks."""
        if self._namespaces:
            # " " becomes the separator between the parts of a qualified
            # name; start_element_ns/end_element_ns split on it.
            self._parser = expat.ParserCreate(None, " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current input source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """Forward a start tag (namespaces off) as startElement()."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """Forward an end tag (namespaces off) as endElement()."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Forward a start tag (namespaces on) as startElementNS().

        Names arrive as space-separated parts: one part for a name
        without namespace, two for "uri local", three for
        "uri local prefix".  They are converted to (uri, localname)
        pairs; the element's qname argument is always passed as None.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                # third part is the prefix: rebuild "prefix:local"
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Forward an end tag (namespaces on) as endElementNS().

        The name is split exactly as in start_element_ns; the qname
        argument is always passed as None.
        """
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Forward a namespace declaration as startPrefixMapping()."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Forward the end of a namespace scope as endPrefixMapping()."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Forward a DOCTYPE start to the lexical handler's startDTD().

        Swaps the identifiers into (pubid, sysid) order and drops
        has_internal_subset.
        """
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward to the DTD handler's unparsedEntityDecl(); base is dropped."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward to the DTD handler's notationDecl(); base is dropped."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external entity with a sub-parser.

        Returns 1 when the entity was parsed or when external general
        entities are switched off, and 0 when parsing it raised.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Remember the outer parser/source and switch to the entity's.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): the bare except swallows every exception and the
            # entity stack is left un-popped on this path -- confirm this is
            # intended before changing it.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """Forward a skipped entity as skippedEntity()."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
0400 
0401 # ---
0402 
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    parser = ExpatParser(*args, **kwargs)
    return parser
0405 
0406 # ---
0407 
if __name__ == "__main__":
    # Manual smoke test: parse a sample document and echo it as XML.
    # XMLGenerator is defined in xml.sax.saxutils; the xml.sax package
    # does not export it, so it has to be referenced through saxutils.
    import xml.sax
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")
0414 

Generated by PyXR 0.9.4
SourceForge.net Logo