"""
SAX driver for the pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        # No weak reference support: hand back the object itself.
        return o
else:
    import weakref
    _mkproxy = weakref.proxy
    del weakref, _weakref

# --- ExpatLocator

class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """
    def __init__(self, parser):
        """Remember *parser* through _mkproxy()."""
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None if there is no
        underlying expat parser."""
        parser = self._ref
        if parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 if there is no
        underlying expat parser."""
        parser = self._ref
        if parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the parser's current input source."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system id of the parser's current input source."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getSystemId()


# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module."""

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create the driver.

        namespaceHandling is the initial value of the namespaces
        feature; bufsize is passed on to IncrementalParser.  The expat
        parser itself is only created by reset().
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        # (parser, source) pairs saved while an external entity is parsed
        self._entity_stack = []
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Set expat's base to the system id of *source*, if it has one."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install *handler*; while parsing, also rebind the expat
        callbacks that point directly at the content handler."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature *name*.

        Raises SAXNotRecognizedException for unknown feature names.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while parsing or when asked to
        enable a feature this driver cannot provide, and
        SAXNotRecognizedException for unknown feature names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # keep an already installed interning dictionary
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        The XML string property is only available while an expat
        parser exists (SAXNotSupportedException otherwise) and only if
        that parser offers GetInputContext.  Unknown names raise
        SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        The XML string property is read-only (SAXNotSupportedException);
        unknown names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Pass *data* to expat.

        The first call after a reset creates a fresh expat parser and
        reports startDocument().  expat errors are wrapped in a
        SAXParseException and handed to the error handler's
        fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document: feed a final empty chunk, report
        endDocument() and drop the expat parser."""
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        self.feed("", isFinal = 1)
        self._cont_handler.endDocument()
        self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        """Bind the expat callbacks that go straight to the content
        handler, without a wrapper method on this class."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Bind (or clear) the expat callbacks that serve the lexical
        handler property."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # goes through a wrapper because the argument order differs
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a new expat parser, install all callbacks on it and
        clear the parsing state."""
        if self._namespaces:
            # " " separates namespace URI, local name and prefix in the
            # names expat reports; the *_ns handlers split on it.
            self._parser = expat.ParserCreate(None, " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None if there is no
        expat parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 if there is no expat
        parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system id of the current input source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """expat start-tag callback used when namespaces are off."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """expat end-tag callback used when namespaces are off."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """expat start-tag callback used when namespaces are on.

        Names arrive as whitespace-separated strings with one part
        (local name), two parts (URI, local name) or three parts
        (URI, local name, prefix).  They are reported as (URI or None,
        local name) pairs; the element qname is reported as None.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """expat end-tag callback used when namespaces are on; the name
        is split the same way as in start_element_ns()."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Forward a namespace declaration as startPrefixMapping()."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Forward the end of a namespace scope as endPrefixMapping()."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Forward a DOCTYPE start to the lexical handler.

        expat passes sysid before pubid; startDTD() is called with
        (name, pubid, sysid).  has_internal_subset is not forwarded.
        """
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward an unparsed entity declaration to the DTD handler
        (base is not forwarded)."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward a notation declaration to the DTD handler (base is
        not forwarded)."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat callback for a reference to an external entity.

        Returns 1 when the entity was skipped (external general
        entities switched off) or parsed successfully, and 0 when
        parsing it raised an exception.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Parse the entity with a sub-parser; the current parser and
        # source are saved so they can be restored afterwards.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except Exception:
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            # Note that the saved pair stays on the entity stack on this
            # path; reset() clears the stack.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """Forward a skipped entity as skippedEntity()."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)

# ---

def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    import xml.sax
    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils, not in xml.sax itself
    p.setContentHandler(saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")

# Generated by PyXR 0.9.4