"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """

# The sibling modules are imported by their bare names first, exactly as
# before.  The absolute form is only a fallback for interpreters that no
# longer support implicit relative imports.
# NOTE(review): the fallback assumes this module is part of the xml.sax
# package -- confirm before relying on it elsewhere.
try:
    import handler
except ImportError:
    from xml.sax import handler

try:
    from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
except ImportError:
    from xml.sax._exceptions import SAXNotSupportedException, \
         SAXNotRecognizedException


# ===== XMLREADER =====

class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        # Start out with the default handler objects from the handler
        # module; applications replace them through the set* methods.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        # The parameter shadows the module-level 'handler' import inside
        # this method only; the name is kept for keyword callers.
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation always raises SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)


class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the size passed to each read() call made by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse 'source' by feeding its byte stream to the parser.

        The source is normalized with saxutils.prepare_input_source,
        handed to prepareParser, and then read in chunks of at most
        'bufsize' that are passed to feed.  close is called once the
        stream is exhausted."""
        # Imported here rather than at module level, as in the original.
        try:
            import saxutils
        except ImportError:
            from xml.sax import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Test truthiness instead of comparing with "": a binary stream
        # signals end of data with b"", which never equals "" on
        # Python 3 and would make the loop spin forever.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")


# ===== LOCATOR =====

class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to DocumentHandler methods; at any other time, the
    results are unpredictable.

    The methods of this base class return placeholder values: -1 for
    the numeric positions and None for the identifiers."""

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None


# ===== INPUTSOURCE =====

class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id = None):
        # Everything except the system identifier starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Get the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Get the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a Python 2.0 Unicode-wrapped file-like that performs
        conversion to Unicode strings.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Get the character stream for this input source."
        return self.__charfile


# ===== ATTRIBUTESIMPL =====

class AttributesImpl:
    """Attribute collection for non-namespace-aware processing.

    Names and qualified names are the same thing in this
    implementation, so the name/qname conversion methods only check
    that the attribute exists and return their argument."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        # The mapping is stored as given, not copied.
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always 'CDATA' here."
        return "CDATA"

    def getValue(self, name):
        "Return the value stored for the attribute 'name'."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value stored for the qualified name 'name'."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return 'name' unchanged; raise KeyError if there is no such attribute."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return 'name' unchanged; raise KeyError if there is no such attribute."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        # list() keeps the return type a list where keys() is a view.
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified names (same as getNames here)."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return true if there is an attribute called 'name'."
        return name in self._attrs

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value for 'name', or 'alternative' if it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class.

        The new object is built from the same underlying mapping; the
        mapping itself is not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())


# ===== ATTRIBUTESNSIMPL =====

class AttributesNSImpl(AttributesImpl):
    """Attribute collection for namespace-aware processing.

    Attributes are keyed by (ns_uri, lname) pairs; a second mapping
    records the qualified name that belongs to each pair."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is 'name'.

        Raises KeyError if no attribute has that qualified name."""
        # The qname mapping is keyed the other way round, so scan it.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair whose qualified name is 'name'.

        Raises KeyError if no attribute has that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name recorded for the (ns_uri, lname) pair."
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class.

        The new object is built from the same two underlying mappings;
        the mappings themselves are not duplicated."""
        return self.__class__(self._attrs, self._qnames)


def _test():
    "Smoke test: check that the interface classes can be instantiated."
    XMLReader()
    IncrementalParser()
    Locator()

if __name__ == "__main__":
    _test()
# Generated by PyXR 0.9.4