PyXR

c:\python24\lib\xml\sax\xmlreader.py



0001 """An XML Reader is the SAX 2 name for an XML parser. XML Parsers
0002 should be based on this code. """
0003 
0004 import handler
0005 
0006 from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
0007 
0008 
0009 # ===== XMLREADER =====
0010 
class XMLReader:
    """Base interface for a callback-driven SAX2 XML parser driver.

    A driver built on this class lets an application query and change
    parser features and properties, register the handler objects that
    receive parse events, and start parsing a document.

    SAX interfaces are assumed to be synchronous: parse methods must not
    return before parsing is complete, and a reader must wait for each
    event-handler callback to return before it reports the next event.

    In this base class parse() raises NotImplementedError, setLocale()
    raises SAXNotSupportedException, and the feature/property accessors
    raise SAXNotRecognizedException; drivers override what they support."""

    def __init__(self):
        # Start out with default handler objects from the handler module;
        # applications replace them through the set*Handler methods.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse the XML document given as a system identifier or an
        InputSource. Must be overridden; the base version always raises
        NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        """Return the ContentHandler currently registered."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register `handler` as the receiver of document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the DTD handler currently registered."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register `handler` as the receiver of basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the EntityResolver currently registered."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register `resolver` as the object that resolves external
        entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the ErrorHandler currently registered."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register `handler` as the receiver of error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Request a locale for error and warning messages.

        SAX parsers need not localize their messages, but a parser that
        cannot honour the requested locale must raise a SAX exception.
        Applications may ask for a locale change in the middle of a
        parse. The base version supports no locale and always raises
        SAXNotSupportedException."""
        message = "Locale support not implemented"
        raise SAXNotSupportedException(message)

    def getFeature(self, name):
        """Look up and return the state of the SAX2 feature `name`.

        The base version recognizes no features and always raises
        SAXNotRecognizedException."""
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the SAX2 feature `name` to `state`.

        The base version recognizes no features and always raises
        SAXNotRecognizedException."""
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of the SAX2 property `name`.

        The base version recognizes no properties and always raises
        SAXNotRecognizedException."""
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the SAX2 property `name` to `value`.

        The base version recognizes no properties and always raises
        SAXNotRecognizedException."""
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
0090 
class IncrementalParser(XMLReader):
    """XMLReader extension for parsers that accept input piece by piece.

    On top of the XMLReader interface this class adds the feed, close
    and reset methods (plus the prepareParser hook used by parse).
    Support for this interface is optional, since not every underlying
    XML parser can parse incrementally.

    A freshly instantiated parser is ready to accept data through feed
    straight away. Once parsing has been finished with a call to close,
    reset must be called before the parser accepts new data, whether
    through feed or through parse.

    These methods must _not_ be called during parsing, that is, after
    parse has been called and before it returns.

    As a convenience to SAX 2.0 driver writers, parse is implemented
    here in terms of prepareParser, feed and close."""

    def __init__(self, bufsize=2**16):
        """Create the parser.

        bufsize is the size argument passed to each read() call that
        parse makes on the input byte stream."""
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse `source` by feeding its byte stream to the parser in
        chunks.

        source is handed to saxutils.prepare_input_source, and the byte
        stream of the resulting input source is read self._bufsize at a
        time. prepareParser is called once before the first read, feed
        once per non-empty chunk, and close once the stream is
        exhausted. Exceptions raised by those methods are not caught."""
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Any empty read marks end of input. Comparing against "" would
        # loop forever on a stream that signals EOF with an empty bytes
        # object (or any other empty value that is not equal to "").
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """Hand the raw XML in `data` to the parser, which parses it and
        emits the corresponding events. XML constructs may be split
        across several calls to feed.

        feed may raise SAXException. Must be overridden; the base
        version always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Hook called by parse, before any data is fed, so the SAX 2.0
        driver can prepare itself for parsing `source`.

        Must be overridden; the base version always raises
        NotImplementedError."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Signal that the whole document has been passed to feed and
        that no more data will follow. This lets the parser do its final
        checks on the document and empty its internal data buffer.

        The parser is not ready to parse another document until reset
        has been called.

        close may raise SAXException. Must be overridden; the base
        version always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Make the parser ready for a new document after close has been
        called. The results of calling parse or feed after close without
        calling reset are undefined.

        Must be overridden; the base version always raises
        NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")
0160 
0161 # ===== LOCATOR =====
0162 
class Locator:
    """Interface that ties a SAX event to a position in the document.

    A locator gives valid results only during calls to DocumentHandler
    methods; at any other time the results are unpredictable. This base
    implementation reports -1 for both positions and None for both
    identifiers."""

    def getColumnNumber(self):
        """Return the column number where the current event ends
        (always -1 in this base class)."""
        column = -1
        return column

    def getLineNumber(self):
        """Return the line number where the current event ends
        (always -1 in this base class)."""
        line = -1
        return line

    def getPublicId(self):
        """Return the public identifier for the current event
        (always None in this base class)."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event
        (always None in this base class)."""
        return None
0184 
0185 # ===== INPUTSOURCE =====
0186 
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance may carry a public identifier, a system identifier, a
    byte stream (possibly together with character encoding information)
    and/or a character stream for the entity.

    Applications create these objects to pass to XMLReader.parse and to
    return from EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader is not
    allowed to modify InputSource objects passed to it, although it may
    make copies and modify those."""

    def __init__(self, system_id = None):
        # Only the system identifier can be given at construction time;
        # every other field starts as None until its setter is called.
        self.__system_id = system_id
        self.__public_id = self.__encoding = None
        self.__bytefile = self.__charfile = None

    def setPublicId(self, public_id):
        """Store `public_id` as the public identifier of this source."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this source."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Store `system_id` as the system identifier of this source."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this source."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Store the character encoding of this source.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding is ignored if this source also contains a character
        stream."""
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this source."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Store the byte stream for this source: a Python file-like
        object that does not perform byte-to-character conversion.

        The SAX parser ignores the byte stream if a character stream is
        also specified, but uses it in preference to opening a URI
        connection itself.

        An application that knows the character encoding of the byte
        stream should set it with setEncoding."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream of this source.

        getEncoding returns the character encoding for this byte stream,
        or None if it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this source. (The stream must
        be a Python 2.0 Unicode-wrapped file-like that performs
        conversion to Unicode strings.)

        When a character stream is specified, the SAX parser ignores any
        byte stream and does not attempt to open a URI connection to the
        system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream of this source."""
        return self.__charfile
0273 
0274 # ===== ATTRIBUTESIMPL =====
0275 
class AttributesImpl:
    """Non-namespace-aware attribute collection wrapping a mapping.

    Attribute names and qualified names are the same thing here, so the
    name/qname lookups only check that the name is present. Besides the
    SAX accessor methods, the object offers a small mapping-style
    interface (len(), indexing, `in`, keys, get, items, values)."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}. The mapping is
        stored as given, not copied."""
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type; this implementation reports
        "CDATA" for every name, without checking that it exists."""
        return "CDATA"

    def getValue(self, name):
        """Return the value of attribute `name`; a missing name raises
        whatever the underlying mapping raises (KeyError for a dict)."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value of the attribute with qualified name `name`
        (the same lookup as getValue)."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return the attribute name for qualified name `name`, which is
        `name` itself. Raises KeyError if there is no such attribute."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return the qualified name for attribute `name`, which is
        `name` itself. Raises KeyError if there is no such attribute."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def getQNames(self):
        """Return a list of the qualified attribute names (identical to
        the attribute names)."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def has_key(self, name):
        """Return true if an attribute called `name` exists."""
        return name in self._attrs

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value of attribute `name`, or `alternative` when
        there is no such attribute."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class wrapping the same
        underlying mapping (the mapping itself is not duplicated)."""
        return self.__class__(self._attrs)

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self._attrs.items())

    def values(self):
        """Return a list of the attribute values."""
        return list(self._attrs.values())
0338 
0339 # ===== ATTRIBUTESNSIMPL =====
0340 
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attribute names are (ns_uri, lname) tuples; a second mapping gives
    the qualified name for each of them. Lookups by qualified name scan
    that second mapping for a matching value."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.
        Both mappings are stored as given, not copied."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is
        `name`. Raises KeyError if no attribute has that qualified
        name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) name of the attribute whose
        qualified name is `name`. Raises KeyError if no attribute has
        that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name recorded for the (ns_uri, lname)
        name `name`; a missing name raises whatever the qnames mapping
        raises (KeyError for a dict)."""
        return self._qnames[name]

    def getQNames(self):
        """Return a list of the qualified attribute names."""
        return list(self._qnames.values())

    def copy(self):
        """Return a new instance of the same class wrapping the same two
        underlying mappings (the mappings are not duplicated)."""
        return self.__class__(self._attrs, self._qnames)
0373 
0374 
def _test():
    """Smoke test: instantiate XMLReader, IncrementalParser and Locator
    in turn, discarding the instances."""
    for factory in (XMLReader, IncrementalParser, Locator):
        factory()

# Run the smoke test when this file is executed as a script.
if __name__ == "__main__":
    _test()
0382 

Generated by PyXR 0.9.4
SourceForge.net Logo