"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""

import os
import types

try:
    import urlparse
    import urllib
except ImportError:
    # Python 3 moved urljoin/urlopen into the urllib package; alias the
    # submodules so the rest of the module can keep the original spellings.
    import urllib.parse as urlparse
    import urllib.request as urllib

try:
    import handler
    import xmlreader
except ImportError:
    # Not importable as sibling modules: use the installed xml.sax package.
    from xml.sax import handler, xmlreader

try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    # Either a build without unicode support (StringType only) or an
    # interpreter whose types module has no string aliases at all.
    _StringTypes = [getattr(types, "StringType", str)]

# See whether the xmlcharrefreplace error handler is
# supported
try:
    from codecs import xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
    del xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"

# Namespace name that is bound to the "xml" prefix by definition and is
# therefore never announced through startPrefixMapping().
_XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"


def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary."""
    for key, value in d.items():
        s = s.replace(key, value)
    return s


def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # NOTE: the shared default dict is only read, never mutated.

    # must do ampersand first, otherwise the ampersands introduced by the
    # other replacements would be escaped a second time
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data


def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, so that "&amp;lt;" yields "&lt;" and not "<"
    return data.replace("&amp;", "&")


def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: keep double quotes as the
            # delimiter and escape the embedded ones.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data


class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    out is any object with a write() method (sys.stdout when omitted);
    encoding is the name announced in the XML declaration and used to
    encode non-str text before it is written.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced since the last start tag; they are
        # written as xmlns attributes on the next element.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output, encoding it first if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Build the qualified name for a (namespace uri, localname) pair.

        Raises KeyError when the namespace has no prefix mapping in scope.
        """
        uri, localname = name[0], name[1]
        if uri:
            if uri == _XML_NAMESPACE:
                # Predefined binding; no startPrefixMapping() precedes it.
                return "xml:" + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        # No namespace, or the namespace is the default one (no prefix).
        return localname

    # ContentHandler methods

    def startDocument(self):
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the mappings in force before this one, so
        # endPrefixMapping() can fall back to it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        # The prefix is restored from the namespace mappings in scope; the
        # qname argument is not consulted.
        self._write('<' + self._qname(name))

        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # A mapping without a prefix declares the default namespace.
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        self._write(content)

    def processingInstruction(self, target, data):
        self._write('<?%s %s?>' % (target, data))


class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer must travel back to the parent reader;
        # without the return the reader would always receive None.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every handler of the parent, so all
        # events pass through the forwarding methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent

# --- Utility functions

def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is used as the system identifier,
    or an object with a read() method, which becomes the byte stream
    (its name attribute, when present, becomes the system identifier).
    """

    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        # Try the system id as a file path relative to the directory of
        # base first; anything that is not an existing file is treated
        # as a URL resolved against base.
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        # The opened stream is handed to the InputSource and left open
        # for whoever reads from it.
        source.setByteStream(f)

    return source

# Generated by PyXR 0.9.4