"""Implementation of the DOM Level 3 'LS-Load' feature."""

import copy
import xml.dom

from xml.dom.minicompat import *

from xml.dom.NodeFilter import NodeFilter


__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]


def _urllib_request():
    """Return the module that provides urlopen() and build_opener().

    The import is deferred until a URL actually has to be opened; the
    Python 2 module name is tried first, then the Python 3 one.
    """
    try:
        import urllib2 as request
    except ImportError:
        import urllib.request as request
    return request


class Options:
    """Features object that has variables set for each DOMBuilder feature.

    The DOMBuilder class uses an instance of this class to pass settings to
    the ExpatBuilder class.
    """

    # Note that the DOMBuilder class in LoadSave constrains which of these
    # values can be set using the DOM Level 3 LoadSave feature.

    namespaces = 1
    namespace_declarations = True
    validation = False
    external_parameter_entities = True
    external_general_entities = True
    external_dtd_subset = True
    validate_if_schema = False
    validate = False
    datatype_normalization = False
    create_entity_ref_nodes = True
    entities = True
    whitespace_in_element_content = True
    cdata_sections = True
    comments = True
    charset_overrides_xml_encoding = True
    # Kept so that supportsFeature("infoset") succeeds; the value reported
    # by DOMBuilder.getFeature("infoset") is computed from other options.
    infoset = False
    supported_mediatypes_only = False

    errorHandler = None
    filter = None


class DOMBuilder:
    """Builder that parses an input source into a DOM document.

    Features are stored on a private Options instance; parse() hands a
    copy of it, together with the current filter and error handler, to
    the expat-based builder.
    """

    entityResolver = None
    errorHandler = None
    filter = None

    ACTION_REPLACE = 1
    ACTION_APPEND_AS_CHILDREN = 2
    ACTION_INSERT_AFTER = 3
    ACTION_INSERT_BEFORE = 4

    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)

    def __init__(self):
        self._options = Options()

    def _get_entityResolver(self):
        return self.entityResolver

    def _set_entityResolver(self, entityResolver):
        self.entityResolver = entityResolver

    def _get_errorHandler(self):
        return self.errorHandler

    def _set_errorHandler(self, errorHandler):
        self.errorHandler = errorHandler

    def _get_filter(self):
        return self.filter

    def _set_filter(self, filter):
        self.filter = filter

    def setFeature(self, name, state):
        """Switch the feature `name` on or off.

        Raises xml.dom.NotFoundErr when the feature is unknown and
        xml.dom.NotSupportedErr when the feature is known but cannot be
        given the requested state.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        key = (_name_xform(name), int(bool(state)))
        try:
            settings = self._settings[key]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: %r" % (name,))
        # One feature may imply several option changes (see _settings).
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if `name` corresponds to an attribute of Options."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if setFeature(name, state) is a supported setting."""
        key = (_name_xform(name), int(bool(state)))
        return key in self._settings

    # This dictionary maps from (feature,value) to a list of
    # (option,value) pairs that should be set on the Options object.
    # If a (feature,value) setting is not in this dictionary, it is
    # not supported by the DOMBuilder.
    #
    _settings = {
        ("namespace_declarations", 0): [
            ("namespace_declarations", 0)],
        ("namespace_declarations", 1): [
            ("namespace_declarations", 1)],
        ("validation", 0): [
            ("validation", 0)],
        ("external_general_entities", 0): [
            ("external_general_entities", 0)],
        ("external_general_entities", 1): [
            ("external_general_entities", 1)],
        ("external_parameter_entities", 0): [
            ("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [
            ("external_parameter_entities", 1)],
        ("validate_if_schema", 0): [
            ("validate_if_schema", 0)],
        ("create_entity_ref_nodes", 0): [
            ("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [
            ("create_entity_ref_nodes", 1)],
        ("entities", 0): [
            ("create_entity_ref_nodes", 0),
            ("entities", 0)],
        ("entities", 1): [
            ("entities", 1)],
        ("whitespace_in_element_content", 0): [
            ("whitespace_in_element_content", 0)],
        ("whitespace_in_element_content", 1): [
            ("whitespace_in_element_content", 1)],
        ("cdata_sections", 0): [
            ("cdata_sections", 0)],
        ("cdata_sections", 1): [
            ("cdata_sections", 1)],
        ("comments", 0): [
            ("comments", 0)],
        ("comments", 1): [
            ("comments", 1)],
        ("charset_overrides_xml_encoding", 0): [
            ("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1): [
            ("charset_overrides_xml_encoding", 1)],
        ("infoset", 0): [],
        ("infoset", 1): [
            ("namespace_declarations", 0),
            ("validate_if_schema", 0),
            ("create_entity_ref_nodes", 0),
            ("entities", 0),
            ("cdata_sections", 0),
            ("datatype_normalization", 1),
            ("whitespace_in_element_content", 1),
            ("comments", 1),
            ("charset_overrides_xml_encoding", 1)],
        ("supported_mediatypes_only", 0): [
            ("supported_mediatypes_only", 0)],
        ("namespaces", 0): [
            ("namespaces", 0)],
        ("namespaces", 1): [
            ("namespaces", 1)],
    }

    def getFeature(self, name):
        """Return the current state of the feature `name`.

        "infoset" is not stored: it is reported as true exactly when the
        options are in the combination that setFeature("infoset", True)
        establishes.  Raises xml.dom.NotFoundErr for unknown features.
        """
        xname = _name_xform(name)
        if xname == "infoset":
            # Must be handled before the attribute lookup: Options has a
            # placeholder `infoset` attribute that never changes.
            options = self._options
            return bool(
                options.datatype_normalization
                and options.whitespace_in_element_content
                and options.comments
                and options.charset_overrides_xml_encoding
                and not (options.namespace_declarations
                         or options.validate_if_schema
                         or options.create_entity_ref_nodes
                         or options.entities
                         or options.cdata_sections))
        try:
            return getattr(self._options, xname)
        except AttributeError:
            raise xml.dom.NotFoundErr("feature %s not known" % repr(name))

    def parseURI(self, uri):
        """Resolve `uri` to an input source and parse it.

        The configured entityResolver is used when set; otherwise a
        default DOMEntityResolver is created for the call.
        """
        if self.entityResolver:
            input = self.entityResolver.resolveEntity(None, uri)
        else:
            input = DOMEntityResolver().resolveEntity(None, uri)
        return self.parse(input)

    def parse(self, input):
        """Parse the document described by `input` and return it.

        The byte stream of `input` is used when present; otherwise the
        source's systemId, if any, is opened as a URL.
        """
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        # The system identifier belongs to the input source, not to the
        # options object (which has no such attribute).
        if fp is None and input.systemId:
            fp = _urllib_request().urlopen(input.systemId)
        return self._parse_bytestream(fp, options)

    def parseWithContext(self, input, cnode, action):
        """Not implemented; only validates `action` before raising."""
        if action not in self._legal_actions:
            raise ValueError("not a legal action")
        raise NotImplementedError("Haven't written this yet...")

    def _parse_bytestream(self, stream, options):
        """Build a document from `stream` using the expat builder."""
        import xml.dom.expatbuilder
        builder = xml.dom.expatbuilder.makeBuilder(options)
        return builder.parseFile(stream)


def _name_xform(name):
    """Map a feature name to an Options attribute name.

    The name is lower-cased and hyphens become underscores, e.g.
    "cdata-sections" -> "cdata_sections".
    """
    return name.lower().replace('-', '_')


# NOTE(review): the base class was `NewStyle`, a name supplied by the
# star import from xml.dom.minicompat; it is assumed to be an alias of
# `object` -- confirm against the minicompat version in use.
class DOMEntityResolver(object):
    """Resolver that opens a system identifier as a URL."""

    __slots__ = '_opener',

    def resolveEntity(self, publicId, systemId):
        """Open `systemId` and return a populated DOMInputSource."""
        assert systemId is not None
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        source.byteStream = self._get_opener().open(systemId)

        # determine the encoding if the transport provided it
        source.encoding = self._guess_media_encoding(source)

        # determine the base URI if we can
        import posixpath
        try:
            import urlparse
        except ImportError:
            import urllib.parse as urlparse
        parts = urlparse.urlparse(systemId)
        scheme, netloc, path, params, query, fragment = parts
        # XXX should we check the scheme here as well?
        if path and not path.endswith("/"):
            path = posixpath.dirname(path) + "/"
            parts = scheme, netloc, path, params, query, fragment
            source.baseURI = urlparse.urlunparse(parts)

        return source

    def _get_opener(self):
        """Return the URL opener, creating it on first use."""
        try:
            return self._opener
        except AttributeError:
            self._opener = self._create_opener()
            return self._opener

    def _create_opener(self):
        return _urllib_request().build_opener()

    def _guess_media_encoding(self, source):
        """Return the charset from the Content-Type header, or None."""
        info = source.byteStream.info()
        if "Content-Type" not in info:
            return None
        if not hasattr(info, "getplist"):
            # Header objects without getplist() are expected to be
            # email.message.Message instances (Python 3 urllib).
            return info.get_content_charset()
        for param in info.getplist():
            if param.startswith("charset="):
                return param.split("=", 1)[1].lower()
        return None


class DOMInputSource(object):
    """Plain record describing where a document is to be read from."""

    __slots__ = ('byteStream', 'characterStream', 'stringData',
                 'encoding', 'publicId', 'systemId', 'baseURI')

    def __init__(self):
        self.byteStream = None
        self.characterStream = None
        self.stringData = None
        self.encoding = None
        self.publicId = None
        self.systemId = None
        self.baseURI = None

    def _get_byteStream(self):
        return self.byteStream

    def _set_byteStream(self, byteStream):
        self.byteStream = byteStream

    def _get_characterStream(self):
        return self.characterStream

    def _set_characterStream(self, characterStream):
        self.characterStream = characterStream

    def _get_stringData(self):
        return self.stringData

    def _set_stringData(self, data):
        self.stringData = data

    def _get_encoding(self):
        return self.encoding

    def _set_encoding(self, encoding):
        self.encoding = encoding

    def _get_publicId(self):
        return self.publicId

    def _set_publicId(self, publicId):
        self.publicId = publicId

    def _get_systemId(self):
        return self.systemId

    def _set_systemId(self, systemId):
        self.systemId = systemId

    def _get_baseURI(self):
        return self.baseURI

    def _set_baseURI(self, uri):
        self.baseURI = uri


class DOMBuilderFilter:
    """Element filter which can be used to tailor construction of
    a DOM instance.
    """

    # There's really no need for this class; concrete implementations
    # should just implement the endElement() and startElement()
    # methods as appropriate.  Using this makes it easy to only
    # implement one of them.

    FILTER_ACCEPT = 1
    FILTER_REJECT = 2
    FILTER_SKIP = 3
    FILTER_INTERRUPT = 4

    whatToShow = NodeFilter.SHOW_ALL

    def _get_whatToShow(self):
        return self.whatToShow

    def acceptNode(self, element):
        return self.FILTER_ACCEPT

    def startContainer(self, element):
        return self.FILTER_ACCEPT

del NodeFilter


class DocumentLS:
    """Mixin to create documents that conform to the load/save spec."""

    def _get_async(self):
        return False

    def _set_async(self, flag):
        if flag:
            raise xml.dom.NotSupportedErr(
                "asynchronous document loading is not supported")

    def abort(self):
        # What does it mean to "clear" a document?  Does the
        # documentElement disappear?
        raise NotImplementedError(
            "haven't figured out what this means yet")

    def load(self, uri):
        raise NotImplementedError("haven't written this yet")

    def loadXML(self, source):
        raise NotImplementedError("haven't written this yet")

    def saveXML(self, snode):
        """Serialize `snode`, or the whole document when it is None.

        Raises xml.dom.WrongDocumentErr if `snode` belongs to another
        document.
        """
        if snode is None:
            snode = self
        elif snode.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr()
        return snode.toxml()


# The DOM attribute is called "async", which is a reserved word from
# Python 3.7 on; attach it by name so this module compiles everywhere
# while the class attribute itself is unchanged.
setattr(DocumentLS, "async", False)


class DOMImplementationLS:
    """Factory mixin for the load/save objects defined in this module."""

    MODE_SYNCHRONOUS = 1
    MODE_ASYNCHRONOUS = 2

    def createDOMBuilder(self, mode, schemaType):
        """Return a new DOMBuilder for synchronous mode.

        Raises xml.dom.NotSupportedErr for a non-None schemaType or for
        asynchronous mode, and ValueError for any other mode value.
        """
        if schemaType is not None:
            raise xml.dom.NotSupportedErr(
                "schemaType not yet supported")
        if mode == self.MODE_SYNCHRONOUS:
            return DOMBuilder()
        if mode == self.MODE_ASYNCHRONOUS:
            raise xml.dom.NotSupportedErr(
                "asynchronous builders are not supported")
        raise ValueError("unknown value for mode")

    def createDOMWriter(self):
        raise NotImplementedError(
            "the writer interface hasn't been written yet!")

    def createDOMInputSource(self):
        return DOMInputSource()

# Generated by PyXR 0.9.4