0001 """\ 0002 minidom.py -- a lightweight DOM implementation. 0003 0004 parse("foo.xml") 0005 0006 parseString("<foo><bar/></foo>") 0007 0008 Todo: 0009 ===== 0010 * convenience methods for getting elements and text. 0011 * more testing 0012 * bring some of the writer and linearizer code into conformance with this 0013 interface 0014 * SAX 2 namespaces 0015 """ 0016 0017 import xml.dom 0018 0019 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg 0020 from xml.dom.minicompat import * 0021 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS 0022 0023 _TupleType = type(()) 0024 0025 # This is used by the ID-cache invalidation checks; the list isn't 0026 # actually complete, since the nodes being checked will never be the 0027 # DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is 0028 # the node being added or removed, not the node being modified.) 0029 # 0030 _nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, 0031 xml.dom.Node.ENTITY_REFERENCE_NODE) 0032 0033 0034 class Node(xml.dom.Node, GetattrMagic): 0035 namespaceURI = None # this is non-null only for elements and attributes 0036 parentNode = None 0037 ownerDocument = None 0038 nextSibling = None 0039 previousSibling = None 0040 0041 prefix = EMPTY_PREFIX # non-null only for NS elements and attributes 0042 0043 def __nonzero__(self): 0044 return True 0045 0046 def toxml(self, encoding = None): 0047 return self.toprettyxml("", "", encoding) 0048 0049 def toprettyxml(self, indent="\t", newl="\n", encoding = None): 0050 # indent = the indentation string to prepend, per level 0051 # newl = the newline string to append 0052 writer = _get_StringIO() 0053 if encoding is not None: 0054 import codecs 0055 # Can't use codecs.getwriter to preserve 2.0 compatibility 0056 writer = codecs.lookup(encoding)[3](writer) 0057 if self.nodeType == Node.DOCUMENT_NODE: 0058 # Can pass encoding only to document, to put it into XML header 0059 self.writexml(writer, "", indent, newl, 
encoding) 0060 else: 0061 self.writexml(writer, "", indent, newl) 0062 return writer.getvalue() 0063 0064 def hasChildNodes(self): 0065 if self.childNodes: 0066 return True 0067 else: 0068 return False 0069 0070 def _get_childNodes(self): 0071 return self.childNodes 0072 0073 def _get_firstChild(self): 0074 if self.childNodes: 0075 return self.childNodes[0] 0076 0077 def _get_lastChild(self): 0078 if self.childNodes: 0079 return self.childNodes[-1] 0080 0081 def insertBefore(self, newChild, refChild): 0082 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 0083 for c in tuple(newChild.childNodes): 0084 self.insertBefore(c, refChild) 0085 ### The DOM does not clearly specify what to return in this case 0086 return newChild 0087 if newChild.nodeType not in self._child_node_types: 0088 raise xml.dom.HierarchyRequestErr( 0089 "%s cannot be child of %s" % (repr(newChild), repr(self))) 0090 if newChild.parentNode is not None: 0091 newChild.parentNode.removeChild(newChild) 0092 if refChild is None: 0093 self.appendChild(newChild) 0094 else: 0095 try: 0096 index = self.childNodes.index(refChild) 0097 except ValueError: 0098 raise xml.dom.NotFoundErr() 0099 if newChild.nodeType in _nodeTypes_with_children: 0100 _clear_id_cache(self) 0101 self.childNodes.insert(index, newChild) 0102 newChild.nextSibling = refChild 0103 refChild.previousSibling = newChild 0104 if index: 0105 node = self.childNodes[index-1] 0106 node.nextSibling = newChild 0107 newChild.previousSibling = node 0108 else: 0109 newChild.previousSibling = None 0110 newChild.parentNode = self 0111 return newChild 0112 0113 def appendChild(self, node): 0114 if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: 0115 for c in tuple(node.childNodes): 0116 self.appendChild(c) 0117 ### The DOM does not clearly specify what to return in this case 0118 return node 0119 if node.nodeType not in self._child_node_types: 0120 raise xml.dom.HierarchyRequestErr( 0121 "%s cannot be child of %s" % (repr(node), repr(self))) 0122 elif 
node.nodeType in _nodeTypes_with_children: 0123 _clear_id_cache(self) 0124 if node.parentNode is not None: 0125 node.parentNode.removeChild(node) 0126 _append_child(self, node) 0127 node.nextSibling = None 0128 return node 0129 0130 def replaceChild(self, newChild, oldChild): 0131 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 0132 refChild = oldChild.nextSibling 0133 self.removeChild(oldChild) 0134 return self.insertBefore(newChild, refChild) 0135 if newChild.nodeType not in self._child_node_types: 0136 raise xml.dom.HierarchyRequestErr( 0137 "%s cannot be child of %s" % (repr(newChild), repr(self))) 0138 if newChild.parentNode is not None: 0139 newChild.parentNode.removeChild(newChild) 0140 if newChild is oldChild: 0141 return 0142 try: 0143 index = self.childNodes.index(oldChild) 0144 except ValueError: 0145 raise xml.dom.NotFoundErr() 0146 self.childNodes[index] = newChild 0147 newChild.parentNode = self 0148 oldChild.parentNode = None 0149 if (newChild.nodeType in _nodeTypes_with_children 0150 or oldChild.nodeType in _nodeTypes_with_children): 0151 _clear_id_cache(self) 0152 newChild.nextSibling = oldChild.nextSibling 0153 newChild.previousSibling = oldChild.previousSibling 0154 oldChild.nextSibling = None 0155 oldChild.previousSibling = None 0156 if newChild.previousSibling: 0157 newChild.previousSibling.nextSibling = newChild 0158 if newChild.nextSibling: 0159 newChild.nextSibling.previousSibling = newChild 0160 return oldChild 0161 0162 def removeChild(self, oldChild): 0163 try: 0164 self.childNodes.remove(oldChild) 0165 except ValueError: 0166 raise xml.dom.NotFoundErr() 0167 if oldChild.nextSibling is not None: 0168 oldChild.nextSibling.previousSibling = oldChild.previousSibling 0169 if oldChild.previousSibling is not None: 0170 oldChild.previousSibling.nextSibling = oldChild.nextSibling 0171 oldChild.nextSibling = oldChild.previousSibling = None 0172 if oldChild.nodeType in _nodeTypes_with_children: 0173 _clear_id_cache(self) 0174 0175 
oldChild.parentNode = None 0176 return oldChild 0177 0178 def normalize(self): 0179 L = [] 0180 for child in self.childNodes: 0181 if child.nodeType == Node.TEXT_NODE: 0182 data = child.data 0183 if data and L and L[-1].nodeType == child.nodeType: 0184 # collapse text node 0185 node = L[-1] 0186 node.data = node.data + child.data 0187 node.nextSibling = child.nextSibling 0188 child.unlink() 0189 elif data: 0190 if L: 0191 L[-1].nextSibling = child 0192 child.previousSibling = L[-1] 0193 else: 0194 child.previousSibling = None 0195 L.append(child) 0196 else: 0197 # empty text node; discard 0198 child.unlink() 0199 else: 0200 if L: 0201 L[-1].nextSibling = child 0202 child.previousSibling = L[-1] 0203 else: 0204 child.previousSibling = None 0205 L.append(child) 0206 if child.nodeType == Node.ELEMENT_NODE: 0207 child.normalize() 0208 self.childNodes[:] = L 0209 0210 def cloneNode(self, deep): 0211 return _clone_node(self, deep, self.ownerDocument or self) 0212 0213 def isSupported(self, feature, version): 0214 return self.ownerDocument.implementation.hasFeature(feature, version) 0215 0216 def _get_localName(self): 0217 # Overridden in Element and Attr where localName can be Non-Null 0218 return None 0219 0220 # Node interfaces from Level 3 (WD 9 April 2002) 0221 0222 def isSameNode(self, other): 0223 return self is other 0224 0225 def getInterface(self, feature): 0226 if self.isSupported(feature, None): 0227 return self 0228 else: 0229 return None 0230 0231 # The "user data" functions use a dictionary that is only present 0232 # if some user data has been set, so be careful not to assume it 0233 # exists. 
0234 0235 def getUserData(self, key): 0236 try: 0237 return self._user_data[key][0] 0238 except (AttributeError, KeyError): 0239 return None 0240 0241 def setUserData(self, key, data, handler): 0242 old = None 0243 try: 0244 d = self._user_data 0245 except AttributeError: 0246 d = {} 0247 self._user_data = d 0248 if d.has_key(key): 0249 old = d[key][0] 0250 if data is None: 0251 # ignore handlers passed for None 0252 handler = None 0253 if old is not None: 0254 del d[key] 0255 else: 0256 d[key] = (data, handler) 0257 return old 0258 0259 def _call_user_data_handler(self, operation, src, dst): 0260 if hasattr(self, "_user_data"): 0261 for key, (data, handler) in self._user_data.items(): 0262 if handler is not None: 0263 handler.handle(operation, key, data, src, dst) 0264 0265 # minidom-specific API: 0266 0267 def unlink(self): 0268 self.parentNode = self.ownerDocument = None 0269 if self.childNodes: 0270 for child in self.childNodes: 0271 child.unlink() 0272 self.childNodes = NodeList() 0273 self.previousSibling = None 0274 self.nextSibling = None 0275 0276 defproperty(Node, "firstChild", doc="First child node, or None.") 0277 defproperty(Node, "lastChild", doc="Last child node, or None.") 0278 defproperty(Node, "localName", doc="Namespace-local name of this node.") 0279 0280 0281 def _append_child(self, node): 0282 # fast path with less checks; usable by DOM builders if careful 0283 childNodes = self.childNodes 0284 if childNodes: 0285 last = childNodes[-1] 0286 node.__dict__["previousSibling"] = last 0287 last.__dict__["nextSibling"] = node 0288 childNodes.append(node) 0289 node.__dict__["parentNode"] = self 0290 0291 def _in_document(node): 0292 # return True iff node is part of a document tree 0293 while node is not None: 0294 if node.nodeType == Node.DOCUMENT_NODE: 0295 return True 0296 node = node.parentNode 0297 return False 0298 0299 def _write_data(writer, data): 0300 "Writes datachars to writer." 
0301 data = data.replace("&", "&").replace("<", "<") 0302 data = data.replace("\"", """).replace(">", ">") 0303 writer.write(data) 0304 0305 def _get_elements_by_tagName_helper(parent, name, rc): 0306 for node in parent.childNodes: 0307 if node.nodeType == Node.ELEMENT_NODE and \ 0308 (name == "*" or node.tagName == name): 0309 rc.append(node) 0310 _get_elements_by_tagName_helper(node, name, rc) 0311 return rc 0312 0313 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): 0314 for node in parent.childNodes: 0315 if node.nodeType == Node.ELEMENT_NODE: 0316 if ((localName == "*" or node.localName == localName) and 0317 (nsURI == "*" or node.namespaceURI == nsURI)): 0318 rc.append(node) 0319 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) 0320 return rc 0321 0322 class DocumentFragment(Node): 0323 nodeType = Node.DOCUMENT_FRAGMENT_NODE 0324 nodeName = "#document-fragment" 0325 nodeValue = None 0326 attributes = None 0327 parentNode = None 0328 _child_node_types = (Node.ELEMENT_NODE, 0329 Node.TEXT_NODE, 0330 Node.CDATA_SECTION_NODE, 0331 Node.ENTITY_REFERENCE_NODE, 0332 Node.PROCESSING_INSTRUCTION_NODE, 0333 Node.COMMENT_NODE, 0334 Node.NOTATION_NODE) 0335 0336 def __init__(self): 0337 self.childNodes = NodeList() 0338 0339 0340 class Attr(Node): 0341 nodeType = Node.ATTRIBUTE_NODE 0342 attributes = None 0343 ownerElement = None 0344 specified = False 0345 _is_id = False 0346 0347 _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) 0348 0349 def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, 0350 prefix=None): 0351 # skip setattr for performance 0352 d = self.__dict__ 0353 d["nodeName"] = d["name"] = qName 0354 d["namespaceURI"] = namespaceURI 0355 d["prefix"] = prefix 0356 d['childNodes'] = NodeList() 0357 0358 # Add the single child node that represents the value of the attr 0359 self.childNodes.append(Text()) 0360 0361 # nodeValue and value are set elsewhere 0362 0363 def _get_localName(self): 0364 
return self.nodeName.split(":", 1)[-1] 0365 0366 def _get_name(self): 0367 return self.name 0368 0369 def _get_specified(self): 0370 return self.specified 0371 0372 def __setattr__(self, name, value): 0373 d = self.__dict__ 0374 if name in ("value", "nodeValue"): 0375 d["value"] = d["nodeValue"] = value 0376 d2 = self.childNodes[0].__dict__ 0377 d2["data"] = d2["nodeValue"] = value 0378 if self.ownerElement is not None: 0379 _clear_id_cache(self.ownerElement) 0380 elif name in ("name", "nodeName"): 0381 d["name"] = d["nodeName"] = value 0382 if self.ownerElement is not None: 0383 _clear_id_cache(self.ownerElement) 0384 else: 0385 d[name] = value 0386 0387 def _set_prefix(self, prefix): 0388 nsuri = self.namespaceURI 0389 if prefix == "xmlns": 0390 if nsuri and nsuri != XMLNS_NAMESPACE: 0391 raise xml.dom.NamespaceErr( 0392 "illegal use of 'xmlns' prefix for the wrong namespace") 0393 d = self.__dict__ 0394 d['prefix'] = prefix 0395 if prefix is None: 0396 newName = self.localName 0397 else: 0398 newName = "%s:%s" % (prefix, self.localName) 0399 if self.ownerElement: 0400 _clear_id_cache(self.ownerElement) 0401 d['nodeName'] = d['name'] = newName 0402 0403 def _set_value(self, value): 0404 d = self.__dict__ 0405 d['value'] = d['nodeValue'] = value 0406 if self.ownerElement: 0407 _clear_id_cache(self.ownerElement) 0408 self.childNodes[0].data = value 0409 0410 def unlink(self): 0411 # This implementation does not call the base implementation 0412 # since most of that is not needed, and the expense of the 0413 # method call is not warranted. We duplicate the removal of 0414 # children, but that's all we needed from the base class. 
0415 elem = self.ownerElement 0416 if elem is not None: 0417 del elem._attrs[self.nodeName] 0418 del elem._attrsNS[(self.namespaceURI, self.localName)] 0419 if self._is_id: 0420 self._is_id = False 0421 elem._magic_id_nodes -= 1 0422 self.ownerDocument._magic_id_count -= 1 0423 for child in self.childNodes: 0424 child.unlink() 0425 del self.childNodes[:] 0426 0427 def _get_isId(self): 0428 if self._is_id: 0429 return True 0430 doc = self.ownerDocument 0431 elem = self.ownerElement 0432 if doc is None or elem is None: 0433 return False 0434 0435 info = doc._get_elem_info(elem) 0436 if info is None: 0437 return False 0438 if self.namespaceURI: 0439 return info.isIdNS(self.namespaceURI, self.localName) 0440 else: 0441 return info.isId(self.nodeName) 0442 0443 def _get_schemaType(self): 0444 doc = self.ownerDocument 0445 elem = self.ownerElement 0446 if doc is None or elem is None: 0447 return _no_type 0448 0449 info = doc._get_elem_info(elem) 0450 if info is None: 0451 return _no_type 0452 if self.namespaceURI: 0453 return info.getAttributeTypeNS(self.namespaceURI, self.localName) 0454 else: 0455 return info.getAttributeType(self.nodeName) 0456 0457 defproperty(Attr, "isId", doc="True if this attribute is an ID.") 0458 defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") 0459 defproperty(Attr, "schemaType", doc="Schema type for this attribute.") 0460 0461 0462 class NamedNodeMap(NewStyle, GetattrMagic): 0463 """The attribute list is a transient interface to the underlying 0464 dictionaries. Mutations here will change the underlying element's 0465 dictionary. 0466 0467 Ordering is imposed artificially and does not reflect the order of 0468 attributes as found in an input document. 
0469 """ 0470 0471 __slots__ = ('_attrs', '_attrsNS', '_ownerElement') 0472 0473 def __init__(self, attrs, attrsNS, ownerElement): 0474 self._attrs = attrs 0475 self._attrsNS = attrsNS 0476 self._ownerElement = ownerElement 0477 0478 def _get_length(self): 0479 return len(self._attrs) 0480 0481 def item(self, index): 0482 try: 0483 return self[self._attrs.keys()[index]] 0484 except IndexError: 0485 return None 0486 0487 def items(self): 0488 L = [] 0489 for node in self._attrs.values(): 0490 L.append((node.nodeName, node.value)) 0491 return L 0492 0493 def itemsNS(self): 0494 L = [] 0495 for node in self._attrs.values(): 0496 L.append(((node.namespaceURI, node.localName), node.value)) 0497 return L 0498 0499 def has_key(self, key): 0500 if isinstance(key, StringTypes): 0501 return self._attrs.has_key(key) 0502 else: 0503 return self._attrsNS.has_key(key) 0504 0505 def keys(self): 0506 return self._attrs.keys() 0507 0508 def keysNS(self): 0509 return self._attrsNS.keys() 0510 0511 def values(self): 0512 return self._attrs.values() 0513 0514 def get(self, name, value=None): 0515 return self._attrs.get(name, value) 0516 0517 __len__ = _get_length 0518 0519 def __cmp__(self, other): 0520 if self._attrs is getattr(other, "_attrs", None): 0521 return 0 0522 else: 0523 return cmp(id(self), id(other)) 0524 0525 def __getitem__(self, attname_or_tuple): 0526 if isinstance(attname_or_tuple, _TupleType): 0527 return self._attrsNS[attname_or_tuple] 0528 else: 0529 return self._attrs[attname_or_tuple] 0530 0531 # same as set 0532 def __setitem__(self, attname, value): 0533 if isinstance(value, StringTypes): 0534 try: 0535 node = self._attrs[attname] 0536 except KeyError: 0537 node = Attr(attname) 0538 node.ownerDocument = self._ownerElement.ownerDocument 0539 self.setNamedItem(node) 0540 node.value = value 0541 else: 0542 if not isinstance(value, Attr): 0543 raise TypeError, "value must be a string or Attr object" 0544 node = value 0545 self.setNamedItem(node) 0546 0547 def 
getNamedItem(self, name): 0548 try: 0549 return self._attrs[name] 0550 except KeyError: 0551 return None 0552 0553 def getNamedItemNS(self, namespaceURI, localName): 0554 try: 0555 return self._attrsNS[(namespaceURI, localName)] 0556 except KeyError: 0557 return None 0558 0559 def removeNamedItem(self, name): 0560 n = self.getNamedItem(name) 0561 if n is not None: 0562 _clear_id_cache(self._ownerElement) 0563 del self._attrs[n.nodeName] 0564 del self._attrsNS[(n.namespaceURI, n.localName)] 0565 if n.__dict__.has_key('ownerElement'): 0566 n.__dict__['ownerElement'] = None 0567 return n 0568 else: 0569 raise xml.dom.NotFoundErr() 0570 0571 def removeNamedItemNS(self, namespaceURI, localName): 0572 n = self.getNamedItemNS(namespaceURI, localName) 0573 if n is not None: 0574 _clear_id_cache(self._ownerElement) 0575 del self._attrsNS[(n.namespaceURI, n.localName)] 0576 del self._attrs[n.nodeName] 0577 if n.__dict__.has_key('ownerElement'): 0578 n.__dict__['ownerElement'] = None 0579 return n 0580 else: 0581 raise xml.dom.NotFoundErr() 0582 0583 def setNamedItem(self, node): 0584 if not isinstance(node, Attr): 0585 raise xml.dom.HierarchyRequestErr( 0586 "%s cannot be child of %s" % (repr(node), repr(self))) 0587 old = self._attrs.get(node.name) 0588 if old: 0589 old.unlink() 0590 self._attrs[node.name] = node 0591 self._attrsNS[(node.namespaceURI, node.localName)] = node 0592 node.ownerElement = self._ownerElement 0593 _clear_id_cache(node.ownerElement) 0594 return old 0595 0596 def setNamedItemNS(self, node): 0597 return self.setNamedItem(node) 0598 0599 def __delitem__(self, attname_or_tuple): 0600 node = self[attname_or_tuple] 0601 _clear_id_cache(node.ownerElement) 0602 node.unlink() 0603 0604 def __getstate__(self): 0605 return self._attrs, self._attrsNS, self._ownerElement 0606 0607 def __setstate__(self, state): 0608 self._attrs, self._attrsNS, self._ownerElement = state 0609 0610 defproperty(NamedNodeMap, "length", 0611 doc="Number of nodes in the 
NamedNodeMap.") 0612 0613 AttributeList = NamedNodeMap 0614 0615 0616 class TypeInfo(NewStyle): 0617 __slots__ = 'namespace', 'name' 0618 0619 def __init__(self, namespace, name): 0620 self.namespace = namespace 0621 self.name = name 0622 0623 def __repr__(self): 0624 if self.namespace: 0625 return "<TypeInfo %r (from %r)>" % (self.name, self.namespace) 0626 else: 0627 return "<TypeInfo %r>" % self.name 0628 0629 def _get_name(self): 0630 return self.name 0631 0632 def _get_namespace(self): 0633 return self.namespace 0634 0635 _no_type = TypeInfo(None, None) 0636 0637 class Element(Node): 0638 nodeType = Node.ELEMENT_NODE 0639 nodeValue = None 0640 schemaType = _no_type 0641 0642 _magic_id_nodes = 0 0643 0644 _child_node_types = (Node.ELEMENT_NODE, 0645 Node.PROCESSING_INSTRUCTION_NODE, 0646 Node.COMMENT_NODE, 0647 Node.TEXT_NODE, 0648 Node.CDATA_SECTION_NODE, 0649 Node.ENTITY_REFERENCE_NODE) 0650 0651 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, 0652 localName=None): 0653 self.tagName = self.nodeName = tagName 0654 self.prefix = prefix 0655 self.namespaceURI = namespaceURI 0656 self.childNodes = NodeList() 0657 0658 self._attrs = {} # attributes are double-indexed: 0659 self._attrsNS = {} # tagName -> Attribute 0660 # URI,localName -> Attribute 0661 # in the future: consider lazy generation 0662 # of attribute objects this is too tricky 0663 # for now because of headaches with 0664 # namespaces. 
0665 0666 def _get_localName(self): 0667 return self.tagName.split(":", 1)[-1] 0668 0669 def _get_tagName(self): 0670 return self.tagName 0671 0672 def unlink(self): 0673 for attr in self._attrs.values(): 0674 attr.unlink() 0675 self._attrs = None 0676 self._attrsNS = None 0677 Node.unlink(self) 0678 0679 def getAttribute(self, attname): 0680 try: 0681 return self._attrs[attname].value 0682 except KeyError: 0683 return "" 0684 0685 def getAttributeNS(self, namespaceURI, localName): 0686 try: 0687 return self._attrsNS[(namespaceURI, localName)].value 0688 except KeyError: 0689 return "" 0690 0691 def setAttribute(self, attname, value): 0692 attr = self.getAttributeNode(attname) 0693 if attr is None: 0694 attr = Attr(attname) 0695 # for performance 0696 d = attr.__dict__ 0697 d["value"] = d["nodeValue"] = value 0698 d["ownerDocument"] = self.ownerDocument 0699 self.setAttributeNode(attr) 0700 elif value != attr.value: 0701 d = attr.__dict__ 0702 d["value"] = d["nodeValue"] = value 0703 if attr.isId: 0704 _clear_id_cache(self) 0705 0706 def setAttributeNS(self, namespaceURI, qualifiedName, value): 0707 prefix, localname = _nssplit(qualifiedName) 0708 attr = self.getAttributeNodeNS(namespaceURI, localname) 0709 if attr is None: 0710 # for performance 0711 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 0712 d = attr.__dict__ 0713 d["prefix"] = prefix 0714 d["nodeName"] = qualifiedName 0715 d["value"] = d["nodeValue"] = value 0716 d["ownerDocument"] = self.ownerDocument 0717 self.setAttributeNode(attr) 0718 else: 0719 d = attr.__dict__ 0720 if value != attr.value: 0721 d["value"] = d["nodeValue"] = value 0722 if attr.isId: 0723 _clear_id_cache(self) 0724 if attr.prefix != prefix: 0725 d["prefix"] = prefix 0726 d["nodeName"] = qualifiedName 0727 0728 def getAttributeNode(self, attrname): 0729 return self._attrs.get(attrname) 0730 0731 def getAttributeNodeNS(self, namespaceURI, localName): 0732 return self._attrsNS.get((namespaceURI, localName)) 0733 0734 def 
setAttributeNode(self, attr): 0735 if attr.ownerElement not in (None, self): 0736 raise xml.dom.InuseAttributeErr("attribute node already owned") 0737 old1 = self._attrs.get(attr.name, None) 0738 if old1 is not None: 0739 self.removeAttributeNode(old1) 0740 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 0741 if old2 is not None and old2 is not old1: 0742 self.removeAttributeNode(old2) 0743 _set_attribute_node(self, attr) 0744 0745 if old1 is not attr: 0746 # It might have already been part of this node, in which case 0747 # it doesn't represent a change, and should not be returned. 0748 return old1 0749 if old2 is not attr: 0750 return old2 0751 0752 setAttributeNodeNS = setAttributeNode 0753 0754 def removeAttribute(self, name): 0755 try: 0756 attr = self._attrs[name] 0757 except KeyError: 0758 raise xml.dom.NotFoundErr() 0759 self.removeAttributeNode(attr) 0760 0761 def removeAttributeNS(self, namespaceURI, localName): 0762 try: 0763 attr = self._attrsNS[(namespaceURI, localName)] 0764 except KeyError: 0765 raise xml.dom.NotFoundErr() 0766 self.removeAttributeNode(attr) 0767 0768 def removeAttributeNode(self, node): 0769 if node is None: 0770 raise xml.dom.NotFoundErr() 0771 try: 0772 self._attrs[node.name] 0773 except KeyError: 0774 raise xml.dom.NotFoundErr() 0775 _clear_id_cache(self) 0776 node.unlink() 0777 # Restore this since the node is still useful and otherwise 0778 # unlinked 0779 node.ownerDocument = self.ownerDocument 0780 0781 removeAttributeNodeNS = removeAttributeNode 0782 0783 def hasAttribute(self, name): 0784 return self._attrs.has_key(name) 0785 0786 def hasAttributeNS(self, namespaceURI, localName): 0787 return self._attrsNS.has_key((namespaceURI, localName)) 0788 0789 def getElementsByTagName(self, name): 0790 return _get_elements_by_tagName_helper(self, name, NodeList()) 0791 0792 def getElementsByTagNameNS(self, namespaceURI, localName): 0793 return _get_elements_by_tagName_ns_helper( 0794 self, namespaceURI, localName, 
NodeList()) 0795 0796 def __repr__(self): 0797 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 0798 0799 def writexml(self, writer, indent="", addindent="", newl=""): 0800 # indent = current indentation 0801 # addindent = indentation to add to higher levels 0802 # newl = newline string 0803 writer.write(indent+"<" + self.tagName) 0804 0805 attrs = self._get_attributes() 0806 a_names = attrs.keys() 0807 a_names.sort() 0808 0809 for a_name in a_names: 0810 writer.write(" %s=\"" % a_name) 0811 _write_data(writer, attrs[a_name].value) 0812 writer.write("\"") 0813 if self.childNodes: 0814 writer.write(">%s"%(newl)) 0815 for node in self.childNodes: 0816 node.writexml(writer,indent+addindent,addindent,newl) 0817 writer.write("%s</%s>%s" % (indent,self.tagName,newl)) 0818 else: 0819 writer.write("/>%s"%(newl)) 0820 0821 def _get_attributes(self): 0822 return NamedNodeMap(self._attrs, self._attrsNS, self) 0823 0824 def hasAttributes(self): 0825 if self._attrs: 0826 return True 0827 else: 0828 return False 0829 0830 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 0831 0832 def setIdAttribute(self, name): 0833 idAttr = self.getAttributeNode(name) 0834 self.setIdAttributeNode(idAttr) 0835 0836 def setIdAttributeNS(self, namespaceURI, localName): 0837 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 0838 self.setIdAttributeNode(idAttr) 0839 0840 def setIdAttributeNode(self, idAttr): 0841 if idAttr is None or not self.isSameNode(idAttr.ownerElement): 0842 raise xml.dom.NotFoundErr() 0843 if _get_containing_entref(self) is not None: 0844 raise xml.dom.NoModificationAllowedErr() 0845 if not idAttr._is_id: 0846 idAttr.__dict__['_is_id'] = True 0847 self._magic_id_nodes += 1 0848 self.ownerDocument._magic_id_count += 1 0849 _clear_id_cache(self) 0850 0851 defproperty(Element, "attributes", 0852 doc="NamedNodeMap of attributes on the element.") 0853 defproperty(Element, "localName", 0854 doc="Namespace-local name of this element.") 0855 0856 0857 def 
_set_attribute_node(element, attr): 0858 _clear_id_cache(element) 0859 element._attrs[attr.name] = attr 0860 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 0861 0862 # This creates a circular reference, but Element.unlink() 0863 # breaks the cycle since the references to the attribute 0864 # dictionaries are tossed. 0865 attr.__dict__['ownerElement'] = element 0866 0867 0868 class Childless: 0869 """Mixin that makes childless-ness easy to implement and avoids 0870 the complexity of the Node methods that deal with children. 0871 """ 0872 0873 attributes = None 0874 childNodes = EmptyNodeList() 0875 firstChild = None 0876 lastChild = None 0877 0878 def _get_firstChild(self): 0879 return None 0880 0881 def _get_lastChild(self): 0882 return None 0883 0884 def appendChild(self, node): 0885 raise xml.dom.HierarchyRequestErr( 0886 self.nodeName + " nodes cannot have children") 0887 0888 def hasChildNodes(self): 0889 return False 0890 0891 def insertBefore(self, newChild, refChild): 0892 raise xml.dom.HierarchyRequestErr( 0893 self.nodeName + " nodes do not have children") 0894 0895 def removeChild(self, oldChild): 0896 raise xml.dom.NotFoundErr( 0897 self.nodeName + " nodes do not have children") 0898 0899 def replaceChild(self, newChild, oldChild): 0900 raise xml.dom.HierarchyRequestErr( 0901 self.nodeName + " nodes do not have children") 0902 0903 0904 class ProcessingInstruction(Childless, Node): 0905 nodeType = Node.PROCESSING_INSTRUCTION_NODE 0906 0907 def __init__(self, target, data): 0908 self.target = self.nodeName = target 0909 self.data = self.nodeValue = data 0910 0911 def _get_data(self): 0912 return self.data 0913 def _set_data(self, value): 0914 d = self.__dict__ 0915 d['data'] = d['nodeValue'] = value 0916 0917 def _get_target(self): 0918 return self.target 0919 def _set_target(self, value): 0920 d = self.__dict__ 0921 d['target'] = d['nodeName'] = value 0922 0923 def __setattr__(self, name, value): 0924 if name == "data" or name == 
"nodeValue": 0925 self.__dict__['data'] = self.__dict__['nodeValue'] = value 0926 elif name == "target" or name == "nodeName": 0927 self.__dict__['target'] = self.__dict__['nodeName'] = value 0928 else: 0929 self.__dict__[name] = value 0930 0931 def writexml(self, writer, indent="", addindent="", newl=""): 0932 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 0933 0934 0935 class CharacterData(Childless, Node): 0936 def _get_length(self): 0937 return len(self.data) 0938 __len__ = _get_length 0939 0940 def _get_data(self): 0941 return self.__dict__['data'] 0942 def _set_data(self, data): 0943 d = self.__dict__ 0944 d['data'] = d['nodeValue'] = data 0945 0946 _get_nodeValue = _get_data 0947 _set_nodeValue = _set_data 0948 0949 def __setattr__(self, name, value): 0950 if name == "data" or name == "nodeValue": 0951 self.__dict__['data'] = self.__dict__['nodeValue'] = value 0952 else: 0953 self.__dict__[name] = value 0954 0955 def __repr__(self): 0956 data = self.data 0957 if len(data) > 10: 0958 dotdotdot = "..." 
0959 else: 0960 dotdotdot = "" 0961 return "<DOM %s node \"%s%s\">" % ( 0962 self.__class__.__name__, data[0:10], dotdotdot) 0963 0964 def substringData(self, offset, count): 0965 if offset < 0: 0966 raise xml.dom.IndexSizeErr("offset cannot be negative") 0967 if offset >= len(self.data): 0968 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 0969 if count < 0: 0970 raise xml.dom.IndexSizeErr("count cannot be negative") 0971 return self.data[offset:offset+count] 0972 0973 def appendData(self, arg): 0974 self.data = self.data + arg 0975 0976 def insertData(self, offset, arg): 0977 if offset < 0: 0978 raise xml.dom.IndexSizeErr("offset cannot be negative") 0979 if offset >= len(self.data): 0980 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 0981 if arg: 0982 self.data = "%s%s%s" % ( 0983 self.data[:offset], arg, self.data[offset:]) 0984 0985 def deleteData(self, offset, count): 0986 if offset < 0: 0987 raise xml.dom.IndexSizeErr("offset cannot be negative") 0988 if offset >= len(self.data): 0989 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 0990 if count < 0: 0991 raise xml.dom.IndexSizeErr("count cannot be negative") 0992 if count: 0993 self.data = self.data[:offset] + self.data[offset+count:] 0994 0995 def replaceData(self, offset, count, arg): 0996 if offset < 0: 0997 raise xml.dom.IndexSizeErr("offset cannot be negative") 0998 if offset >= len(self.data): 0999 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1000 if count < 0: 1001 raise xml.dom.IndexSizeErr("count cannot be negative") 1002 if count: 1003 self.data = "%s%s%s" % ( 1004 self.data[:offset], arg, self.data[offset+count:]) 1005 1006 defproperty(CharacterData, "length", doc="Length of the string data.") 1007 1008 1009 class Text(CharacterData): 1010 # Make sure we don't add an instance __dict__ if we don't already 1011 # have one, at least when that's possible: 1012 # XXX this does not work, CharacterData is an old-style class 1013 
# __slots__ = () 1014 1015 nodeType = Node.TEXT_NODE 1016 nodeName = "#text" 1017 attributes = None 1018 1019 def splitText(self, offset): 1020 if offset < 0 or offset > len(self.data): 1021 raise xml.dom.IndexSizeErr("illegal offset value") 1022 newText = self.__class__() 1023 newText.data = self.data[offset:] 1024 newText.ownerDocument = self.ownerDocument 1025 next = self.nextSibling 1026 if self.parentNode and self in self.parentNode.childNodes: 1027 if next is None: 1028 self.parentNode.appendChild(newText) 1029 else: 1030 self.parentNode.insertBefore(newText, next) 1031 self.data = self.data[:offset] 1032 return newText 1033 1034 def writexml(self, writer, indent="", addindent="", newl=""): 1035 _write_data(writer, "%s%s%s"%(indent, self.data, newl)) 1036 1037 # DOM Level 3 (WD 9 April 2002) 1038 1039 def _get_wholeText(self): 1040 L = [self.data] 1041 n = self.previousSibling 1042 while n is not None: 1043 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1044 L.insert(0, n.data) 1045 n = n.previousSibling 1046 else: 1047 break 1048 n = self.nextSibling 1049 while n is not None: 1050 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1051 L.append(n.data) 1052 n = n.nextSibling 1053 else: 1054 break 1055 return ''.join(L) 1056 1057 def replaceWholeText(self, content): 1058 # XXX This needs to be seriously changed if minidom ever 1059 # supports EntityReference nodes. 
1060 parent = self.parentNode 1061 n = self.previousSibling 1062 while n is not None: 1063 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1064 next = n.previousSibling 1065 parent.removeChild(n) 1066 n = next 1067 else: 1068 break 1069 n = self.nextSibling 1070 if not content: 1071 parent.removeChild(self) 1072 while n is not None: 1073 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1074 next = n.nextSibling 1075 parent.removeChild(n) 1076 n = next 1077 else: 1078 break 1079 if content: 1080 d = self.__dict__ 1081 d['data'] = content 1082 d['nodeValue'] = content 1083 return self 1084 else: 1085 return None 1086 1087 def _get_isWhitespaceInElementContent(self): 1088 if self.data.strip(): 1089 return False 1090 elem = _get_containing_element(self) 1091 if elem is None: 1092 return False 1093 info = self.ownerDocument._get_elem_info(elem) 1094 if info is None: 1095 return False 1096 else: 1097 return info.isElementContent() 1098 1099 defproperty(Text, "isWhitespaceInElementContent", 1100 doc="True iff this text node contains only whitespace" 1101 " and is in element content.") 1102 defproperty(Text, "wholeText", 1103 doc="The text of all logically-adjacent text nodes.") 1104 1105 1106 def _get_containing_element(node): 1107 c = node.parentNode 1108 while c is not None: 1109 if c.nodeType == Node.ELEMENT_NODE: 1110 return c 1111 c = c.parentNode 1112 return None 1113 1114 def _get_containing_entref(node): 1115 c = node.parentNode 1116 while c is not None: 1117 if c.nodeType == Node.ENTITY_REFERENCE_NODE: 1118 return c 1119 c = c.parentNode 1120 return None 1121 1122 1123 class Comment(Childless, CharacterData): 1124 nodeType = Node.COMMENT_NODE 1125 nodeName = "#comment" 1126 1127 def __init__(self, data): 1128 self.data = self.nodeValue = data 1129 1130 def writexml(self, writer, indent="", addindent="", newl=""): 1131 writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) 1132 1133 1134 class CDATASection(Text): 1135 # Make sure we don't 
class CDATASection(Text):
    """DOM CDATASection node; a Text node serialized as ``<![CDATA[...]]>``."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, Text is an old-style class
    # __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the section to ``writer``.

        indent, addindent and newl are accepted for interface
        compatibility but not used.  Raises ValueError if the data
        contains the CDATA terminator "]]>".
        """
        if self.data.find("]]>") >= 0:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % self.data)


class ReadOnlySequentialNamedNodeMap(NewStyle, GetattrMagic):
    """Read-only NamedNodeMap backed by a plain sequence of nodes.

    Lookups scan the sequence linearly; every mutating NamedNodeMap
    method raises xml.dom.NoModificationAllowedErr.
    """

    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals ``name``, or None."""
        for n in self._seq:
            if n.nodeName == name:
                return n
        return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching both ``namespaceURI`` and
        ``localName``, or None."""
        for n in self._seq:
            if n.namespaceURI == namespaceURI and n.localName == localName:
                return n
        return None

    def __getitem__(self, name_or_tuple):
        """Look up by name, or by a (namespaceURI, localName) tuple.

        Raises KeyError carrying the key exactly as given when nothing
        matches.
        """
        if isinstance(name_or_tuple, _TupleType):
            node = self.getNamedItemNS(*name_or_tuple)
        else:
            node = self.getNamedItem(name_or_tuple)
        if node is None:
            # Call form: the statement form ``raise KeyError, key`` would
            # unpack a tuple key into separate exception arguments.
            raise KeyError(name_or_tuple)
        return node

    def item(self, index):
        """Return the node at ``index``, or None when out of range
        (negative indexes are treated as out of range)."""
        if index < 0:
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]

defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")


class Identified:
    """Mix-in class that supports the publicId and systemId attributes."""

    # XXX this does not work, this is an old-style class
    # __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        self.publicId = publicId
        self.systemId = systemId

    def _get_publicId(self):
        return self.publicId

    def _get_systemId(self):
        return self.systemId
class DocumentType(Identified, Childless, Node):
    """DOM DocumentType node (the ``<!DOCTYPE ...>`` declaration)."""

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            # Only the local part of the qualified name is kept.
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Clone this doctype if it is not owned by a document.

        Returns None when the node has an ownerDocument.  With ``deep``
        true the notations and entities are copied too, and each copied
        node's user-data handlers are invoked with NODE_CLONED.
        """
        if self.ownerDocument is None:
            # it's ok
            clone = DocumentType(None)
            clone.name = self.name
            clone.nodeName = self.name
            operation = xml.dom.UserDataHandler.NODE_CLONED
            if deep:
                clone.entities._seq = []
                clone.notations._seq = []
                for n in self.notations._seq:
                    notation = Notation(n.nodeName, n.publicId, n.systemId)
                    clone.notations._seq.append(notation)
                    n._call_user_data_handler(operation, n, notation)
                for e in self.entities._seq:
                    entity = Entity(e.nodeName, e.publicId, e.systemId,
                                    e.notationName)
                    entity.actualEncoding = e.actualEncoding
                    entity.encoding = e.encoding
                    entity.version = e.version
                    clone.entities._seq.append(entity)
                    # The source node is the entity itself; this used to
                    # pass ``n``, the leftover notation loop variable.
                    e._call_user_data_handler(operation, e, entity)
            self._call_user_data_handler(operation, self, clone)
            return clone
        else:
            return None

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the DOCTYPE declaration to ``writer``.

        ``newl`` is used for every line break, so passing newl="" yields
        single-line output; indent and addindent are not used.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s  PUBLIC '%s'%s  '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">" + newl)

class Entity(Identified, Node):
    """DOM Entity node; its child list cannot be modified through the
    public child-manipulation methods."""

    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    def appendChild(self, newChild):
        raise xml.dom.HierarchyRequestErr(
            "cannot append children to an entity node")

    def insertBefore(self, newChild, refChild):
        raise xml.dom.HierarchyRequestErr(
            "cannot insert children below an entity node")

    def removeChild(self, oldChild):
        raise xml.dom.HierarchyRequestErr(
            "cannot remove children from an entity node")

    def replaceChild(self, newChild, oldChild):
        raise xml.dom.HierarchyRequestErr(
            "cannot replace children of an entity node")
class Notation(Identified, Childless, Node):
    """DOM Notation node identified by a name plus public/system ids."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self.nodeName = name
        self._identified_mixin_init(publicId, systemId)


class DOMImplementation(DOMImplementationLS):
    """Factory for minidom documents and document types."""

    # (feature, version) pairs answered positively by hasFeature();
    # a version of None stands for "any version".
    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", "3.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", "3.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return true if (feature, version) is supported.

        The feature name is matched case-insensitively and an empty
        version string is treated like None.
        """
        wanted = version
        if wanted == "":
            wanted = None
        return (feature.lower(), wanted) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a new document, optionally with doctype and root element.

        With all three arguments None an empty document is returned.
        Otherwise a root element named ``qualifiedName`` is created.

        Raises xml.dom.WrongDocumentErr if ``doctype`` already has a
        parent, xml.dom.InvalidCharacterErr if a root element is required
        but no name was given, and xml.dom.NamespaceErr for an illegal
        prefix/namespace combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        document = self._create_document()

        # DOM Level 3: only when namespaceURI, qualifiedName and doctype
        # are all None is the document returned without a root element.
        wants_root = (namespaceURI is not None
                      or qualifiedName is not None
                      or doctype is not None)

        if wants_root:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate.  Since Xerces
                # raises InvalidCharacterErr, and since SyntaxErr is not
                # listed for createDocument, that seems to be the better
                # choice.
                # XXX: need to check for illegal characters here and in
                # createElement.
                raise xml.dom.InvalidCharacterErr("Element with no name")
            prefix = _nssplit(qualifiedName)[0]
            if (prefix == "xml"
                and namespaceURI != "http://www.w3.org/XML/1998/namespace"):
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = document.createElementNS(namespaceURI, qualifiedName)
            if doctype:
                document.appendChild(doctype)
            document.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = document

        document.doctype = doctype
        document.implementation = self
        return document

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return this object if ``feature`` is supported, else None."""
        if self.hasFeature(feature, None):
            return self
        return None

    # internal
    def _create_document(self):
        return Document()
class ElementInfo(NewStyle):
    """Content-model information for one element type.

    This default implementation knows nothing beyond the tag name and
    answers every query negatively; it is not expected to be used in
    practice.  DOM builders should supply implementations that answer
    from the information available to them.
    """

    __slots__ = 'tagName',

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the type of the named attribute (always _no_type here)."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Namespace-aware variant of getAttributeType()."""
        return _no_type

    def isElementContent(self):
        """Return true iff the element is declared to have element
        content (never, in this implementation)."""
        return False

    def isEmpty(self):
        """Return true iff the element is declared to have an EMPTY
        content model (never, in this implementation)."""
        return False

    def isId(self, aname):
        """Return true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Return true iff the identified attribute is a DTD-style ID."""
        return False

    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state

def _clear_id_cache(node):
    """Reset the ID cache and ID search stack of the document that
    ``node`` is, or is attached to; do nothing for detached nodes."""
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
class Document(Node, DocumentLS):
    """DOM Document node: the root of a tree and the factory for its nodes."""

    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None
    previousSibling = nextSibling = None

    implementation = DOMImplementation()

    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    _magic_id_count = 0

    def __init__(self):
        self.childNodes = NodeList()
        # mapping of (namespaceURI, localName) -> ElementInfo
        #        and tagName -> ElementInfo
        self._elem_info = {}
        self._id_cache = {}
        self._id_search_stack = None

    def _get_elem_info(self, element):
        """Return the ElementInfo registered for ``element``, or None."""
        if element.namespaceURI:
            key = element.namespaceURI, element.localName
        else:
            key = element.tagName
        return self._elem_info.get(key)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_doctype(self):
        return self.doctype

    def _get_documentURI(self):
        return self.documentURI

    def _get_encoding(self):
        return self.encoding

    def _get_errorHandler(self):
        return self.errorHandler

    def _get_standalone(self):
        return self.standalone

    def _get_strictErrorChecking(self):
        return self.strictErrorChecking

    def _get_version(self):
        return self.version

    def appendChild(self, node):
        """Append ``node`` as the last child of the document.

        Raises xml.dom.HierarchyRequestErr if the node type is not allowed
        directly under a document, or if ``node`` is an element and the
        document already has a document element.
        """
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            # This needs to be done before the next test since this
            # may *be* the document element, in which case it should
            # end up re-ordered to the end.
            node.parentNode.removeChild(node)

        if node.nodeType == Node.ELEMENT_NODE \
           and self._get_documentElement():
            raise xml.dom.HierarchyRequestErr(
                "two document elements disallowed")
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Detach ``oldChild`` from the document and return it.

        Raises xml.dom.NotFoundErr if it is not a child of the document.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        if self.documentElement is oldChild:
            self.documentElement = None

        return oldChild

    def _get_documentElement(self):
        """Return the first element child, or None if there is none."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node
        return None

    def unlink(self):
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    def cloneNode(self, deep):
        """Return a deep copy of the document; None when ``deep`` is false."""
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            clone.childNodes.append(childclone)
            # NOTE(review): this tests for DOCUMENT_NODE, which a cloned
            # child is not expected to be -- confirm the intended type.
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone

    def createDocumentFragment(self):
        d = DocumentFragment()
        d.ownerDocument = self
        return d

    def createElement(self, tagName):
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createTextNode(self, data):
        """Return a new Text node; raises TypeError for non-string data."""
        if not isinstance(data, StringTypes):
            raise TypeError("node contents must be a string")
        t = Text()
        t.data = data
        t.ownerDocument = self
        return t

    def createCDATASection(self, data):
        """Return a new CDATASection; raises TypeError for non-string data."""
        if not isinstance(data, StringTypes):
            raise TypeError("node contents must be a string")
        c = CDATASection()
        c.data = data
        c.ownerDocument = self
        return c

    def createComment(self, data):
        c = Comment(data)
        c.ownerDocument = self
        return c

    def createProcessingInstruction(self, target, data):
        p = ProcessingInstruction(target, data)
        p.ownerDocument = self
        return p

    def createAttribute(self, qName):
        a = Attr(qName)
        a.ownerDocument = self
        a.value = ""
        return a

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        a = Attr(qualifiedName, namespaceURI, localName, prefix)
        a.ownerDocument = self
        a.value = ""
        return a

    # A couple of implementation-specific helpers to create node types
    # not supported by the W3C DOM specs:

    def _create_entity(self, name, publicId, systemId, notationName):
        e = Entity(name, publicId, systemId, notationName)
        e.ownerDocument = self
        return e

    def _create_notation(self, name, publicId, systemId):
        n = Notation(name, publicId, systemId)
        n.ownerDocument = self
        return n

    def getElementById(self, id):
        """Return the element having an ID attribute equal to ``id``, or None.

        Results are cached in self._id_cache.  The tree walk is resumable:
        nodes still to be visited are kept in self._id_search_stack so a
        later call continues where the previous one stopped.
        """
        if id in self._id_cache:
            return self._id_cache[id]
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            root = self.documentElement
            if root is None:
                # No document element: nothing to search.  Without this
                # check None was pushed and dereferenced below.
                stack = []
            else:
                stack = [root]
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            break
            elif node._magic_id_nodes:
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                break
        return result

    def getElementsByTagName(self, name):
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def isSupported(self, feature, version):
        return self.implementation.hasFeature(feature, version)

    def importNode(self, node, deep):
        """Return a copy of ``node`` owned by this document.

        Raises xml.dom.NotSupportedErr for document and doctype nodes.
        """
        if node.nodeType == Node.DOCUMENT_NODE:
            raise xml.dom.NotSupportedErr("cannot import document nodes")
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            raise xml.dom.NotSupportedErr("cannot import document type nodes")
        return _clone_node(node, deep, self)

    def writexml(self, writer, indent="", addindent="", newl="",
                 encoding = None):
        """Write the XML declaration followed by all children.

        The declaration is terminated with ``newl`` so that newl=""
        produces output without any line break.
        """
        if encoding is None:
            writer.write('<?xml version="1.0" ?>' + newl)
        else:
            writer.write('<?xml version="1.0" encoding="%s"?>%s'
                         % (encoding, newl))
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)

    # DOM Level 3 (WD 9 April 2002)

    def renameNode(self, n, namespaceURI, name):
        """Rename element or attribute ``n`` in place and return it.

        Raises xml.dom.WrongDocumentErr if ``n`` belongs to another
        document, xml.dom.NotSupportedErr for other node types and
        xml.dom.NamespaceErr for an illegal use of "xmlns".
        """
        if n.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr(
                "cannot rename nodes from other documents;\n"
                "expected %s,\nfound %s" % (self, n.ownerDocument))
        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
            raise xml.dom.NotSupportedErr(
                "renameNode() only applies to element and attribute nodes")
        if namespaceURI != EMPTY_NAMESPACE:
            if ':' in name:
                prefix, localName = name.split(':', 1)
                if (  prefix == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of 'xmlns' prefix")
            else:
                if (  name == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
                      and n.nodeType == Node.ATTRIBUTE_NODE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of the 'xmlns' attribute")
                prefix = None
                localName = name
        else:
            prefix = None
            localName = None
        if n.nodeType == Node.ATTRIBUTE_NODE:
            # Detach the attribute while it is renamed; it is re-attached
            # (and its ID-ness restored) further down.
            element = n.ownerElement
            if element is not None:
                is_id = n._is_id
                element.removeAttributeNode(n)
        else:
            element = None
        # avoid __setattr__
        d = n.__dict__
        d['prefix'] = prefix
        d['localName'] = localName
        d['namespaceURI'] = namespaceURI
        d['nodeName'] = name
        if n.nodeType == Node.ELEMENT_NODE:
            d['tagName'] = name
        else:
            # attribute node
            d['name'] = name
            if element is not None:
                element.setAttributeNode(n)
                if is_id:
                    element.setIdAttributeNode(n)
        # It's not clear from a semantic perspective whether we should
        # call the user data handlers for the NODE_RENAMED event since
        # we're re-using the existing node.  The draft spec has been
        # interpreted as meaning "no, don't call the handler unless a
        # new node is created."
        return n

defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
def _clone_node(node, deep, newOwnerDocument):
    """
    Clone a node and give it the new owner document.
    Called by Node.cloneNode and Document.importNode

    User-data handlers of the source node (and, for a deep doctype copy,
    of each copied notation and entity) are called with NODE_CLONED when
    the owner document stays the same and NODE_IMPORTED otherwise.
    Raises xml.dom.NotSupportedErr for node types that cannot be cloned
    here.
    """
    if node.ownerDocument.isSameNode(newOwnerDocument):
        operation = xml.dom.UserDataHandler.NODE_CLONED
    else:
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
    if node.nodeType == Node.ELEMENT_NODE:
        clone = newOwnerDocument.createElementNS(node.namespaceURI,
                                                 node.nodeName)
        for attr in node.attributes.values():
            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
            a.specified = attr.specified

        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
        clone = newOwnerDocument.createDocumentFragment()
        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.TEXT_NODE:
        clone = newOwnerDocument.createTextNode(node.data)
    elif node.nodeType == Node.CDATA_SECTION_NODE:
        clone = newOwnerDocument.createCDATASection(node.data)
    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        clone = newOwnerDocument.createProcessingInstruction(node.target,
                                                             node.data)
    elif node.nodeType == Node.COMMENT_NODE:
        clone = newOwnerDocument.createComment(node.data)
    elif node.nodeType == Node.ATTRIBUTE_NODE:
        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
                                                   node.nodeName)
        clone.specified = True
        clone.value = node.value
    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        assert node.ownerDocument is not newOwnerDocument
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
        clone = newOwnerDocument.implementation.createDocumentType(
            node.name, node.publicId, node.systemId)
        clone.ownerDocument = newOwnerDocument
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in node.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                notation.ownerDocument = newOwnerDocument
                clone.notations._seq.append(notation)
                if hasattr(n, '_call_user_data_handler'):
                    n._call_user_data_handler(operation, n, notation)
            for e in node.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                entity.ownerDocument = newOwnerDocument
                clone.entities._seq.append(entity)
                if hasattr(e, '_call_user_data_handler'):
                    # The source is the entity being copied; this used to
                    # pass ``n``, the leftover notation loop variable.
                    e._call_user_data_handler(operation, e, entity)
    else:
        # Note the cloning of Document and DocumentType nodes is
        # implementation specific.  minidom handles those cases
        # directly in the cloneNode() methods.
        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))

    # Check for _call_user_data_handler() since this could conceivably
    # be used with other DOM implementations (one of the FourThought
    # DOMs, perhaps?).
    if hasattr(node, '_call_user_data_handler'):
        node._call_user_data_handler(operation, node, clone)
    return clone


def _nssplit(qualifiedName):
    """Split "prefix:local" into a (prefix, localName) tuple.

    The split happens at the first colon only; prefix is None when the
    name contains no colon.
    """
    fields = qualifiedName.split(':', 1)
    if len(fields) == 2:
        # Always hand back a tuple so both branches agree on the type.
        return (fields[0], fields[1])
    else:
        return (None, fields[0])


def _get_StringIO():
    """Return a fresh StringIO instance."""
    # we can't use cStringIO since it doesn't support Unicode strings
    from StringIO import StringIO
    return StringIO()

def _do_pulldom_parse(func, args, kwargs):
    """Call the pulldom entry point ``func`` and return its first event's
    node after expanding it into a full tree."""
    events = func(*args, **kwargs)
    toktype, rootNode = events.getEvent()
    events.expandNode(rootNode)
    events.clear()
    return rootNode

def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object."""
    if parser is None and not bufsize:
        from xml.dom import expatbuilder
        return expatbuilder.parse(file)
    else:
        # A custom parser or buffer size is only supported via pulldom.
        from xml.dom import pulldom
        return _do_pulldom_parse(pulldom.parse, (file,),
            {'parser': parser, 'bufsize': bufsize})

def parseString(string, parser=None):
    """Parse a file into a DOM from a string."""
    if parser is None:
        from xml.dom import expatbuilder
        return expatbuilder.parseString(string)
    else:
        from xml.dom import pulldom
        return _do_pulldom_parse(pulldom.parseString, (string,),
                                 {'parser': parser})

def getDOMImplementation(features=None):
    """Return the minidom DOMImplementation, or None if it lacks one of
    the requested features.

    ``features`` may be a feature string or a sequence of
    (feature, version) pairs.
    """
    if features:
        if isinstance(features, StringTypes):
            features = domreg._parse_feature_string(features)
        for f, v in features:
            if not Document.implementation.hasFeature(f, v):
                return None
    return Document.implementation
Generated by PyXR 0.9.4