0001 # Very simple test - Parse a file and print what happens 0002 0003 # XXX TypeErrors on calling handlers, or on bad return values from a 0004 # handler, are obscure and unhelpful. 0005 0006 import pyexpat 0007 from xml.parsers import expat 0008 0009 from test.test_support import sortdict, TestFailed 0010 0011 class Outputter: 0012 def StartElementHandler(self, name, attrs): 0013 print 'Start element:\n\t', repr(name), sortdict(attrs) 0014 0015 def EndElementHandler(self, name): 0016 print 'End element:\n\t', repr(name) 0017 0018 def CharacterDataHandler(self, data): 0019 data = data.strip() 0020 if data: 0021 print 'Character data:' 0022 print '\t', repr(data) 0023 0024 def ProcessingInstructionHandler(self, target, data): 0025 print 'PI:\n\t', repr(target), repr(data) 0026 0027 def StartNamespaceDeclHandler(self, prefix, uri): 0028 print 'NS decl:\n\t', repr(prefix), repr(uri) 0029 0030 def EndNamespaceDeclHandler(self, prefix): 0031 print 'End of NS decl:\n\t', repr(prefix) 0032 0033 def StartCdataSectionHandler(self): 0034 print 'Start of CDATA section' 0035 0036 def EndCdataSectionHandler(self): 0037 print 'End of CDATA section' 0038 0039 def CommentHandler(self, text): 0040 print 'Comment:\n\t', repr(text) 0041 0042 def NotationDeclHandler(self, *args): 0043 name, base, sysid, pubid = args 0044 print 'Notation declared:', args 0045 0046 def UnparsedEntityDeclHandler(self, *args): 0047 entityName, base, systemId, publicId, notationName = args 0048 print 'Unparsed entity decl:\n\t', args 0049 0050 def NotStandaloneHandler(self, userData): 0051 print 'Not standalone' 0052 return 1 0053 0054 def ExternalEntityRefHandler(self, *args): 0055 context, base, sysId, pubId = args 0056 print 'External entity ref:', args[1:] 0057 return 1 0058 0059 def DefaultHandler(self, userData): 0060 pass 0061 0062 def DefaultHandlerExpand(self, userData): 0063 pass 0064 0065 0066 def confirm(ok): 0067 if ok: 0068 print "OK." 0069 else: 0070 print "Not OK." 
0071 0072 out = Outputter() 0073 parser = expat.ParserCreate(namespace_separator='!') 0074 0075 # Test getting/setting returns_unicode 0076 parser.returns_unicode = 0; confirm(parser.returns_unicode == 0) 0077 parser.returns_unicode = 1; confirm(parser.returns_unicode == 1) 0078 parser.returns_unicode = 2; confirm(parser.returns_unicode == 1) 0079 parser.returns_unicode = 0; confirm(parser.returns_unicode == 0) 0080 0081 # Test getting/setting ordered_attributes 0082 parser.ordered_attributes = 0; confirm(parser.ordered_attributes == 0) 0083 parser.ordered_attributes = 1; confirm(parser.ordered_attributes == 1) 0084 parser.ordered_attributes = 2; confirm(parser.ordered_attributes == 1) 0085 parser.ordered_attributes = 0; confirm(parser.ordered_attributes == 0) 0086 0087 # Test getting/setting specified_attributes 0088 parser.specified_attributes = 0; confirm(parser.specified_attributes == 0) 0089 parser.specified_attributes = 1; confirm(parser.specified_attributes == 1) 0090 parser.specified_attributes = 2; confirm(parser.specified_attributes == 1) 0091 parser.specified_attributes = 0; confirm(parser.specified_attributes == 0) 0092 0093 HANDLER_NAMES = [ 0094 'StartElementHandler', 'EndElementHandler', 0095 'CharacterDataHandler', 'ProcessingInstructionHandler', 0096 'UnparsedEntityDeclHandler', 'NotationDeclHandler', 0097 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', 0098 'CommentHandler', 'StartCdataSectionHandler', 0099 'EndCdataSectionHandler', 0100 'DefaultHandler', 'DefaultHandlerExpand', 0101 #'NotStandaloneHandler', 0102 'ExternalEntityRefHandler' 0103 ] 0104 for name in HANDLER_NAMES: 0105 setattr(parser, name, getattr(out, name)) 0106 0107 data = '''\ 0108 <?xml version="1.0" encoding="iso-8859-1" standalone="no"?> 0109 <?xml-stylesheet href="stylesheet.css"?> 0110 <!-- comment data --> 0111 <!DOCTYPE quotations SYSTEM "quotations.dtd" [ 0112 <!ELEMENT root ANY> 0113 <!NOTATION notation SYSTEM "notation.jpeg"> 0114 <!ENTITY acirc "â"> 0115 
<!ENTITY external_entity SYSTEM "entity.file"> 0116 <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation> 0117 %unparsed_entity; 0118 ]> 0119 0120 <root attr1="value1" attr2="value2ὀ"> 0121 <myns:subelement xmlns:myns="http://www.python.org/namespace"> 0122 Contents of subelements 0123 </myns:subelement> 0124 <sub2><![CDATA[contents of CDATA section]]></sub2> 0125 &external_entity; 0126 </root> 0127 ''' 0128 0129 # Produce UTF-8 output 0130 parser.returns_unicode = 0 0131 try: 0132 parser.Parse(data, 1) 0133 except expat.error: 0134 print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode) 0135 print '** Line', parser.ErrorLineNumber 0136 print '** Column', parser.ErrorColumnNumber 0137 print '** Byte', parser.ErrorByteIndex 0138 0139 # Try the parse again, this time producing Unicode output 0140 parser = expat.ParserCreate(namespace_separator='!') 0141 parser.returns_unicode = 1 0142 0143 for name in HANDLER_NAMES: 0144 setattr(parser, name, getattr(out, name)) 0145 try: 0146 parser.Parse(data, 1) 0147 except expat.error: 0148 print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode) 0149 print '** Line', parser.ErrorLineNumber 0150 print '** Column', parser.ErrorColumnNumber 0151 print '** Byte', parser.ErrorByteIndex 0152 0153 # Try parsing a file 0154 parser = expat.ParserCreate(namespace_separator='!') 0155 parser.returns_unicode = 1 0156 0157 for name in HANDLER_NAMES: 0158 setattr(parser, name, getattr(out, name)) 0159 import StringIO 0160 file = StringIO.StringIO(data) 0161 try: 0162 parser.ParseFile(file) 0163 except expat.error: 0164 print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode) 0165 print '** Line', parser.ErrorLineNumber 0166 print '** Column', parser.ErrorColumnNumber 0167 print '** Byte', parser.ErrorByteIndex 0168 0169 0170 # Tests that make sure we get errors when the namespace_separator value 0171 # is illegal, and that we don't for good values: 0172 print 0173 print "Testing constructor 
for proper handling of namespace_separator values:" 0174 expat.ParserCreate() 0175 expat.ParserCreate(namespace_separator=None) 0176 expat.ParserCreate(namespace_separator=' ') 0177 print "Legal values tested o.k." 0178 try: 0179 expat.ParserCreate(namespace_separator=42) 0180 except TypeError, e: 0181 print "Caught expected TypeError:" 0182 print e 0183 else: 0184 print "Failed to catch expected TypeError." 0185 0186 try: 0187 expat.ParserCreate(namespace_separator='too long') 0188 except ValueError, e: 0189 print "Caught expected ValueError:" 0190 print e 0191 else: 0192 print "Failed to catch expected ValueError." 0193 0194 # ParserCreate() needs to accept a namespace_separator of zero length 0195 # to satisfy the requirements of RDF applications that are required 0196 # to simply glue together the namespace URI and the localname. Though 0197 # considered a wart of the RDF specifications, it needs to be supported. 0198 # 0199 # See XML-SIG mailing list thread starting with 0200 # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html 0201 # 0202 expat.ParserCreate(namespace_separator='') # too short 0203 0204 # Test the interning machinery. 0205 p = expat.ParserCreate() 0206 L = [] 0207 def collector(name, *args): 0208 L.append(name) 0209 p.StartElementHandler = collector 0210 p.EndElementHandler = collector 0211 p.Parse("<e> <e/> <e></e> </e>", 1) 0212 tag = L[0] 0213 if len(L) != 6: 0214 print "L should only contain 6 entries; found", len(L) 0215 for entry in L: 0216 if tag is not entry: 0217 print "expected L to contain many references to the same string", 0218 print "(it didn't)" 0219 print "L =", repr(L) 0220 break 0221 0222 # Tests of the buffer_text attribute. 
0223 import sys 0224 0225 class TextCollector: 0226 def __init__(self, parser): 0227 self.stuff = [] 0228 0229 def check(self, expected, label): 0230 require(self.stuff == expected, 0231 "%s\nstuff = %r\nexpected = %r" 0232 % (label, self.stuff, map(unicode, expected))) 0233 0234 def CharacterDataHandler(self, text): 0235 self.stuff.append(text) 0236 0237 def StartElementHandler(self, name, attrs): 0238 self.stuff.append("<%s>" % name) 0239 bt = attrs.get("buffer-text") 0240 if bt == "yes": 0241 parser.buffer_text = 1 0242 elif bt == "no": 0243 parser.buffer_text = 0 0244 0245 def EndElementHandler(self, name): 0246 self.stuff.append("</%s>" % name) 0247 0248 def CommentHandler(self, data): 0249 self.stuff.append("<!--%s-->" % data) 0250 0251 def require(cond, label): 0252 # similar to confirm(), but no extraneous output 0253 if not cond: 0254 raise TestFailed(label) 0255 0256 def setup(handlers=[]): 0257 parser = expat.ParserCreate() 0258 require(not parser.buffer_text, 0259 "buffer_text not disabled by default") 0260 parser.buffer_text = 1 0261 handler = TextCollector(parser) 0262 parser.CharacterDataHandler = handler.CharacterDataHandler 0263 for name in handlers: 0264 setattr(parser, name, getattr(handler, name)) 0265 return parser, handler 0266 0267 parser, handler = setup() 0268 require(parser.buffer_text, 0269 "text buffering either not acknowledged or not enabled") 0270 parser.Parse("<a>1<b/>2<c/>3</a>", 1) 0271 handler.check(["123"], 0272 "buffered text not properly collapsed") 0273 0274 # XXX This test exposes more detail of Expat's text chunking than we 0275 # XXX like, but it tests what we need to concisely. 
0276 parser, handler = setup(["StartElementHandler"]) 0277 parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1) 0278 handler.check(["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"], 0279 "buffering control not reacting as expected") 0280 0281 parser, handler = setup() 0282 parser.Parse("<a>1<b/><2><c/> \n 3</a>", 1) 0283 handler.check(["1<2> \n 3"], 0284 "buffered text not properly collapsed") 0285 0286 parser, handler = setup(["StartElementHandler"]) 0287 parser.Parse("<a>1<b/>2<c/>3</a>", 1) 0288 handler.check(["<a>", "1", "<b>", "2", "<c>", "3"], 0289 "buffered text not properly split") 0290 0291 parser, handler = setup(["StartElementHandler", "EndElementHandler"]) 0292 parser.CharacterDataHandler = None 0293 parser.Parse("<a>1<b/>2<c/>3</a>", 1) 0294 handler.check(["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"], 0295 "huh?") 0296 0297 parser, handler = setup(["StartElementHandler", "EndElementHandler"]) 0298 parser.Parse("<a>1<b></b>2<c/>3</a>", 1) 0299 handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"], 0300 "huh?") 0301 0302 parser, handler = setup(["CommentHandler", "EndElementHandler", 0303 "StartElementHandler"]) 0304 parser.Parse("<a>1<b/>2<c></c>345</a> ", 1) 0305 handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"], 0306 "buffered text not properly split") 0307 0308 parser, handler = setup(["CommentHandler", "EndElementHandler", 0309 "StartElementHandler"]) 0310 parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1) 0311 handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", 0312 "<!--abc-->", "4", "<!--def-->", "5", "</a>"], 0313 "buffered text not properly split") 0314 0315 # Test handling of exception from callback: 0316 def StartElementHandler(name, attrs): 0317 raise RuntimeError(name) 0318 0319 parser = expat.ParserCreate() 0320 parser.StartElementHandler = StartElementHandler 0321 0322 try: 0323 parser.Parse("<a><b><c/></b></a>", 1) 0324 except 
RuntimeError, e: 0325 if e.args[0] != "a": 0326 print "Expected RuntimeError for element 'a'; found %r" % e.args[0] 0327 else: 0328 print "Expected RuntimeError for 'a'" 0329 0330 # Test Current* members: 0331 class PositionTest: 0332 0333 def __init__(self, expected_list, parser): 0334 self.parser = parser 0335 self.parser.StartElementHandler = self.StartElementHandler 0336 self.parser.EndElementHandler = self.EndElementHandler 0337 self.expected_list = expected_list 0338 self.upto = 0 0339 0340 def StartElementHandler(self, name, attrs): 0341 self.check_pos('s') 0342 0343 def EndElementHandler(self, name): 0344 self.check_pos('e') 0345 0346 def check_pos(self, event): 0347 pos = (event, 0348 self.parser.CurrentByteIndex, 0349 self.parser.CurrentLineNumber, 0350 self.parser.CurrentColumnNumber) 0351 require(self.upto < len(self.expected_list), 0352 'too many parser events') 0353 expected = self.expected_list[self.upto] 0354 require(pos == expected, 0355 'expected position %s, got %s' % (expected, pos)) 0356 self.upto += 1 0357 0358 0359 parser = expat.ParserCreate() 0360 handler = PositionTest([('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2), 0361 ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)], 0362 parser) 0363 parser.Parse('''<a> 0364 <b> 0365 <c/> 0366 </b> 0367 </a>''', 1) 0368
# Generated by PyXR 0.9.4