0001 from test.test_support import verbose, TESTFN 0002 import random 0003 import os 0004 0005 # From SF bug #422121: Insecurities in dict comparison. 0006 0007 # Safety of code doing comparisons has been an historical Python weak spot. 0008 # The problem is that comparison of structures written in C *naturally* 0009 # wants to hold on to things like the size of the container, or "the 0010 # biggest" containee so far, across a traversal of the container; but 0011 # code to do containee comparisons can call back into Python and mutate 0012 # the container in arbitrary ways while the C loop is in midstream. If the 0013 # C code isn't extremely paranoid about digging things out of memory on 0014 # each trip, and artificially boosting refcounts for the duration, anything 0015 # from infinite loops to OS crashes can result (yes, I use Windows <wink>). 0016 # 0017 # The other problem is that code designed to provoke a weakness is usually 0018 # white-box code, and so catches only the particular vulnerabilities the 0019 # author knew to protect against. For example, Python's list.sort() code 0020 # went thru many iterations as one "new" vulnerability after another was 0021 # discovered. 0022 # 0023 # So the dict comparison test here uses a black-box approach instead, 0024 # generating dicts of various sizes at random, and performing random 0025 # mutations on them at random times. This proved very effective, 0026 # triggering at least six distinct failure modes the first 20 times I 0027 # ran it. Indeed, at the start, the driver never got beyond 6 iterations 0028 # before the test died. 0029 0030 # The dicts are global to make it easy to mutate tham from within functions. 0031 dict1 = {} 0032 dict2 = {} 0033 0034 # The current set of keys in dict1 and dict2. These are materialized as 0035 # lists to make it easy to pick a dict key at random. 0036 dict1keys = [] 0037 dict2keys = [] 0038 0039 # Global flag telling maybe_mutate() wether to *consider* mutating. 0040 mutate = 0 0041 0042 # If global mutate is true, consider mutating a dict. May or may not 0043 # mutate a dict even if mutate is true. If it does decide to mutate a 0044 # dict, it picks one of {dict1, dict2} at random, and deletes a random 0045 # entry from it; or, more rarely, adds a random element. 0046 0047 def maybe_mutate(): 0048 global mutate 0049 if not mutate: 0050 return 0051 if random.random() < 0.5: 0052 return 0053 0054 if random.random() < 0.5: 0055 target, keys = dict1, dict1keys 0056 else: 0057 target, keys = dict2, dict2keys 0058 0059 if random.random() < 0.2: 0060 # Insert a new key. 0061 mutate = 0 # disable mutation until key inserted 0062 while 1: 0063 newkey = Horrid(random.randrange(100)) 0064 if newkey not in target: 0065 break 0066 target[newkey] = Horrid(random.randrange(100)) 0067 keys.append(newkey) 0068 mutate = 1 0069 0070 elif keys: 0071 # Delete a key at random. 0072 i = random.randrange(len(keys)) 0073 key = keys[i] 0074 del target[key] 0075 # CAUTION: don't use keys.remove(key) here. Or do <wink>. The 0076 # point is that .remove() would trigger more comparisons, and so 0077 # also more calls to this routine. We're mutating often enough 0078 # without that. 0079 del keys[i] 0080 0081 # A horrid class that triggers random mutations of dict1 and dict2 when 0082 # instances are compared. 0083 0084 class Horrid: 0085 def __init__(self, i): 0086 # Comparison outcomes are determined by the value of i. 0087 self.i = i 0088 0089 # An artificial hashcode is selected at random so that we don't 0090 # have any systematic relationship between comparison outcomes 0091 # (based on self.i and other.i) and relative position within the 0092 # hash vector (based on hashcode). 0093 self.hashcode = random.randrange(1000000000) 0094 0095 def __hash__(self): 0096 return self.hashcode 0097 0098 def __cmp__(self, other): 0099 maybe_mutate() # The point of the test. 0100 return cmp(self.i, other.i) 0101 0102 def __repr__(self): 0103 return "Horrid(%d)" % self.i 0104 0105 # Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs, 0106 # where i and j are selected at random from the candidates list. 0107 # Return d.keys() after filling. 0108 0109 def fill_dict(d, candidates, numentries): 0110 d.clear() 0111 for i in xrange(numentries): 0112 d[Horrid(random.choice(candidates))] = \ 0113 Horrid(random.choice(candidates)) 0114 return d.keys() 0115 0116 # Test one pair of randomly generated dicts, each with n entries. 0117 # Note that dict comparison is trivial if they don't have the same number 0118 # of entires (then the "shorter" dict is instantly considered to be the 0119 # smaller one, without even looking at the entries). 0120 0121 def test_one(n): 0122 global mutate, dict1, dict2, dict1keys, dict2keys 0123 0124 # Fill the dicts without mutating them. 0125 mutate = 0 0126 dict1keys = fill_dict(dict1, range(n), n) 0127 dict2keys = fill_dict(dict2, range(n), n) 0128 0129 # Enable mutation, then compare the dicts so long as they have the 0130 # same size. 0131 mutate = 1 0132 if verbose: 0133 print "trying w/ lengths", len(dict1), len(dict2), 0134 while dict1 and len(dict1) == len(dict2): 0135 if verbose: 0136 print ".", 0137 c = cmp(dict1, dict2) 0138 if verbose: 0139 print 0140 0141 # Run test_one n times. At the start (before the bugs were fixed), 20 0142 # consecutive runs of this test each blew up on or before the sixth time 0143 # test_one was run. So n doesn't have to be large to get an interesting 0144 # test. 0145 # OTOH, calling with large n is also interesting, to ensure that the fixed 0146 # code doesn't hold on to refcounts *too* long (in which case memory would 0147 # leak). 0148 0149 def test(n): 0150 for i in xrange(n): 0151 test_one(random.randrange(1, 100)) 0152 0153 # See last comment block for clues about good values for n. 0154 test(100) 0155 0156 ########################################################################## 0157 # Another segfault bug, distilled by Michael Hudson from a c.l.py post. 0158 0159 class Child: 0160 def __init__(self, parent): 0161 self.__dict__['parent'] = parent 0162 def __getattr__(self, attr): 0163 self.parent.a = 1 0164 self.parent.b = 1 0165 self.parent.c = 1 0166 self.parent.d = 1 0167 self.parent.e = 1 0168 self.parent.f = 1 0169 self.parent.g = 1 0170 self.parent.h = 1 0171 self.parent.i = 1 0172 return getattr(self.parent, attr) 0173 0174 class Parent: 0175 def __init__(self): 0176 self.a = Child(self) 0177 0178 # Hard to say what this will print! May vary from time to time. But 0179 # we're specifically trying to test the tp_print slot here, and this is 0180 # the clearest way to do it. We print the result to a temp file so that 0181 # the expected-output file doesn't need to change. 0182 0183 f = open(TESTFN, "w") 0184 print >> f, Parent().__dict__ 0185 f.close() 0186 os.unlink(TESTFN) 0187 0188 ########################################################################## 0189 # And another core-dumper from Michael Hudson. 0190 0191 dict = {} 0192 0193 # Force dict to malloc its table. 0194 for i in range(1, 10): 0195 dict[i] = i 0196 0197 f = open(TESTFN, "w") 0198 0199 class Machiavelli: 0200 def __repr__(self): 0201 dict.clear() 0202 0203 # Michael sez: "doesn't crash without this. don't know why." 0204 # Tim sez: "luck of the draw; crashes with or without for me." 0205 print >> f 0206 0207 return `"machiavelli"` 0208 0209 def __hash__(self): 0210 return 0 0211 0212 dict[Machiavelli()] = Machiavelli() 0213 0214 print >> f, str(dict) 0215 f.close() 0216 os.unlink(TESTFN) 0217 del f, dict 0218 0219 0220 ########################################################################## 0221 # And another core-dumper from Michael Hudson. 0222 0223 dict = {} 0224 0225 # let's force dict to malloc its table 0226 for i in range(1, 10): 0227 dict[i] = i 0228 0229 class Machiavelli2: 0230 def __eq__(self, other): 0231 dict.clear() 0232 return 1 0233 0234 def __hash__(self): 0235 return 0 0236 0237 dict[Machiavelli2()] = Machiavelli2() 0238 0239 try: 0240 dict[Machiavelli2()] 0241 except KeyError: 0242 pass 0243 0244 del dict 0245 0246 ########################################################################## 0247 # And another core-dumper from Michael Hudson. 0248 0249 dict = {} 0250 0251 # let's force dict to malloc its table 0252 for i in range(1, 10): 0253 dict[i] = i 0254 0255 class Machiavelli3: 0256 def __init__(self, id): 0257 self.id = id 0258 0259 def __eq__(self, other): 0260 if self.id == other.id: 0261 dict.clear() 0262 return 1 0263 else: 0264 return 0 0265 0266 def __repr__(self): 0267 return "%s(%s)"%(self.__class__.__name__, self.id) 0268 0269 def __hash__(self): 0270 return 0 0271 0272 dict[Machiavelli3(1)] = Machiavelli3(0) 0273 dict[Machiavelli3(2)] = Machiavelli3(0) 0274 0275 f = open(TESTFN, "w") 0276 try: 0277 try: 0278 print >> f, dict[Machiavelli3(2)] 0279 except KeyError: 0280 pass 0281 finally: 0282 f.close() 0283 os.unlink(TESTFN) 0284 0285 del dict 0286
Generated by PyXR 0.9.4