0001 #! /usr/bin/env python 0002 # 0003 # Class for profiling python code. rev 1.0 6/2/94 0004 # 0005 # Based on prior profile module by Sjoerd Mullender... 0006 # which was hacked somewhat by: Guido van Rossum 0007 # 0008 # See profile.doc for more information 0009 0010 """Class for profiling Python code.""" 0011 0012 # Copyright 1994, by InfoSeek Corporation, all rights reserved. 0013 # Written by James Roskind 0014 # 0015 # Permission to use, copy, modify, and distribute this Python software 0016 # and its associated documentation for any purpose (subject to the 0017 # restriction in the following sentence) without fee is hereby granted, 0018 # provided that the above copyright notice appears in all copies, and 0019 # that both that copyright notice and this permission notice appear in 0020 # supporting documentation, and that the name of InfoSeek not be used in 0021 # advertising or publicity pertaining to distribution of the software 0022 # without specific, written prior permission. This permission is 0023 # explicitly restricted to the copying and modification of the software 0024 # to remain in Python, compiled Python, or other languages (such as C) 0025 # wherein the modified or derived code is exclusively imported into a 0026 # Python module. 0027 # 0028 # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS 0029 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 0030 # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY 0031 # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER 0032 # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF 0033 # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 0034 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 0035 0036 0037 0038 import sys 0039 import os 0040 import time 0041 import marshal 0042 from optparse import OptionParser 0043 0044 __all__ = ["run", "runctx", "help", "Profile"] 0045 0046 # Sample timer for use with 0047 #i_count = 0 0048 #def integer_timer(): 0049 # global i_count 0050 # i_count = i_count + 1 0051 # return i_count 0052 #itimes = integer_timer # replace with C coded timer returning integers 0053 0054 #************************************************************************** 0055 # The following are the static member functions for the profiler class 0056 # Note that an instance of Profile() is *not* needed to call them. 0057 #************************************************************************** 0058 0059 def run(statement, filename=None, sort=-1): 0060 """Run statement under profiler optionally saving results in filename 0061 0062 This function takes a single argument that can be passed to the 0063 "exec" statement, and an optional file name. In all cases this 0064 routine attempts to "exec" its first argument and gather profiling 0065 statistics from the execution. If no file name is present, then this 0066 function automatically prints a simple profiling report, sorted by the 0067 standard name string (file/line/function-name) that is presented in 0068 each line. 0069 """ 0070 prof = Profile() 0071 try: 0072 prof = prof.run(statement) 0073 except SystemExit: 0074 pass 0075 if filename is not None: 0076 prof.dump_stats(filename) 0077 else: 0078 return prof.print_stats(sort) 0079 0080 def runctx(statement, globals, locals, filename=None): 0081 """Run statement under profiler, supplying your own globals and locals, 0082 optionally saving results in filename. 0083 0084 statement and filename have the same semantics as profile.run 0085 """ 0086 prof = Profile() 0087 try: 0088 prof = prof.runctx(statement, globals, locals) 0089 except SystemExit: 0090 pass 0091 0092 if filename is not None: 0093 prof.dump_stats(filename) 0094 else: 0095 return prof.print_stats() 0096 0097 # print help 0098 def help(): 0099 for dirname in sys.path: 0100 fullname = os.path.join(dirname, 'profile.doc') 0101 if os.path.exists(fullname): 0102 sts = os.system('${PAGER-more} ' + fullname) 0103 if sts: print '*** Pager exit status:', sts 0104 break 0105 else: 0106 print 'Sorry, can\'t find the help file "profile.doc"', 0107 print 'along the Python search path.' 0108 0109 0110 if os.name == "mac": 0111 import MacOS 0112 def _get_time_mac(timer=MacOS.GetTicks): 0113 return timer() / 60.0 0114 0115 if hasattr(os, "times"): 0116 def _get_time_times(timer=os.times): 0117 t = timer() 0118 return t[0] + t[1] 0119 0120 0121 class Profile: 0122 """Profiler class. 0123 0124 self.cur is always a tuple. Each such tuple corresponds to a stack 0125 frame that is currently active (self.cur[-2]). The following are the 0126 definitions of its members. We use this external "parallel stack" to 0127 avoid contaminating the program that we are profiling. (old profiler 0128 used to write into the frames local dictionary!!) Derived classes 0129 can change the definition of some entries, as long as they leave 0130 [-2:] intact (frame and previous tuple). In case an internal error is 0131 detected, the -3 element is used as the function name. 0132 0133 [ 0] = Time that needs to be charged to the parent frame's function. 0134 It is used so that a function call will not have to access the 0135 timing data for the parent frame. 0136 [ 1] = Total time spent in this frame's function, excluding time in 0137 subfunctions (this latter is tallied in cur[2]). 0138 [ 2] = Total time spent in subfunctions, excluding time executing the 0139 frame's function (this latter is tallied in cur[1]). 0140 [-3] = Name of the function that corresponds to this frame. 0141 [-2] = Actual frame that we correspond to (used to sync exception handling). 0142 [-1] = Our parent 6-tuple (corresponds to frame.f_back). 0143 0144 Timing data for each function is stored as a 5-tuple in the dictionary 0145 self.timings[]. The index is always the name stored in self.cur[-3]. 0146 The following are the definitions of the members: 0147 0148 [0] = The number of times this function was called, not counting direct 0149 or indirect recursion, 0150 [1] = Number of times this function appears on the stack, minus one 0151 [2] = Total time spent internal to this function 0152 [3] = Cumulative time that this function was present on the stack. In 0153 non-recursive functions, this is the total execution time from start 0154 to finish of each invocation of a function, including time spent in 0155 all subfunctions. 0156 [4] = A dictionary indicating for each function name, the number of times 0157 it was called by us. 0158 """ 0159 0160 bias = 0 # calibration constant 0161 0162 def __init__(self, timer=None, bias=None): 0163 self.timings = {} 0164 self.cur = None 0165 self.cmd = "" 0166 self.c_func_name = "" 0167 0168 if bias is None: 0169 bias = self.bias 0170 self.bias = bias # Materialize in local dict for lookup speed. 0171 0172 if timer is None: 0173 if os.name == 'mac': 0174 self.timer = MacOS.GetTicks 0175 self.dispatcher = self.trace_dispatch_mac 0176 self.get_time = _get_time_mac 0177 elif hasattr(time, 'clock'): 0178 self.timer = self.get_time = time.clock 0179 self.dispatcher = self.trace_dispatch_i 0180 elif hasattr(os, 'times'): 0181 self.timer = os.times 0182 self.dispatcher = self.trace_dispatch 0183 self.get_time = _get_time_times 0184 else: 0185 self.timer = self.get_time = time.time 0186 self.dispatcher = self.trace_dispatch_i 0187 else: 0188 self.timer = timer 0189 t = self.timer() # test out timer function 0190 try: 0191 length = len(t) 0192 except TypeError: 0193 self.get_time = timer 0194 self.dispatcher = self.trace_dispatch_i 0195 else: 0196 if length == 2: 0197 self.dispatcher = self.trace_dispatch 0198 else: 0199 self.dispatcher = self.trace_dispatch_l 0200 # This get_time() implementation needs to be defined 0201 # here to capture the passed-in timer in the parameter 0202 # list (for performance). Note that we can't assume 0203 # the timer() result contains two values in all 0204 # cases. 0205 def get_time_timer(timer=timer, sum=sum): 0206 return sum(timer()) 0207 self.get_time = get_time_timer 0208 self.t = self.get_time() 0209 self.simulate_call('profiler') 0210 0211 # Heavily optimized dispatch routine for os.times() timer 0212 0213 def trace_dispatch(self, frame, event, arg): 0214 timer = self.timer 0215 t = timer() 0216 t = t[0] + t[1] - self.t - self.bias 0217 0218 if event == "c_call": 0219 self.c_func_name = arg.__name__ 0220 0221 if self.dispatch[event](self, frame,t): 0222 t = timer() 0223 self.t = t[0] + t[1] 0224 else: 0225 r = timer() 0226 self.t = r[0] + r[1] - t # put back unrecorded delta 0227 0228 # Dispatch routine for best timer program (return = scalar, fastest if 0229 # an integer but float works too -- and time.clock() relies on that). 0230 0231 def trace_dispatch_i(self, frame, event, arg): 0232 timer = self.timer 0233 t = timer() - self.t - self.bias 0234 0235 if event == "c_call": 0236 self.c_func_name = arg.__name__ 0237 0238 if self.dispatch[event](self, frame, t): 0239 self.t = timer() 0240 else: 0241 self.t = timer() - t # put back unrecorded delta 0242 0243 # Dispatch routine for macintosh (timer returns time in ticks of 0244 # 1/60th second) 0245 0246 def trace_dispatch_mac(self, frame, event, arg): 0247 timer = self.timer 0248 t = timer()/60.0 - self.t - self.bias 0249 0250 if event == "c_call": 0251 self.c_func_name = arg.__name__ 0252 0253 if self.dispatch[event](self, frame, t): 0254 self.t = timer()/60.0 0255 else: 0256 self.t = timer()/60.0 - t # put back unrecorded delta 0257 0258 # SLOW generic dispatch routine for timer returning lists of numbers 0259 0260 def trace_dispatch_l(self, frame, event, arg): 0261 get_time = self.get_time 0262 t = get_time() - self.t - self.bias 0263 0264 if event == "c_call": 0265 self.c_func_name = arg.__name__ 0266 0267 if self.dispatch[event](self, frame, t): 0268 self.t = get_time() 0269 else: 0270 self.t = get_time() - t # put back unrecorded delta 0271 0272 # In the event handlers, the first 3 elements of self.cur are unpacked 0273 # into vrbls w/ 3-letter names. The last two characters are meant to be 0274 # mnemonic: 0275 # _pt self.cur[0] "parent time" time to be charged to parent frame 0276 # _it self.cur[1] "internal time" time spent directly in the function 0277 # _et self.cur[2] "external time" time spent in subfunctions 0278 0279 def trace_dispatch_exception(self, frame, t): 0280 rpt, rit, ret, rfn, rframe, rcur = self.cur 0281 if (rframe is not frame) and rcur: 0282 return self.trace_dispatch_return(rframe, t) 0283 self.cur = rpt, rit+t, ret, rfn, rframe, rcur 0284 return 1 0285 0286 0287 def trace_dispatch_call(self, frame, t): 0288 if self.cur and frame.f_back is not self.cur[-2]: 0289 rpt, rit, ret, rfn, rframe, rcur = self.cur 0290 if not isinstance(rframe, Profile.fake_frame): 0291 assert rframe.f_back is frame.f_back, ("Bad call", rfn, 0292 rframe, rframe.f_back, 0293 frame, frame.f_back) 0294 self.trace_dispatch_return(rframe, 0) 0295 assert (self.cur is None or \ 0296 frame.f_back is self.cur[-2]), ("Bad call", 0297 self.cur[-3]) 0298 fcode = frame.f_code 0299 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name) 0300 self.cur = (t, 0, 0, fn, frame, self.cur) 0301 timings = self.timings 0302 if fn in timings: 0303 cc, ns, tt, ct, callers = timings[fn] 0304 timings[fn] = cc, ns + 1, tt, ct, callers 0305 else: 0306 timings[fn] = 0, 0, 0, 0, {} 0307 return 1 0308 0309 def trace_dispatch_c_call (self, frame, t): 0310 fn = ("", 0, self.c_func_name) 0311 self.cur = (t, 0, 0, fn, frame, self.cur) 0312 timings = self.timings 0313 if timings.has_key(fn): 0314 cc, ns, tt, ct, callers = timings[fn] 0315 timings[fn] = cc, ns+1, tt, ct, callers 0316 else: 0317 timings[fn] = 0, 0, 0, 0, {} 0318 return 1 0319 0320 def trace_dispatch_return(self, frame, t): 0321 if frame is not self.cur[-2]: 0322 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3]) 0323 self.trace_dispatch_return(self.cur[-2], 0) 0324 0325 # Prefix "r" means part of the Returning or exiting frame. 0326 # Prefix "p" means part of the Previous or Parent or older frame. 0327 0328 rpt, rit, ret, rfn, frame, rcur = self.cur 0329 rit = rit + t 0330 frame_total = rit + ret 0331 0332 ppt, pit, pet, pfn, pframe, pcur = rcur 0333 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur 0334 0335 timings = self.timings 0336 cc, ns, tt, ct, callers = timings[rfn] 0337 if not ns: 0338 # This is the only occurrence of the function on the stack. 0339 # Else this is a (directly or indirectly) recursive call, and 0340 # its cumulative time will get updated when the topmost call to 0341 # it returns. 0342 ct = ct + frame_total 0343 cc = cc + 1 0344 0345 if pfn in callers: 0346 callers[pfn] = callers[pfn] + 1 # hack: gather more 0347 # stats such as the amount of time added to ct courtesy 0348 # of this specific call, and the contribution to cc 0349 # courtesy of this call. 0350 else: 0351 callers[pfn] = 1 0352 0353 timings[rfn] = cc, ns - 1, tt + rit, ct, callers 0354 0355 return 1 0356 0357 0358 dispatch = { 0359 "call": trace_dispatch_call, 0360 "exception": trace_dispatch_exception, 0361 "return": trace_dispatch_return, 0362 "c_call": trace_dispatch_c_call, 0363 "c_exception": trace_dispatch_exception, 0364 "c_return": trace_dispatch_return, 0365 } 0366 0367 0368 # The next few functions play with self.cmd. By carefully preloading 0369 # our parallel stack, we can force the profiled result to include 0370 # an arbitrary string as the name of the calling function. 0371 # We use self.cmd as that string, and the resulting stats look 0372 # very nice :-). 0373 0374 def set_cmd(self, cmd): 0375 if self.cur[-1]: return # already set 0376 self.cmd = cmd 0377 self.simulate_call(cmd) 0378 0379 class fake_code: 0380 def __init__(self, filename, line, name): 0381 self.co_filename = filename 0382 self.co_line = line 0383 self.co_name = name 0384 self.co_firstlineno = 0 0385 0386 def __repr__(self): 0387 return repr((self.co_filename, self.co_line, self.co_name)) 0388 0389 class fake_frame: 0390 def __init__(self, code, prior): 0391 self.f_code = code 0392 self.f_back = prior 0393 0394 def simulate_call(self, name): 0395 code = self.fake_code('profile', 0, name) 0396 if self.cur: 0397 pframe = self.cur[-2] 0398 else: 0399 pframe = None 0400 frame = self.fake_frame(code, pframe) 0401 self.dispatch['call'](self, frame, 0) 0402 0403 # collect stats from pending stack, including getting final 0404 # timings for self.cmd frame. 0405 0406 def simulate_cmd_complete(self): 0407 get_time = self.get_time 0408 t = get_time() - self.t 0409 while self.cur[-1]: 0410 # We *can* cause assertion errors here if 0411 # dispatch_trace_return checks for a frame match! 0412 self.dispatch['return'](self, self.cur[-2], t) 0413 t = 0 0414 self.t = get_time() - t 0415 0416 0417 def print_stats(self, sort=-1): 0418 import pstats 0419 pstats.Stats(self).strip_dirs().sort_stats(sort). \ 0420 print_stats() 0421 0422 def dump_stats(self, file): 0423 f = open(file, 'wb') 0424 self.create_stats() 0425 marshal.dump(self.stats, f) 0426 f.close() 0427 0428 def create_stats(self): 0429 self.simulate_cmd_complete() 0430 self.snapshot_stats() 0431 0432 def snapshot_stats(self): 0433 self.stats = {} 0434 for func, (cc, ns, tt, ct, callers) in self.timings.iteritems(): 0435 callers = callers.copy() 0436 nc = 0 0437 for callcnt in callers.itervalues(): 0438 nc += callcnt 0439 self.stats[func] = cc, nc, tt, ct, callers 0440 0441 0442 # The following two methods can be called by clients to use 0443 # a profiler to profile a statement, given as a string. 0444 0445 def run(self, cmd): 0446 import __main__ 0447 dict = __main__.__dict__ 0448 return self.runctx(cmd, dict, dict) 0449 0450 def runctx(self, cmd, globals, locals): 0451 self.set_cmd(cmd) 0452 sys.setprofile(self.dispatcher) 0453 try: 0454 exec cmd in globals, locals 0455 finally: 0456 sys.setprofile(None) 0457 return self 0458 0459 # This method is more useful to profile a single function call. 0460 def runcall(self, func, *args, **kw): 0461 self.set_cmd(repr(func)) 0462 sys.setprofile(self.dispatcher) 0463 try: 0464 return func(*args, **kw) 0465 finally: 0466 sys.setprofile(None) 0467 0468 0469 #****************************************************************** 0470 # The following calculates the overhead for using a profiler. The 0471 # problem is that it takes a fair amount of time for the profiler 0472 # to stop the stopwatch (from the time it receives an event). 0473 # Similarly, there is a delay from the time that the profiler 0474 # re-starts the stopwatch before the user's code really gets to 0475 # continue. The following code tries to measure the difference on 0476 # a per-event basis. 0477 # 0478 # Note that this difference is only significant if there are a lot of 0479 # events, and relatively little user code per event. For example, 0480 # code with small functions will typically benefit from having the 0481 # profiler calibrated for the current platform. This *could* be 0482 # done on the fly during init() time, but it is not worth the 0483 # effort. Also note that if too large a value specified, then 0484 # execution time on some functions will actually appear as a 0485 # negative number. It is *normal* for some functions (with very 0486 # low call counts) to have such negative stats, even if the 0487 # calibration figure is "correct." 0488 # 0489 # One alternative to profile-time calibration adjustments (i.e., 0490 # adding in the magic little delta during each event) is to track 0491 # more carefully the number of events (and cumulatively, the number 0492 # of events during sub functions) that are seen. If this were 0493 # done, then the arithmetic could be done after the fact (i.e., at 0494 # display time). Currently, we track only call/return events. 0495 # These values can be deduced by examining the callees and callers 0496 # vectors for each functions. Hence we *can* almost correct the 0497 # internal time figure at print time (note that we currently don't 0498 # track exception event processing counts). Unfortunately, there 0499 # is currently no similar information for cumulative sub-function 0500 # time. It would not be hard to "get all this info" at profiler 0501 # time. Specifically, we would have to extend the tuples to keep 0502 # counts of this in each frame, and then extend the defs of timing 0503 # tuples to include the significant two figures. I'm a bit fearful 0504 # that this additional feature will slow the heavily optimized 0505 # event/time ratio (i.e., the profiler would run slower, fur a very 0506 # low "value added" feature.) 0507 #************************************************************** 0508 0509 def calibrate(self, m, verbose=0): 0510 if self.__class__ is not Profile: 0511 raise TypeError("Subclasses must override .calibrate().") 0512 0513 saved_bias = self.bias 0514 self.bias = 0 0515 try: 0516 return self._calibrate_inner(m, verbose) 0517 finally: 0518 self.bias = saved_bias 0519 0520 def _calibrate_inner(self, m, verbose): 0521 get_time = self.get_time 0522 0523 # Set up a test case to be run with and without profiling. Include 0524 # lots of calls, because we're trying to quantify stopwatch overhead. 0525 # Do not raise any exceptions, though, because we want to know 0526 # exactly how many profile events are generated (one call event, + 0527 # one return event, per Python-level call). 0528 0529 def f1(n): 0530 for i in range(n): 0531 x = 1 0532 0533 def f(m, f1=f1): 0534 for i in range(m): 0535 f1(100) 0536 0537 f(m) # warm up the cache 0538 0539 # elapsed_noprofile <- time f(m) takes without profiling. 0540 t0 = get_time() 0541 f(m) 0542 t1 = get_time() 0543 elapsed_noprofile = t1 - t0 0544 if verbose: 0545 print "elapsed time without profiling =", elapsed_noprofile 0546 0547 # elapsed_profile <- time f(m) takes with profiling. The difference 0548 # is profiling overhead, only some of which the profiler subtracts 0549 # out on its own. 0550 p = Profile() 0551 t0 = get_time() 0552 p.runctx('f(m)', globals(), locals()) 0553 t1 = get_time() 0554 elapsed_profile = t1 - t0 0555 if verbose: 0556 print "elapsed time with profiling =", elapsed_profile 0557 0558 # reported_time <- "CPU seconds" the profiler charged to f and f1. 0559 total_calls = 0.0 0560 reported_time = 0.0 0561 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \ 0562 p.timings.items(): 0563 if funcname in ("f", "f1"): 0564 total_calls += cc 0565 reported_time += tt 0566 0567 if verbose: 0568 print "'CPU seconds' profiler reported =", reported_time 0569 print "total # calls =", total_calls 0570 if total_calls != m + 1: 0571 raise ValueError("internal error: total calls = %d" % total_calls) 0572 0573 # reported_time - elapsed_noprofile = overhead the profiler wasn't 0574 # able to measure. Divide by twice the number of calls (since there 0575 # are two profiler events per call in this test) to get the hidden 0576 # overhead per event. 0577 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls 0578 if verbose: 0579 print "mean stopwatch overhead per profile event =", mean 0580 return mean 0581 0582 #**************************************************************************** 0583 def Stats(*args): 0584 print 'Report generating functions are in the "pstats" module\a' 0585 0586 0587 # When invoked as main program, invoke the profiler on a script 0588 if __name__ == '__main__': 0589 usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." 0590 if not sys.argv[1:]: 0591 print "Usage: ", usage 0592 sys.exit(2) 0593 0594 class ProfileParser(OptionParser): 0595 def __init__(self, usage): 0596 OptionParser.__init__(self) 0597 self.usage = usage 0598 0599 parser = ProfileParser(usage) 0600 parser.allow_interspersed_args = False 0601 parser.add_option('-o', '--outfile', dest="outfile", 0602 help="Save stats to <outfile>", default=None) 0603 parser.add_option('-s', '--sort', dest="sort", 0604 help="Sort order when printing to stdout, based on pstats.Stats class", default=-1) 0605 0606 (options, args) = parser.parse_args() 0607 sys.argv[:] = args 0608 0609 if (len(sys.argv) > 0): 0610 sys.path.insert(0, os.path.dirname(sys.argv[0])) 0611 run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort) 0612 else: 0613 print "Usage: ", usage 0614
Generated by PyXR 0.9.4