"""Utilities for comparing files and directories.

Classes:
    dircmp

Functions:
    cmp(f1, f2, shallow=1) -> bool
    cmpfiles(a, b, common) -> ([], [], [])

"""

import os
import stat
import warnings

try:
    # These names only exist on Python 2.  Nothing below needs them any
    # more; the import is retained so the module namespace is unchanged
    # for anyone who reached into it.
    from itertools import ifilter, ifilterfalse, imap, izip
except ImportError:
    pass

__all__ = ["cmp","dircmp","cmpfiles"]

_cache = {}
_CACHE_LIMIT = 100      # number of entries _cache may hold before a flush
BUFSIZE=8*1024

def cmp(f1, f2, shallow=1, use_statcache=None):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- Just check stat signature (do not read the files).
               defaults to 1.

    use_statcache -- obsolete argument.

    Return value:

    True if the files are the same, False otherwise.

    Raises os.error if either file cannot be stat'ed.

    This function uses a cache for past comparisons and the results,
    with a cache invalidation mechanism relying on stale signatures.
    The cache is emptied once it holds _CACHE_LIMIT entries so that a
    long-running process cannot grow it without bound.

    """
    if use_statcache is not None:
        warnings.warn("use_statcache argument is deprecated",
                      DeprecationWarning)

    s1 = _sig(os.stat(f1))
    s2 = _sig(os.stat(f2))
    # Anything that is not a regular file never compares equal.
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return False
    if shallow and s1 == s2:
        return True
    # Different sizes: no need to read the contents.
    if s1[1] != s2[1]:
        return False

    # A cached outcome is only trusted while both signatures still match
    # the ones recorded with it.
    result = _cache.get((f1, f2))
    if result and (s1, s2) == result[:2]:
        return result[2]
    outcome = _do_cmp(f1, f2)
    if len(_cache) >= _CACHE_LIMIT:
        _cache.clear()
    _cache[f1, f2] = s1, s2, outcome
    return outcome

def _sig(st):
    """Return the (file type, size, mtime) signature of a stat result."""
    return (stat.S_IFMT(st.st_mode),
            st.st_size,
            st.st_mtime)

def _do_cmp(f1, f2):
    """Return True if the files named f1 and f2 have identical contents.

    Both files are read in BUFSIZE chunks and are always closed again,
    including when a read raises.
    """
    bufsize = BUFSIZE
    # try/finally rather than a with-statement: this module predates
    # `with`, and the explicit form runs on every interpreter it targets.
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while True:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return False
                if not b1:
                    # Both streams hit EOF together without a mismatch.
                    return True
        finally:
            fp2.close()
    finally:
        fp1.close()

# Directory comparison class.
#
class dircmp:
    """A class that manages the comparison of 2 directories.

    dircmp(a,b,ignore=None,hide=None)
      A and B are directories.
      IGNORE is a list of names to ignore,
        defaults to ['RCS', 'CVS', 'tags'].
      HIDE is a list of names to hide,
        defaults to [os.curdir, os.pardir].

    High level usage:
      x = dircmp(dir1, dir2)
      x.report() -> prints a report on the differences between dir1 and dir2
       or
      x.report_partial_closure() -> prints report on differences between dir1
            and dir2, and reports on common immediate subdirectories.
      x.report_full_closure() -> like report_partial_closure,
            but fully recursive.

    Attributes:
     left_list, right_list: The files in dir1 and dir2,
        filtered by hide and ignore.
     common: a list of names in both dir1 and dir2.
     left_only, right_only: names only in dir1, dir2.
     common_dirs: subdirectories in both dir1 and dir2.
     common_files: files in both dir1 and dir2.
     common_funny: names in both dir1 and dir2 where the type differs between
        dir1 and dir2, or the name is not stat-able.
     same_files: list of identical files.
     diff_files: list of filenames which differ.
     funny_files: list of files which could not be compared.
     subdirs: a dictionary of dircmp objects, keyed by names in common_dirs.

    The attributes are computed lazily: the first access to one of them
    runs the phase method that produces it (see methodmap/__getattr__).
     """

    def __init__(self, a, b, ignore=None, hide=None): # Initialize
        """Remember the two directories and the ignore/hide name lists."""
        self.left = a
        self.right = b
        if hide is None:
            self.hide = [os.curdir, os.pardir] # Names never to be shown
        else:
            self.hide = hide
        if ignore is None:
            self.ignore = ['RCS', 'CVS', 'tags'] # Names ignored in comparison
        else:
            self.ignore = ignore

    def phase0(self): # Compare everything except common subdirectories
        """List both directories, dropping hidden and ignored names."""
        skip = self.hide + self.ignore
        self.left_list = _filter(os.listdir(self.left), skip)
        self.right_list = _filter(os.listdir(self.right), skip)
        self.left_list.sort()
        self.right_list.sort()

    def phase1(self): # Compute common names
        """Split the listings into common, left_only and right_only.

        Names are matched on their os.path.normcase() form, but the
        original spelling is what ends up in the result lists.
        """
        a = dict(zip(map(os.path.normcase, self.left_list), self.left_list))
        b = dict(zip(map(os.path.normcase, self.right_list), self.right_list))
        # Real lists (not lazy iterators): report() sorts them in place.
        self.common = [a[key] for key in a if key in b]
        self.left_only = [a[key] for key in a if key not in b]
        self.right_only = [b[key] for key in b if key not in a]

    def phase2(self): # Distinguish files, directories, funnies
        """Classify each common name as a directory, a file or a funny."""
        self.common_dirs = []
        self.common_files = []
        self.common_funny = []

        for x in self.common:
            a_path = os.path.join(self.left, x)
            b_path = os.path.join(self.right, x)

            # A name that cannot be stat'ed on either side is "funny".
            ok = 1
            try:
                a_stat = os.stat(a_path)
            except os.error:
                ok = 0
            try:
                b_stat = os.stat(b_path)
            except os.error:
                ok = 0

            if ok:
                a_type = stat.S_IFMT(a_stat.st_mode)
                b_type = stat.S_IFMT(b_stat.st_mode)
                if a_type != b_type:
                    self.common_funny.append(x)
                elif stat.S_ISDIR(a_type):
                    self.common_dirs.append(x)
                elif stat.S_ISREG(a_type):
                    self.common_files.append(x)
                else:
                    self.common_funny.append(x)
            else:
                self.common_funny.append(x)

    def phase3(self): # Find out differences between common files
        """Compare the common files with cmpfiles() and store the lists."""
        xx = cmpfiles(self.left, self.right, self.common_files)
        self.same_files, self.diff_files, self.funny_files = xx

    def phase4(self): # Find out differences between common subdirectories
        """Create a dircmp object for every common subdirectory."""
        # A new dircmp object is created for each common subdirectory,
        # these are stored in a dictionary indexed by filename.
        # The hide and ignore properties are inherited from the parent
        self.subdirs = {}
        for x in self.common_dirs:
            a_x = os.path.join(self.left, x)
            b_x = os.path.join(self.right, x)
            self.subdirs[x]  = dircmp(a_x, b_x, self.ignore, self.hide)

    def phase4_closure(self): # Recursively call phase4() on subdirectories
        """Run phase4() on this object and on every nested subdirectory."""
        self.phase4()
        for sd in self.subdirs.values():
            sd.phase4_closure()

    def report(self): # Print a report on the differences between a and b
        """Print the comparison of the two top-level directories.

        Each non-empty result list is sorted in place and printed on a
        line of its own.
        """
        # Output format is purposely lousy
        # print(<one formatted string>) produces the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print('diff %s %s' % (self.left, self.right))
        if self.left_only:
            self.left_only.sort()
            print('Only in %s : %s' % (self.left, self.left_only))
        if self.right_only:
            self.right_only.sort()
            print('Only in %s : %s' % (self.right, self.right_only))
        if self.same_files:
            self.same_files.sort()
            print('Identical files : %s' % (self.same_files,))
        if self.diff_files:
            self.diff_files.sort()
            print('Differing files : %s' % (self.diff_files,))
        if self.funny_files:
            self.funny_files.sort()
            print('Trouble with common files : %s' % (self.funny_files,))
        if self.common_dirs:
            self.common_dirs.sort()
            print('Common subdirectories : %s' % (self.common_dirs,))
        if self.common_funny:
            self.common_funny.sort()
            print('Common funny cases : %s' % (self.common_funny,))

    def report_partial_closure(self): # Print reports on self and on subdirs
        """Print report() for self and for each immediate common subdir."""
        self.report()
        for sd in self.subdirs.values():
            print('')
            sd.report()

    def report_full_closure(self): # Report on self and subdirs recursively
        """Print report() for self and, recursively, all common subdirs."""
        self.report()
        for sd in self.subdirs.values():
            print('')
            sd.report_full_closure()

    # Maps each lazily computed attribute to the phase that produces it.
    methodmap = dict(subdirs=phase4,
                     same_files=phase3, diff_files=phase3, funny_files=phase3,
                     common_dirs = phase2, common_files=phase2, common_funny=phase2,
                     common=phase1, left_only=phase1, right_only=phase1,
                     left_list=phase0, right_list=phase0)

    def __getattr__(self, attr):
        """Compute a missing attribute by running the phase that sets it.

        Only called when normal lookup fails.  Raises AttributeError for
        names that are not keys of methodmap.
        """
        if attr not in self.methodmap:
            raise AttributeError(attr)
        self.methodmap[attr](self)
        # The phase has stored the value on the instance by now.
        return getattr(self, attr)

def cmpfiles(a, b, common, shallow=1, use_statcache=None):
    """Compare common files in two directories.

    a, b -- directory names
    common -- list of file names found in both directories
    shallow -- if true, do comparison based solely on stat() information
    use_statcache -- obsolete argument

    Returns a tuple of three lists:
      files that compare equal
      files that are different
      filenames that aren't regular files.

    Names whose comparison raised os.error or IOError (missing or
    unreadable files) are also placed in the third list.

    """
    if use_statcache is not None:
        warnings.warn("use_statcache argument is deprecated",
                      DeprecationWarning)
    res = ([], [], [])
    for x in common:
        ax = os.path.join(a, x)
        bx = os.path.join(b, x)
        res[_cmp(ax, bx, shallow)].append(x)
    return res


# Compare two files.
# Return:
#       0 for equal
#       1 for different
#       2 for funny cases (can't stat, can't read, etc.)
#
def _cmp(a, b, sh, abs=abs, cmp=cmp):
    try:
        return not abs(cmp(a, b, sh))
    except (os.error, IOError):
        # On Python 2 open() raises IOError, which is not an os.error;
        # catch both so an unreadable file is reported as "funny"
        # instead of aborting the whole cmpfiles() run.
        return 2


# Return a copy with items that occur in skip removed.
#
def _filter(flist, skip):
    return [name for name in flist if name not in skip]


# Demonstration and testing.
#
def demo():
    """Command-line driver: compare the two directories named in sys.argv.

    Accepts an optional -r flag followed by exactly two directory names.
    With -r the report recurses into common subdirectories; without it
    only the plain report for the two directories is printed.

    Raises getopt.GetoptError when the number of positional arguments is
    not exactly two.
    """
    import getopt
    import sys

    opts, paths = getopt.getopt(sys.argv[1:], 'r')
    if len(paths) != 2:
        raise getopt.GetoptError('need exactly two args', None)

    comparison = dircmp(paths[0], paths[1])
    recursive = ('-r', '') in opts
    if not recursive:
        comparison.report()
    else:
        comparison.report_full_closure()


if __name__ == '__main__':
    demo()
# Generated by PyXR 0.9.4