"""distutils.filelist

Provides the FileList class, used for poking about the filesystem
and building lists of files.
"""

# This module was originally kept compatible with Python 1.5.2.  It now
# relies on string methods and list comprehensions (Python 2.0+) and uses
# only syntax that both Python 2 and Python 3 accept.

__revision__ = "$Id: filelist.py,v 1.17 2004/07/18 06:14:42 tim_one Exp $"

import os, string, re
import fnmatch
from types import *
from glob import glob
from distutils.util import convert_path
from distutils.errors import DistutilsTemplateError, DistutilsInternalError
from distutils import log


class FileList:

    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until 'set_allfiles()' or 'findall()'
        has been called (the pattern methods call 'findall()' lazily)
    """

    def __init__(self,
                 warn=None,
                 debug_print=None):
        # ignore argument to FileList, but keep them for backwards
        # compatibility

        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate filenames."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Scan 'dir' recursively and use the result as the candidate list."""
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            # Parenthesised single argument: same output whether 'print'
            # is a statement (Python 2) or a function (Python 3).
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add a single filename to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, basename) pairs."""
        # Not a strict lexical sort!  Splitting first means the directory
        # part is compared as a whole before the basename is looked at.
        sortable_files = [os.path.split(name) for name in self.files]
        sortable_files.sort()
        self.files = [os.path.join(*sort_tuple)
                      for sort_tuple in sortable_files]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Drop adjacent duplicate entries from 'self.files' in place."""
        # Assumes list has been sorted!  Walk backwards so that deleting
        # an entry never shifts an index we have yet to visit.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split one template line into its components.

        Returns the tuple (action, patterns, dir, dir_pattern); the
        members that do not apply to 'action' are None.  Raises
        DistutilsTemplateError if the line is blank, names an unknown
        action, or has the wrong number of words for its action.
        """
        words = line.split()
        if not words:
            # Previously an IndexError escaped from 'words[0]'.
            raise DistutilsTemplateError("empty template line")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                    "'%s' expects <pattern1> <pattern2> ..." % action)

            # A real list: it is joined for the debug output and then
            # iterated again by process_template_line().
            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                    "'%s' expects <dir> <pattern1> <pattern2> ..." % action)

            dir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                    "'%s' expects a single <dir_pattern>" % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Apply one template line to 'self.files'.

        The line is parsed by '_parse_template_line()' (which raises
        DistutilsTemplateError for malformed lines); each pattern is then
        included or excluded, and a warning is logged for every pattern
        that matched nothing.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' " +
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' " +
                              "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                # Carries the same "warning: " prefix as every other
                # message emitted by this method.
                log.warn(("warning: no previously-included directories "
                          "found matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?' never match the platform's path separator ('os.sep').

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = 1

        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so deletions do not disturb pending indices.
        for i in range(len(self.files) - 1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = 1

        return files_found


# ----------------------------------------------------------------------
# Utility functions

def findall(dir=os.curdir):
    """Find all files under 'dir' and return the list of full filenames
    (relative to 'dir').

    Directories are entered whether or not they are reached through a
    symbolic link.  Entries that cannot be stat'ed (for instance a
    dangling symbolic link) are skipped rather than aborting the scan.
    """
    from stat import ST_MODE, S_ISREG, S_ISDIR

    found = []
    stack = [dir]

    while stack:
        dir = stack.pop()

        for name in os.listdir(dir):
            if dir != os.curdir:        # avoid the dreaded "./" syndrome
                fullname = os.path.join(dir, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            # os.stat() follows symlinks, so the mode always describes the
            # link target; the old "not S_ISLNK(mode)" test could never
            # trigger and has been dropped.
            try:
                mode = os.stat(fullname)[ST_MODE]
            except OSError:
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                stack.append(fullname)

    return found


def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' and '?' do not match the path separator ('os.sep').
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match the separator under any OS.
    # So change all non-escaped dots in the RE to match any character
    # except the separator.
    not_sep = '[^' + re.escape(os.sep) + ']'

    # A dot is unescaped when preceded by an even number of backslashes.
    # The look-behind consumes nothing, so adjacent wildcards ("??", "?*")
    # are each rewritten; the previous '(^|[^\\])\.' form swallowed the
    # character before a dot and therefore skipped every second one.
    return re.sub(r'((?<!\\)(?:\\\\)*)\.',
                  lambda match: match.group(1) + not_sep,
                  pattern_re)


def _glob_core(pattern):
    """Return 'glob_to_re(pattern)' without fnmatch's whole-string wrapper."""
    # What fnmatch.translate() puts around the translated glob depends on
    # the Python version ('$', '\Z(?ms)', '(?s:' ... ')\Z', ...), so learn
    # it from a probe pattern instead of hard-coding it.
    probe = glob_to_re('a')
    mark = probe.find('a')
    head, tail = probe[:mark], probe[mark + 1:]

    pattern_re = glob_to_re(pattern)
    if not (pattern_re.startswith(head) and pattern_re.endswith(tail)):
        raise DistutilsInternalError(
            "unexpected regex %r from fnmatch.translate()" % pattern_re)
    return pattern_re[len(head):len(pattern_re) - len(tail)]


def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    Otherwise the regex is meant to be used with 'search()': a non-empty
    'pattern' must match up to the end of the string, and the match must
    begin at the start of the string when 'prefix' is given or 'anchor'
    is true.  An empty or None 'pattern' places no constraint on the
    rest of the string.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        else:
            return pattern

    if pattern:
        pattern_re = _glob_core(pattern) + r'\Z'
    else:
        pattern_re = ''

    if prefix is not None:
        # Join with an escaped separator: on Windows os.path.join() would
        # insert a bare backslash, which the regex engine reads as an
        # escape for whatever follows.
        pattern_re = (r'\A' + _glob_core(prefix) + re.escape(os.sep) +
                      '.*' + pattern_re)
    else:                               # no prefix -- respect anchor flag
        if anchor:
            pattern_re = r'\A' + pattern_re

    # DOTALL only affects the '.*' inserted above; every dot coming from
    # the glob translation has already been turned into a character class.
    return re.compile(pattern_re, re.DOTALL)
# Generated by PyXR 0.9.4