PageRenderTime 62ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 1ms

/Lib/distutils/filelist.py

https://bitbucket.org/khurley/python
Python | 354 lines | 308 code | 20 blank | 26 comment | 25 complexity | 036839f1a971eb30bb7d5b3889340ce3 MD5 | raw file
  1. """distutils.filelist
  2. Provides the FileList class, used for poking about the filesystem
  3. and building lists of files.
  4. """
  5. # This module should be kept compatible with Python 2.1.
  6. __revision__ = "$Id: filelist.py 61000 2008-02-23 17:40:11Z christian.heimes $"
  7. import os, string, re
  8. import fnmatch
  9. from types import *
  10. from distutils.util import convert_path
  11. from distutils.errors import DistutilsTemplateError, DistutilsInternalError
  12. from distutils import log
  13. class FileList:
  14. """A list of files built by on exploring the filesystem and filtered by
  15. applying various patterns to what we find there.
  16. Instance attributes:
  17. dir
  18. directory from which files will be taken -- only used if
  19. 'allfiles' not supplied to constructor
  20. files
  21. list of filenames currently being built/filtered/manipulated
  22. allfiles
  23. complete list of files under consideration (ie. without any
  24. filtering applied)
  25. """
  26. def __init__(self,
  27. warn=None,
  28. debug_print=None):
  29. # ignore argument to FileList, but keep them for backwards
  30. # compatibility
  31. self.allfiles = None
  32. self.files = []
  33. def set_allfiles (self, allfiles):
  34. self.allfiles = allfiles
  35. def findall (self, dir=os.curdir):
  36. self.allfiles = findall(dir)
  37. def debug_print (self, msg):
  38. """Print 'msg' to stdout if the global DEBUG (taken from the
  39. DISTUTILS_DEBUG environment variable) flag is true.
  40. """
  41. from distutils.debug import DEBUG
  42. if DEBUG:
  43. print msg
  44. # -- List-like methods ---------------------------------------------
  45. def append (self, item):
  46. self.files.append(item)
  47. def extend (self, items):
  48. self.files.extend(items)
  49. def sort (self):
  50. # Not a strict lexical sort!
  51. sortable_files = map(os.path.split, self.files)
  52. sortable_files.sort()
  53. self.files = []
  54. for sort_tuple in sortable_files:
  55. self.files.append(apply(os.path.join, sort_tuple))
  56. # -- Other miscellaneous utility methods ---------------------------
  57. def remove_duplicates (self):
  58. # Assumes list has been sorted!
  59. for i in range(len(self.files) - 1, 0, -1):
  60. if self.files[i] == self.files[i - 1]:
  61. del self.files[i]
  62. # -- "File template" methods ---------------------------------------
  63. def _parse_template_line (self, line):
  64. words = string.split(line)
  65. action = words[0]
  66. patterns = dir = dir_pattern = None
  67. if action in ('include', 'exclude',
  68. 'global-include', 'global-exclude'):
  69. if len(words) < 2:
  70. raise DistutilsTemplateError, \
  71. "'%s' expects <pattern1> <pattern2> ..." % action
  72. patterns = map(convert_path, words[1:])
  73. elif action in ('recursive-include', 'recursive-exclude'):
  74. if len(words) < 3:
  75. raise DistutilsTemplateError, \
  76. "'%s' expects <dir> <pattern1> <pattern2> ..." % action
  77. dir = convert_path(words[1])
  78. patterns = map(convert_path, words[2:])
  79. elif action in ('graft', 'prune'):
  80. if len(words) != 2:
  81. raise DistutilsTemplateError, \
  82. "'%s' expects a single <dir_pattern>" % action
  83. dir_pattern = convert_path(words[1])
  84. else:
  85. raise DistutilsTemplateError, "unknown action '%s'" % action
  86. return (action, patterns, dir, dir_pattern)
  87. # _parse_template_line ()
  88. def process_template_line (self, line):
  89. # Parse the line: split it up, make sure the right number of words
  90. # is there, and return the relevant words. 'action' is always
  91. # defined: it's the first word of the line. Which of the other
  92. # three are defined depends on the action; it'll be either
  93. # patterns, (dir and patterns), or (dir_pattern).
  94. (action, patterns, dir, dir_pattern) = self._parse_template_line(line)
  95. # OK, now we know that the action is valid and we have the
  96. # right number of words on the line for that action -- so we
  97. # can proceed with minimal error-checking.
  98. if action == 'include':
  99. self.debug_print("include " + string.join(patterns))
  100. for pattern in patterns:
  101. if not self.include_pattern(pattern, anchor=1):
  102. log.warn("warning: no files found matching '%s'",
  103. pattern)
  104. elif action == 'exclude':
  105. self.debug_print("exclude " + string.join(patterns))
  106. for pattern in patterns:
  107. if not self.exclude_pattern(pattern, anchor=1):
  108. log.warn(("warning: no previously-included files "
  109. "found matching '%s'"), pattern)
  110. elif action == 'global-include':
  111. self.debug_print("global-include " + string.join(patterns))
  112. for pattern in patterns:
  113. if not self.include_pattern(pattern, anchor=0):
  114. log.warn(("warning: no files found matching '%s' " +
  115. "anywhere in distribution"), pattern)
  116. elif action == 'global-exclude':
  117. self.debug_print("global-exclude " + string.join(patterns))
  118. for pattern in patterns:
  119. if not self.exclude_pattern(pattern, anchor=0):
  120. log.warn(("warning: no previously-included files matching "
  121. "'%s' found anywhere in distribution"),
  122. pattern)
  123. elif action == 'recursive-include':
  124. self.debug_print("recursive-include %s %s" %
  125. (dir, string.join(patterns)))
  126. for pattern in patterns:
  127. if not self.include_pattern(pattern, prefix=dir):
  128. log.warn(("warning: no files found matching '%s' " +
  129. "under directory '%s'"),
  130. pattern, dir)
  131. elif action == 'recursive-exclude':
  132. self.debug_print("recursive-exclude %s %s" %
  133. (dir, string.join(patterns)))
  134. for pattern in patterns:
  135. if not self.exclude_pattern(pattern, prefix=dir):
  136. log.warn(("warning: no previously-included files matching "
  137. "'%s' found under directory '%s'"),
  138. pattern, dir)
  139. elif action == 'graft':
  140. self.debug_print("graft " + dir_pattern)
  141. if not self.include_pattern(None, prefix=dir_pattern):
  142. log.warn("warning: no directories found matching '%s'",
  143. dir_pattern)
  144. elif action == 'prune':
  145. self.debug_print("prune " + dir_pattern)
  146. if not self.exclude_pattern(None, prefix=dir_pattern):
  147. log.warn(("no previously-included directories found " +
  148. "matching '%s'"), dir_pattern)
  149. else:
  150. raise DistutilsInternalError, \
  151. "this cannot happen: invalid action '%s'" % action
  152. # process_template_line ()
  153. # -- Filtering/selection methods -----------------------------------
  154. def include_pattern (self, pattern,
  155. anchor=1, prefix=None, is_regex=0):
  156. """Select strings (presumably filenames) from 'self.files' that
  157. match 'pattern', a Unix-style wildcard (glob) pattern. Patterns
  158. are not quite the same as implemented by the 'fnmatch' module: '*'
  159. and '?' match non-special characters, where "special" is platform-
  160. dependent: slash on Unix; colon, slash, and backslash on
  161. DOS/Windows; and colon on Mac OS.
  162. If 'anchor' is true (the default), then the pattern match is more
  163. stringent: "*.py" will match "foo.py" but not "foo/bar.py". If
  164. 'anchor' is false, both of these will match.
  165. If 'prefix' is supplied, then only filenames starting with 'prefix'
  166. (itself a pattern) and ending with 'pattern', with anything in between
  167. them, will match. 'anchor' is ignored in this case.
  168. If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
  169. 'pattern' is assumed to be either a string containing a regex or a
  170. regex object -- no translation is done, the regex is just compiled
  171. and used as-is.
  172. Selected strings will be added to self.files.
  173. Return 1 if files are found.
  174. """
  175. files_found = 0
  176. pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
  177. self.debug_print("include_pattern: applying regex r'%s'" %
  178. pattern_re.pattern)
  179. # delayed loading of allfiles list
  180. if self.allfiles is None:
  181. self.findall()
  182. for name in self.allfiles:
  183. if pattern_re.search(name):
  184. self.debug_print(" adding " + name)
  185. self.files.append(name)
  186. files_found = 1
  187. return files_found
  188. # include_pattern ()
  189. def exclude_pattern (self, pattern,
  190. anchor=1, prefix=None, is_regex=0):
  191. """Remove strings (presumably filenames) from 'files' that match
  192. 'pattern'. Other parameters are the same as for
  193. 'include_pattern()', above.
  194. The list 'self.files' is modified in place.
  195. Return 1 if files are found.
  196. """
  197. files_found = 0
  198. pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
  199. self.debug_print("exclude_pattern: applying regex r'%s'" %
  200. pattern_re.pattern)
  201. for i in range(len(self.files)-1, -1, -1):
  202. if pattern_re.search(self.files[i]):
  203. self.debug_print(" removing " + self.files[i])
  204. del self.files[i]
  205. files_found = 1
  206. return files_found
  207. # exclude_pattern ()
  208. # class FileList
  209. # ----------------------------------------------------------------------
  210. # Utility functions
  211. def findall (dir = os.curdir):
  212. """Find all files under 'dir' and return the list of full filenames
  213. (relative to 'dir').
  214. """
  215. from stat import ST_MODE, S_ISREG, S_ISDIR, S_ISLNK
  216. list = []
  217. stack = [dir]
  218. pop = stack.pop
  219. push = stack.append
  220. while stack:
  221. dir = pop()
  222. names = os.listdir(dir)
  223. for name in names:
  224. if dir != os.curdir: # avoid the dreaded "./" syndrome
  225. fullname = os.path.join(dir, name)
  226. else:
  227. fullname = name
  228. # Avoid excess stat calls -- just one will do, thank you!
  229. stat = os.stat(fullname)
  230. mode = stat[ST_MODE]
  231. if S_ISREG(mode):
  232. list.append(fullname)
  233. elif S_ISDIR(mode) and not S_ISLNK(mode):
  234. push(fullname)
  235. return list
  236. def glob_to_re (pattern):
  237. """Translate a shell-like glob pattern to a regular expression; return
  238. a string containing the regex. Differs from 'fnmatch.translate()' in
  239. that '*' does not match "special characters" (which are
  240. platform-specific).
  241. """
  242. pattern_re = fnmatch.translate(pattern)
  243. # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
  244. # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
  245. # and by extension they shouldn't match such "special characters" under
  246. # any OS. So change all non-escaped dots in the RE to match any
  247. # character except the special characters.
  248. # XXX currently the "special characters" are just slash -- i.e. this is
  249. # Unix-only.
  250. pattern_re = re.sub(r'(^|[^\\])\.', r'\1[^/]', pattern_re)
  251. return pattern_re
  252. # glob_to_re ()
  253. def translate_pattern (pattern, anchor=1, prefix=None, is_regex=0):
  254. """Translate a shell-like wildcard pattern to a compiled regular
  255. expression. Return the compiled regex. If 'is_regex' true,
  256. then 'pattern' is directly compiled to a regex (if it's a string)
  257. or just returned as-is (assumes it's a regex object).
  258. """
  259. if is_regex:
  260. if type(pattern) is StringType:
  261. return re.compile(pattern)
  262. else:
  263. return pattern
  264. if pattern:
  265. pattern_re = glob_to_re(pattern)
  266. else:
  267. pattern_re = ''
  268. if prefix is not None:
  269. prefix_re = (glob_to_re(prefix))[0:-1] # ditch trailing $
  270. pattern_re = "^" + os.path.join(prefix_re, ".*" + pattern_re)
  271. else: # no prefix -- respect anchor flag
  272. if anchor:
  273. pattern_re = "^" + pattern_re
  274. return re.compile(pattern_re)
  275. # translate_pattern ()