PageRenderTime 40ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 0ms

/lib-python/2/distutils/filelist.py

https://bitbucket.org/kcr/pypy
Python | 343 lines | 309 code | 13 blank | 21 comment | 26 complexity | 457fd4b1191b3b0e881858dda4df1e91 MD5 | raw file
Possible License(s): Apache-2.0
  1. """distutils.filelist
  2. Provides the FileList class, used for poking about the filesystem
  3. and building lists of files.
  4. """
  5. __revision__ = "$Id$"
  6. import os, re
  7. import fnmatch
  8. from distutils.util import convert_path
  9. from distutils.errors import DistutilsTemplateError, DistutilsInternalError
  10. from distutils import log
  11. class FileList:
  12. """A list of files built by on exploring the filesystem and filtered by
  13. applying various patterns to what we find there.
  14. Instance attributes:
  15. dir
  16. directory from which files will be taken -- only used if
  17. 'allfiles' not supplied to constructor
  18. files
  19. list of filenames currently being built/filtered/manipulated
  20. allfiles
  21. complete list of files under consideration (ie. without any
  22. filtering applied)
  23. """
  24. def __init__(self, warn=None, debug_print=None):
  25. # ignore argument to FileList, but keep them for backwards
  26. # compatibility
  27. self.allfiles = None
  28. self.files = []
  29. def set_allfiles(self, allfiles):
  30. self.allfiles = allfiles
  31. def findall(self, dir=os.curdir):
  32. self.allfiles = findall(dir)
  33. def debug_print(self, msg):
  34. """Print 'msg' to stdout if the global DEBUG (taken from the
  35. DISTUTILS_DEBUG environment variable) flag is true.
  36. """
  37. from distutils.debug import DEBUG
  38. if DEBUG:
  39. print msg
  40. # -- List-like methods ---------------------------------------------
  41. def append(self, item):
  42. self.files.append(item)
  43. def extend(self, items):
  44. self.files.extend(items)
  45. def sort(self):
  46. # Not a strict lexical sort!
  47. sortable_files = map(os.path.split, self.files)
  48. sortable_files.sort()
  49. self.files = []
  50. for sort_tuple in sortable_files:
  51. self.files.append(os.path.join(*sort_tuple))
  52. # -- Other miscellaneous utility methods ---------------------------
  53. def remove_duplicates(self):
  54. # Assumes list has been sorted!
  55. for i in range(len(self.files) - 1, 0, -1):
  56. if self.files[i] == self.files[i - 1]:
  57. del self.files[i]
  58. # -- "File template" methods ---------------------------------------
  59. def _parse_template_line(self, line):
  60. words = line.split()
  61. action = words[0]
  62. patterns = dir = dir_pattern = None
  63. if action in ('include', 'exclude',
  64. 'global-include', 'global-exclude'):
  65. if len(words) < 2:
  66. raise DistutilsTemplateError, \
  67. "'%s' expects <pattern1> <pattern2> ..." % action
  68. patterns = map(convert_path, words[1:])
  69. elif action in ('recursive-include', 'recursive-exclude'):
  70. if len(words) < 3:
  71. raise DistutilsTemplateError, \
  72. "'%s' expects <dir> <pattern1> <pattern2> ..." % action
  73. dir = convert_path(words[1])
  74. patterns = map(convert_path, words[2:])
  75. elif action in ('graft', 'prune'):
  76. if len(words) != 2:
  77. raise DistutilsTemplateError, \
  78. "'%s' expects a single <dir_pattern>" % action
  79. dir_pattern = convert_path(words[1])
  80. else:
  81. raise DistutilsTemplateError, "unknown action '%s'" % action
  82. return (action, patterns, dir, dir_pattern)
  83. def process_template_line(self, line):
  84. # Parse the line: split it up, make sure the right number of words
  85. # is there, and return the relevant words. 'action' is always
  86. # defined: it's the first word of the line. Which of the other
  87. # three are defined depends on the action; it'll be either
  88. # patterns, (dir and patterns), or (dir_pattern).
  89. action, patterns, dir, dir_pattern = self._parse_template_line(line)
  90. # OK, now we know that the action is valid and we have the
  91. # right number of words on the line for that action -- so we
  92. # can proceed with minimal error-checking.
  93. if action == 'include':
  94. self.debug_print("include " + ' '.join(patterns))
  95. for pattern in patterns:
  96. if not self.include_pattern(pattern, anchor=1):
  97. log.warn("warning: no files found matching '%s'",
  98. pattern)
  99. elif action == 'exclude':
  100. self.debug_print("exclude " + ' '.join(patterns))
  101. for pattern in patterns:
  102. if not self.exclude_pattern(pattern, anchor=1):
  103. log.warn(("warning: no previously-included files "
  104. "found matching '%s'"), pattern)
  105. elif action == 'global-include':
  106. self.debug_print("global-include " + ' '.join(patterns))
  107. for pattern in patterns:
  108. if not self.include_pattern(pattern, anchor=0):
  109. log.warn(("warning: no files found matching '%s' " +
  110. "anywhere in distribution"), pattern)
  111. elif action == 'global-exclude':
  112. self.debug_print("global-exclude " + ' '.join(patterns))
  113. for pattern in patterns:
  114. if not self.exclude_pattern(pattern, anchor=0):
  115. log.warn(("warning: no previously-included files matching "
  116. "'%s' found anywhere in distribution"),
  117. pattern)
  118. elif action == 'recursive-include':
  119. self.debug_print("recursive-include %s %s" %
  120. (dir, ' '.join(patterns)))
  121. for pattern in patterns:
  122. if not self.include_pattern(pattern, prefix=dir):
  123. log.warn(("warning: no files found matching '%s' " +
  124. "under directory '%s'"),
  125. pattern, dir)
  126. elif action == 'recursive-exclude':
  127. self.debug_print("recursive-exclude %s %s" %
  128. (dir, ' '.join(patterns)))
  129. for pattern in patterns:
  130. if not self.exclude_pattern(pattern, prefix=dir):
  131. log.warn(("warning: no previously-included files matching "
  132. "'%s' found under directory '%s'"),
  133. pattern, dir)
  134. elif action == 'graft':
  135. self.debug_print("graft " + dir_pattern)
  136. if not self.include_pattern(None, prefix=dir_pattern):
  137. log.warn("warning: no directories found matching '%s'",
  138. dir_pattern)
  139. elif action == 'prune':
  140. self.debug_print("prune " + dir_pattern)
  141. if not self.exclude_pattern(None, prefix=dir_pattern):
  142. log.warn(("no previously-included directories found " +
  143. "matching '%s'"), dir_pattern)
  144. else:
  145. raise DistutilsInternalError, \
  146. "this cannot happen: invalid action '%s'" % action
  147. # -- Filtering/selection methods -----------------------------------
  148. def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
  149. """Select strings (presumably filenames) from 'self.files' that
  150. match 'pattern', a Unix-style wildcard (glob) pattern.
  151. Patterns are not quite the same as implemented by the 'fnmatch'
  152. module: '*' and '?' match non-special characters, where "special"
  153. is platform-dependent: slash on Unix; colon, slash, and backslash on
  154. DOS/Windows; and colon on Mac OS.
  155. If 'anchor' is true (the default), then the pattern match is more
  156. stringent: "*.py" will match "foo.py" but not "foo/bar.py". If
  157. 'anchor' is false, both of these will match.
  158. If 'prefix' is supplied, then only filenames starting with 'prefix'
  159. (itself a pattern) and ending with 'pattern', with anything in between
  160. them, will match. 'anchor' is ignored in this case.
  161. If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
  162. 'pattern' is assumed to be either a string containing a regex or a
  163. regex object -- no translation is done, the regex is just compiled
  164. and used as-is.
  165. Selected strings will be added to self.files.
  166. Return 1 if files are found.
  167. """
  168. # XXX docstring lying about what the special chars are?
  169. files_found = 0
  170. pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
  171. self.debug_print("include_pattern: applying regex r'%s'" %
  172. pattern_re.pattern)
  173. # delayed loading of allfiles list
  174. if self.allfiles is None:
  175. self.findall()
  176. for name in self.allfiles:
  177. if pattern_re.search(name):
  178. self.debug_print(" adding " + name)
  179. self.files.append(name)
  180. files_found = 1
  181. return files_found
  182. def exclude_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
  183. """Remove strings (presumably filenames) from 'files' that match
  184. 'pattern'.
  185. Other parameters are the same as for 'include_pattern()', above.
  186. The list 'self.files' is modified in place. Return 1 if files are
  187. found.
  188. """
  189. files_found = 0
  190. pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
  191. self.debug_print("exclude_pattern: applying regex r'%s'" %
  192. pattern_re.pattern)
  193. for i in range(len(self.files)-1, -1, -1):
  194. if pattern_re.search(self.files[i]):
  195. self.debug_print(" removing " + self.files[i])
  196. del self.files[i]
  197. files_found = 1
  198. return files_found
  199. # ----------------------------------------------------------------------
  200. # Utility functions
  201. def findall(dir = os.curdir):
  202. """Find all files under 'dir' and return the list of full filenames
  203. (relative to 'dir').
  204. """
  205. from stat import ST_MODE, S_ISREG, S_ISDIR, S_ISLNK
  206. list = []
  207. stack = [dir]
  208. pop = stack.pop
  209. push = stack.append
  210. while stack:
  211. dir = pop()
  212. names = os.listdir(dir)
  213. for name in names:
  214. if dir != os.curdir: # avoid the dreaded "./" syndrome
  215. fullname = os.path.join(dir, name)
  216. else:
  217. fullname = name
  218. # Avoid excess stat calls -- just one will do, thank you!
  219. stat = os.stat(fullname)
  220. mode = stat[ST_MODE]
  221. if S_ISREG(mode):
  222. list.append(fullname)
  223. elif S_ISDIR(mode) and not S_ISLNK(mode):
  224. push(fullname)
  225. return list
  226. def glob_to_re(pattern):
  227. """Translate a shell-like glob pattern to a regular expression.
  228. Return a string containing the regex. Differs from
  229. 'fnmatch.translate()' in that '*' does not match "special characters"
  230. (which are platform-specific).
  231. """
  232. pattern_re = fnmatch.translate(pattern)
  233. # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
  234. # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
  235. # and by extension they shouldn't match such "special characters" under
  236. # any OS. So change all non-escaped dots in the RE to match any
  237. # character except the special characters (currently: just os.sep).
  238. sep = os.sep
  239. if os.sep == '\\':
  240. # we're using a regex to manipulate a regex, so we need
  241. # to escape the backslash twice
  242. sep = r'\\\\'
  243. escaped = r'\1[^%s]' % sep
  244. pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
  245. return pattern_re
  246. def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
  247. """Translate a shell-like wildcard pattern to a compiled regular
  248. expression.
  249. Return the compiled regex. If 'is_regex' true,
  250. then 'pattern' is directly compiled to a regex (if it's a string)
  251. or just returned as-is (assumes it's a regex object).
  252. """
  253. if is_regex:
  254. if isinstance(pattern, str):
  255. return re.compile(pattern)
  256. else:
  257. return pattern
  258. if pattern:
  259. pattern_re = glob_to_re(pattern)
  260. else:
  261. pattern_re = ''
  262. if prefix is not None:
  263. # ditch end of pattern character
  264. empty_pattern = glob_to_re('')
  265. prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
  266. sep = os.sep
  267. if os.sep == '\\':
  268. sep = r'\\'
  269. pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
  270. else: # no prefix -- respect anchor flag
  271. if anchor:
  272. pattern_re = "^" + pattern_re
  273. return re.compile(pattern_re)