PageRenderTime 42ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib-python/modified-2.7/distutils/filelist.py

https://bitbucket.org/evelyn559/pypy
Python | 336 lines | 295 code | 16 blank | 25 comment | 25 complexity | 6890b620b46667f3a3fc4794bcf61460 MD5 | raw file
  1. """distutils.filelist
  2. Provides the FileList class, used for poking about the filesystem
  3. and building lists of files.
  4. """
  5. __revision__ = "$Id$"
  6. import os, re
  7. import fnmatch
  8. from distutils.util import convert_path
  9. from distutils.errors import DistutilsTemplateError, DistutilsInternalError
  10. from distutils import log
  11. class FileList:
  12. """A list of files built by on exploring the filesystem and filtered by
  13. applying various patterns to what we find there.
  14. Instance attributes:
  15. dir
  16. directory from which files will be taken -- only used if
  17. 'allfiles' not supplied to constructor
  18. files
  19. list of filenames currently being built/filtered/manipulated
  20. allfiles
  21. complete list of files under consideration (ie. without any
  22. filtering applied)
  23. """
  24. def __init__(self, warn=None, debug_print=None):
  25. # ignore argument to FileList, but keep them for backwards
  26. # compatibility
  27. self.allfiles = None
  28. self.files = []
  29. def set_allfiles(self, allfiles):
  30. self.allfiles = allfiles
  31. def findall(self, dir=os.curdir):
  32. self.allfiles = findall(dir)
  33. def debug_print(self, msg):
  34. """Print 'msg' to stdout if the global DEBUG (taken from the
  35. DISTUTILS_DEBUG environment variable) flag is true.
  36. """
  37. from distutils.debug import DEBUG
  38. if DEBUG:
  39. print msg
  40. # -- List-like methods ---------------------------------------------
  41. def append(self, item):
  42. self.files.append(item)
  43. def extend(self, items):
  44. self.files.extend(items)
  45. def sort(self):
  46. # Not a strict lexical sort!
  47. sortable_files = map(os.path.split, self.files)
  48. sortable_files.sort()
  49. self.files = []
  50. for sort_tuple in sortable_files:
  51. self.files.append(os.path.join(*sort_tuple))
  52. # -- Other miscellaneous utility methods ---------------------------
  53. def remove_duplicates(self):
  54. # Assumes list has been sorted!
  55. for i in range(len(self.files) - 1, 0, -1):
  56. if self.files[i] == self.files[i - 1]:
  57. del self.files[i]
  58. # -- "File template" methods ---------------------------------------
  59. def _parse_template_line(self, line):
  60. words = line.split()
  61. action = words[0]
  62. patterns = dir = dir_pattern = None
  63. if action in ('include', 'exclude',
  64. 'global-include', 'global-exclude'):
  65. if len(words) < 2:
  66. raise DistutilsTemplateError, \
  67. "'%s' expects <pattern1> <pattern2> ..." % action
  68. patterns = map(convert_path, words[1:])
  69. elif action in ('recursive-include', 'recursive-exclude'):
  70. if len(words) < 3:
  71. raise DistutilsTemplateError, \
  72. "'%s' expects <dir> <pattern1> <pattern2> ..." % action
  73. dir = convert_path(words[1])
  74. patterns = map(convert_path, words[2:])
  75. elif action in ('graft', 'prune'):
  76. if len(words) != 2:
  77. raise DistutilsTemplateError, \
  78. "'%s' expects a single <dir_pattern>" % action
  79. dir_pattern = convert_path(words[1])
  80. else:
  81. raise DistutilsTemplateError, "unknown action '%s'" % action
  82. return (action, patterns, dir, dir_pattern)
  83. def process_template_line(self, line):
  84. # Parse the line: split it up, make sure the right number of words
  85. # is there, and return the relevant words. 'action' is always
  86. # defined: it's the first word of the line. Which of the other
  87. # three are defined depends on the action; it'll be either
  88. # patterns, (dir and patterns), or (dir_pattern).
  89. action, patterns, dir, dir_pattern = self._parse_template_line(line)
  90. # OK, now we know that the action is valid and we have the
  91. # right number of words on the line for that action -- so we
  92. # can proceed with minimal error-checking.
  93. if action == 'include':
  94. self.debug_print("include " + ' '.join(patterns))
  95. for pattern in patterns:
  96. if not self.include_pattern(pattern, anchor=1):
  97. log.warn("warning: no files found matching '%s'",
  98. pattern)
  99. elif action == 'exclude':
  100. self.debug_print("exclude " + ' '.join(patterns))
  101. for pattern in patterns:
  102. if not self.exclude_pattern(pattern, anchor=1):
  103. log.warn(("warning: no previously-included files "
  104. "found matching '%s'"), pattern)
  105. elif action == 'global-include':
  106. self.debug_print("global-include " + ' '.join(patterns))
  107. for pattern in patterns:
  108. if not self.include_pattern(pattern, anchor=0):
  109. log.warn(("warning: no files found matching '%s' " +
  110. "anywhere in distribution"), pattern)
  111. elif action == 'global-exclude':
  112. self.debug_print("global-exclude " + ' '.join(patterns))
  113. for pattern in patterns:
  114. if not self.exclude_pattern(pattern, anchor=0):
  115. log.warn(("warning: no previously-included files matching "
  116. "'%s' found anywhere in distribution"),
  117. pattern)
  118. elif action == 'recursive-include':
  119. self.debug_print("recursive-include %s %s" %
  120. (dir, ' '.join(patterns)))
  121. for pattern in patterns:
  122. if not self.include_pattern(pattern, prefix=dir):
  123. log.warn(("warning: no files found matching '%s' " +
  124. "under directory '%s'"),
  125. pattern, dir)
  126. elif action == 'recursive-exclude':
  127. self.debug_print("recursive-exclude %s %s" %
  128. (dir, ' '.join(patterns)))
  129. for pattern in patterns:
  130. if not self.exclude_pattern(pattern, prefix=dir):
  131. log.warn(("warning: no previously-included files matching "
  132. "'%s' found under directory '%s'"),
  133. pattern, dir)
  134. elif action == 'graft':
  135. self.debug_print("graft " + dir_pattern)
  136. if not self.include_pattern(None, prefix=dir_pattern):
  137. log.warn("warning: no directories found matching '%s'",
  138. dir_pattern)
  139. elif action == 'prune':
  140. self.debug_print("prune " + dir_pattern)
  141. if not self.exclude_pattern(None, prefix=dir_pattern):
  142. log.warn(("no previously-included directories found " +
  143. "matching '%s'"), dir_pattern)
  144. else:
  145. raise DistutilsInternalError, \
  146. "this cannot happen: invalid action '%s'" % action
  147. # -- Filtering/selection methods -----------------------------------
  148. def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
  149. """Select strings (presumably filenames) from 'self.files' that
  150. match 'pattern', a Unix-style wildcard (glob) pattern.
  151. Patterns are not quite the same as implemented by the 'fnmatch'
  152. module: '*' and '?' match non-special characters, where "special"
  153. is platform-dependent: slash on Unix; colon, slash, and backslash on
  154. DOS/Windows; and colon on Mac OS.
  155. If 'anchor' is true (the default), then the pattern match is more
  156. stringent: "*.py" will match "foo.py" but not "foo/bar.py". If
  157. 'anchor' is false, both of these will match.
  158. If 'prefix' is supplied, then only filenames starting with 'prefix'
  159. (itself a pattern) and ending with 'pattern', with anything in between
  160. them, will match. 'anchor' is ignored in this case.
  161. If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
  162. 'pattern' is assumed to be either a string containing a regex or a
  163. regex object -- no translation is done, the regex is just compiled
  164. and used as-is.
  165. Selected strings will be added to self.files.
  166. Return 1 if files are found.
  167. """
  168. files_found = 0
  169. pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
  170. self.debug_print("include_pattern: applying regex r'%s'" %
  171. pattern_re.pattern)
  172. # delayed loading of allfiles list
  173. if self.allfiles is None:
  174. self.findall()
  175. for name in self.allfiles:
  176. if pattern_re.search(name):
  177. self.debug_print(" adding " + name)
  178. self.files.append(name)
  179. files_found = 1
  180. return files_found
  181. def exclude_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
  182. """Remove strings (presumably filenames) from 'files' that match
  183. 'pattern'.
  184. Other parameters are the same as for 'include_pattern()', above.
  185. The list 'self.files' is modified in place. Return 1 if files are
  186. found.
  187. """
  188. files_found = 0
  189. pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
  190. self.debug_print("exclude_pattern: applying regex r'%s'" %
  191. pattern_re.pattern)
  192. for i in range(len(self.files)-1, -1, -1):
  193. if pattern_re.search(self.files[i]):
  194. self.debug_print(" removing " + self.files[i])
  195. del self.files[i]
  196. files_found = 1
  197. return files_found
  198. # ----------------------------------------------------------------------
  199. # Utility functions
  200. def findall(dir = os.curdir):
  201. """Find all files under 'dir' and return the list of full filenames
  202. (relative to 'dir').
  203. """
  204. from stat import ST_MODE, S_ISREG, S_ISDIR, S_ISLNK
  205. list = []
  206. stack = [dir]
  207. pop = stack.pop
  208. push = stack.append
  209. while stack:
  210. dir = pop()
  211. names = os.listdir(dir)
  212. for name in names:
  213. if dir != os.curdir: # avoid the dreaded "./" syndrome
  214. fullname = os.path.join(dir, name)
  215. else:
  216. fullname = name
  217. # Avoid excess stat calls -- just one will do, thank you!
  218. stat = os.stat(fullname)
  219. mode = stat[ST_MODE]
  220. if S_ISREG(mode):
  221. list.append(fullname)
  222. elif S_ISDIR(mode) and not S_ISLNK(mode):
  223. push(fullname)
  224. return list
  225. def glob_to_re(pattern):
  226. """Translate a shell-like glob pattern to a regular expression.
  227. Return a string containing the regex. Differs from
  228. 'fnmatch.translate()' in that '*' does not match "special characters"
  229. (which are platform-specific).
  230. """
  231. pattern_re = fnmatch.translate(pattern)
  232. # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
  233. # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
  234. # and by extension they shouldn't match such "special characters" under
  235. # any OS. So change all non-escaped dots in the RE to match any
  236. # character except the special characters.
  237. # XXX currently the "special characters" are just slash -- i.e. this is
  238. # Unix-only.
  239. pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
  240. return pattern_re
  241. def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
  242. """Translate a shell-like wildcard pattern to a compiled regular
  243. expression.
  244. Return the compiled regex. If 'is_regex' true,
  245. then 'pattern' is directly compiled to a regex (if it's a string)
  246. or just returned as-is (assumes it's a regex object).
  247. """
  248. if is_regex:
  249. if isinstance(pattern, str):
  250. return re.compile(pattern)
  251. else:
  252. return pattern
  253. if pattern:
  254. pattern_re = glob_to_re(pattern)
  255. else:
  256. pattern_re = ''
  257. if prefix is not None:
  258. # ditch end of pattern character
  259. empty_pattern = glob_to_re('')
  260. prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
  261. pattern_re = "^" + os.path.join(prefix_re, ".*" + pattern_re)
  262. else: # no prefix -- respect anchor flag
  263. if anchor:
  264. pattern_re = "^" + pattern_re
  265. return re.compile(pattern_re)