PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/src/python/pants/source/source_root.py

https://gitlab.com/Ivy001/pants
Python | 356 lines | 239 code | 31 blank | 86 comment | 24 complexity | 051c7a8e1f0d9b390878477fa8b4f5be MD5 | raw file
  1. # coding=utf-8
  2. # Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
  3. # Licensed under the Apache License, Version 2.0 (see LICENSE).
  4. from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
  5. unicode_literals, with_statement)
  6. import os
  7. from collections import namedtuple
  8. from six.moves import range
  9. from pants.base.project_tree_factory import get_project_tree
  10. from pants.subsystem.subsystem import Subsystem
  11. from pants.util.memo import memoized_method, memoized_property
  12. SourceRoot = namedtuple('_SourceRoot', ['path', 'langs'])
  13. class SourceRootFactory(object):
  14. """Creates source roots that respect language canonicalizations."""
  15. def __init__(self, lang_canonicalizations):
  16. """Creates a source root factory that enforces the given `lang_canonicalizations`.
  17. :param dict lang_canonicalizations: a mapping from language nicknames to the canonical language
  18. names the nickname could represent.
  19. """
  20. self._lang_canonicalizations = lang_canonicalizations
  21. def _canonicalize_langs(self, langs):
  22. for lang in (langs or ()):
  23. canonicalized = self._lang_canonicalizations.get(lang, (lang,))
  24. for canonical in canonicalized:
  25. yield canonical
  26. def create(self, relpath, langs):
  27. """Return a source root at the given `relpath` for the given `langs`.
  28. :returns: :class:`SourceRoot`.
  29. """
  30. return SourceRoot(relpath, tuple(self._canonicalize_langs(langs)))
  31. class SourceRoots(object):
  32. """An interface for querying source roots."""
  33. def __init__(self, source_root_config):
  34. """Create an object for querying source roots via patterns in a trie.
  35. :param source_root_config: The SourceRootConfig for the source root patterns to query against.
  36. Non-test code should not instantiate directly. See SourceRootConfig.get_source_roots().
  37. """
  38. self._trie = source_root_config.create_trie()
  39. self._source_root_factory = source_root_config.source_root_factory
  40. self._options = source_root_config.get_options()
  41. def add_source_root(self, path, langs=tuple()):
  42. """Add the specified fixed source root, which must be relative to the buildroot.
  43. Useful in a limited set of circumstances, e.g., when unpacking sources from a jar with
  44. unknown structure. Tests should prefer to use dirs that match our source root patterns
  45. instead of explicitly setting source roots here.
  46. """
  47. self._trie.add_fixed(path, langs)
  48. def find(self, target):
  49. """Find the source root for the given target, or None.
  50. :param target: Find the source root for this target.
  51. :return: A SourceRoot instance.
  52. """
  53. return self.find_by_path(target.address.spec_path)
  54. def find_by_path(self, path):
  55. """Find the source root for the given path, or None.
  56. :param path: Find the source root for this path, relative to the buildroot.
  57. :return: A SourceRoot instance, or None if the path is not located under a source root
  58. and `unmatched==fail`.
  59. """
  60. matched = self._trie.find(path)
  61. if matched:
  62. return matched
  63. elif self._options.unmatched == 'fail':
  64. return None
  65. elif self._options.unmatched == 'create':
  66. # If no source root is found, use the path directly.
  67. # TODO: Remove this logic. It should be an error to have no matching source root.
  68. return SourceRoot(path, [])
  69. def all_roots(self):
  70. """Return all known source roots.
  71. Returns a generator over (source root, list of langs) pairs.
  72. Note: Requires a directory walk to match actual directories against patterns.
  73. However we don't descend into source roots, once found, so this should be fast in practice.
  74. Note: Does not follow symlinks.
  75. """
  76. project_tree = get_project_tree(self._options)
  77. fixed_roots = set()
  78. for root, langs in self._trie.fixed():
  79. if project_tree.exists(root):
  80. yield self._source_root_factory.create(root, langs)
  81. fixed_roots.add(root)
  82. for relpath, dirnames, _ in project_tree.walk('', topdown=True):
  83. match = self._trie.find(relpath)
  84. if match:
  85. if not any(fixed_root.startswith(relpath) for fixed_root in fixed_roots):
  86. yield match # Found a source root not a prefix of any fixed roots.
  87. del dirnames[:] # Don't continue to walk into it.
  88. class SourceRootConfig(Subsystem):
  89. """Configuration for roots of source trees.
  90. We detect source roots based on a list of source root patterns. E.g., if we have src/java
  91. as a pattern then any directory that ends with src/java will be considered a source root:
  92. src/java, my/project/src/java etc.
  93. A source root may be associated with one or more 'languages'. E.g., src/java can be associated
  94. with java, and src/jvm can be associated with java and scala. Note that this is a generalized
  95. concept of 'language'. For example 'resources' is a language in this sense.
  96. We specify source roots in three ways:
  97. 1. We autoconstruct patterns by appending language names to parent dirs. E.g., for languages
  98. 'java' and 'python', and parents 'src' and 'example/src', we construct the patterns
  99. 'src/java', 'src/python', 'example/src/java' and 'example/src/python'. These are of course
  100. associated with the appropriate language.
  101. 2. We can explicitly specify a mapping from source root pattern to language(s). E.g.,
  102. {
  103. 'src/jvm': ['java', 'scala'],
  104. 'src/py': ['python']
  105. }
  106. 3. We can also bypass the pattern mechanism altogether and specify a list of fixed source roots.
  107. E.g., src/java will match just <buildroot>/src/java, and not <buildroot>/some/dir/src/java.
  108. Note that we distinguish between 'source roots' and 'test roots'. All the above holds for both.
  109. We don't currently use this distinction in a useful way, but we may in the future, and we don't
  110. want to then require everyone to modify their source root declarations, so we implement the
  111. distinction now.
  112. Note also that there's no harm in specifying source root patterns that don't exist in your repo,
  113. within reason. This means that in most cases the defaults below will be sufficient and repo
  114. owners will not need to explicitly specify source root patterns at all.
  115. """
  116. options_scope = 'source'
  117. # TODO: When we have a proper model of the concept of a language, these should really be
  118. # gathered from backends.
  119. _DEFAULT_LANG_CANONICALIZATIONS = {
  120. 'jvm': ('java', 'scala'),
  121. 'protobuf': ('proto',),
  122. 'py': ('python',)
  123. }
  124. _DEFAULT_SOURCE_ROOT_PATTERNS = [
  125. '3rdparty/*',
  126. 'src/*',
  127. 'src/main/*',
  128. ]
  129. _DEFAULT_TEST_ROOT_PATTERNS = [
  130. 'test/*',
  131. 'tests/*',
  132. 'src/test/*'
  133. ]
  134. _DEFAULT_SOURCE_ROOTS = {
  135. # Go requires some special-case handling of source roots. In particular, go buildgen assumes
  136. # that there's a single source root for local code and (optionally) a single source root
  137. # for remote code. This fixed source root shows how to capture that distinction.
  138. # Go repos may need to add their own appropriate special cases in their pants.ini, until we fix
  139. # this hack.
  140. # TODO: Treat third-party/remote code as a separate category (akin to 'source' and 'test').
  141. # Then this hack won't be necessary.
  142. '3rdparty/go': ('go_remote',),
  143. }
  144. _DEFAULT_TEST_ROOTS = {
  145. }
  146. @classmethod
  147. def register_options(cls, register):
  148. super(SourceRootConfig, cls).register_options(register)
  149. register('--unmatched', choices=['create', 'fail'], default='create', advanced=True,
  150. help='Configures the behaviour when sources are defined outside of any configured '
  151. 'source root. `create` will cause a source root to be implicitly created at '
  152. 'the definition location of the sources; `fail` will trigger an error.')
  153. register('--lang-canonicalizations', metavar='<map>', type=dict,
  154. default=cls._DEFAULT_LANG_CANONICALIZATIONS, advanced=True,
  155. help='Map of language aliases to their canonical names.')
  156. register('--source-root-patterns', metavar='<list>', type=list,
  157. default=cls._DEFAULT_SOURCE_ROOT_PATTERNS, advanced=True,
  158. help='A list of source root patterns. Use a "*" wildcard path segment to match the '
  159. 'language name, which will be canonicalized.')
  160. register('--test-root-patterns', metavar='<list>', type=list,
  161. default=cls._DEFAULT_TEST_ROOT_PATTERNS, advanced=True,
  162. help='A list of source root patterns. Use a "*" wildcard path segment to match the '
  163. 'language name, which will be canonicalized.')
  164. register('--source-roots', metavar='<map>', type=dict,
  165. default=cls._DEFAULT_SOURCE_ROOTS, advanced=True,
  166. help='A map of source roots to list of languages. Useful when you want to enumerate '
  167. 'fixed source roots explicitly, instead of relying on patterns.')
  168. register('--test-roots', metavar='<map>', type=dict,
  169. default=cls._DEFAULT_TEST_ROOTS, advanced=True,
  170. help='A map of test roots to list of languages. Useful when you want to enumerate '
  171. 'fixed test roots explicitly, instead of relying on patterns.')
  172. @memoized_method
  173. def get_source_roots(self):
  174. return SourceRoots(self)
  175. def create_trie(self):
  176. """Create a trie of source root patterns from options.
  177. :returns: :class:`SourceRootTrie`
  178. """
  179. trie = SourceRootTrie(self.source_root_factory)
  180. options = self.get_options()
  181. # Add patterns.
  182. for pattern in options.source_root_patterns or []:
  183. trie.add_pattern(pattern)
  184. for pattern in options.test_root_patterns or []:
  185. trie.add_pattern(pattern)
  186. # Now add all fixed source roots.
  187. for path, langs in (options.source_roots or {}).items():
  188. trie.add_fixed(path, langs)
  189. for path, langs in (options.test_roots or {}).items():
  190. trie.add_fixed(path, langs)
  191. return trie
  192. @memoized_property
  193. def source_root_factory(self):
  194. """Creates source roots that respects language canonicalizations.
  195. :returns: :class:`SourceRootFactory`
  196. """
  197. return SourceRootFactory(self.get_options().lang_canonicalizations)
  198. class SourceRootTrie(object):
  199. """A trie for efficiently finding the source root for a path.
  200. Finds the first outermost pattern that matches. E.g., the pattern src/* will match
  201. my/project/src/python/src/java/java.py on src/python, not on src/java.
  202. Implements fixed source roots by prepending a '^/' to them, and then prepending a '^' key to
  203. the path we're matching. E.g., ^/src/java/foo/bar will match both the fixed root ^/src/java and
  204. the pattern src/java, but ^/my/project/src/java/foo/bar will match only the pattern.
  205. """
  206. class Node(object):
  207. def __init__(self):
  208. self.children = {}
  209. self.langs = tuple()
  210. self.is_terminal = False
  211. # We need an explicit terminal flag because not all terminals are leaf nodes, e.g.,
  212. # if we have patterns src/* and src/main/* then the '*' is a terminal (for the first pattern)
  213. # but not a leaf.
  214. def get_child(self, key, langs):
  215. """Return the child node for the given key, or None if no such child.
  216. :param key: The child to return.
  217. :param langs: An output parameter which we update with any langs associated with the child.
  218. """
  219. # An exact match takes precedence over a wildcard match, to support situations such as
  220. # src/* and src/main/*.
  221. ret = self.children.get(key)
  222. if ret:
  223. langs.update(ret.langs)
  224. else:
  225. ret = self.children.get('*')
  226. if ret:
  227. langs.add(key)
  228. return ret
  229. def new_child(self, key):
  230. child = SourceRootTrie.Node()
  231. self.children[key] = child
  232. return child
  233. def subpatterns(self):
  234. if self.children:
  235. for key, child in self.children.items():
  236. for sp, langs in child.subpatterns():
  237. if sp:
  238. yield os.path.join(key, sp), langs
  239. else:
  240. yield key, langs
  241. else:
  242. yield '', self.langs
  243. def __init__(self, source_root_factory):
  244. self._source_root_factory = source_root_factory
  245. self._root = SourceRootTrie.Node()
  246. def add_pattern(self, pattern):
  247. """Add a pattern to the trie."""
  248. self._do_add_pattern(pattern, tuple())
  249. def add_fixed(self, path, langs):
  250. """Add a fixed source root to the trie."""
  251. self._do_add_pattern(os.path.join('^', path), tuple(langs))
  252. def fixed(self):
  253. """Returns a list of just the fixed source roots in the trie."""
  254. for key, child in self._root.children.items():
  255. if key == '^':
  256. return list(child.subpatterns())
  257. return []
  258. def _do_add_pattern(self, pattern, langs):
  259. keys = pattern.split(os.path.sep)
  260. node = self._root
  261. for key in keys:
  262. child = node.children.get(key) # Can't use get_child, as we don't want to wildcard-match.
  263. if not child:
  264. child = node.new_child(key)
  265. node = child
  266. node.langs = langs
  267. node.is_terminal = True
  268. def find(self, path):
  269. """Find the source root for the given path."""
  270. keys = ['^'] + path.split(os.path.sep)
  271. for i in range(len(keys)):
  272. # See if we have a match at position i. We have such a match if following the path
  273. # segments into the trie, from the root, leads us to a leaf.
  274. node = self._root
  275. langs = set()
  276. j = i
  277. while j < len(keys):
  278. child = node.get_child(keys[j], langs)
  279. if child is None:
  280. break
  281. else:
  282. node = child
  283. j += 1
  284. if node.is_terminal:
  285. return self._source_root_factory.create(os.path.join(*keys[1:j]), langs)
  286. # Otherwise, try the next value of i.
  287. return None