PageRenderTime 1647ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/owners.py

https://bitbucket.org/kaendfinger/depot_tools
Python | 299 lines | 283 code | 4 blank | 12 comment | 7 complexity | 110fb678454575477195c870a4c9f825 MD5 | raw file
  1. # Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. # Use of this source code is governed by a BSD-style license that can be
  3. # found in the LICENSE file.
  4. """A database of OWNERS files.
  5. OWNERS files indicate who is allowed to approve changes in a specific directory
  6. (or who is allowed to make changes without needing approval of another OWNER).
  7. Note that all changes must still be reviewed by someone familiar with the code,
  8. so you may need approval from both an OWNER and a reviewer in many cases.
  9. The syntax of the OWNERS file is, roughly:
  10. lines := (\s* line? \s* "\n")*
  11. line := directive
  12. | "per-file" \s+ glob \s* "=" \s* directive
  13. | comment
  14. directive := "set noparent"
  15. | email_address
  16. | "*"
  17. glob := [a-zA-Z0-9_-*?]+
  18. comment := "#" [^"\n"]*
  19. Email addresses must follow the foo@bar.com short form (exact syntax given
  20. in BASIC_EMAIL_REGEXP, below). Filename globs follow the simple unix
  21. shell conventions, and relative and absolute paths are not allowed (i.e.,
  22. globs only refer to the files in the current directory).
  23. If a user's email is one of the email_addresses in the file, the user is
  24. considered an "OWNER" for all files in the directory.
  25. If the "per-file" directive is used, the line only applies to files in that
  26. directory that match the filename glob specified.
  27. If the "set noparent" directive is used, then only entries in this OWNERS file
  28. apply to files in this directory; if the "set noparent" directive is not
  29. used, then entries in OWNERS files in enclosing (upper) directories also
  30. apply (up until a "set noparent" is encountered).
  31. If "per-file glob=set noparent" is used, then global directives are ignored
  32. for the glob, and only the "per-file" owners are used for files matching that
  33. glob.
  34. Examples for all of these combinations can be found in tests/owners_unittest.py.
  35. """
  36. import collections
  37. import random
  38. import re
  39. # If this is present by itself on a line, this means that everyone can review.
  40. EVERYONE = '*'
  41. # Recognizes 'X@Y' email addresses. Very simplistic.
  42. BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
  43. def _assert_is_collection(obj):
  44. assert not isinstance(obj, basestring)
  45. # Module 'collections' has no 'Iterable' member
  46. # pylint: disable=E1101
  47. if hasattr(collections, 'Iterable') and hasattr(collections, 'Sized'):
  48. assert (isinstance(obj, collections.Iterable) and
  49. isinstance(obj, collections.Sized))
  50. class SyntaxErrorInOwnersFile(Exception):
  51. def __init__(self, path, lineno, msg):
  52. super(SyntaxErrorInOwnersFile, self).__init__((path, lineno, msg))
  53. self.path = path
  54. self.lineno = lineno
  55. self.msg = msg
  56. def __str__(self):
  57. return "%s:%d syntax error: %s" % (self.path, self.lineno, self.msg)
  58. class Database(object):
  59. """A database of OWNERS files for a repository.
  60. This class allows you to find a suggested set of reviewers for a list
  61. of changed files, and see if a list of changed files is covered by a
  62. list of reviewers."""
  63. def __init__(self, root, fopen, os_path, glob):
  64. """Args:
  65. root: the path to the root of the Repository
  66. open: function callback to open a text file for reading
  67. os_path: module/object callback with fields for 'abspath', 'dirname',
  68. 'exists', and 'join'
  69. glob: function callback to list entries in a directory match a glob
  70. (i.e., glob.glob)
  71. """
  72. self.root = root
  73. self.fopen = fopen
  74. self.os_path = os_path
  75. self.glob = glob
  76. # Pick a default email regexp to use; callers can override as desired.
  77. self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)
  78. # Mapping of owners to the paths they own.
  79. self.owned_by = {EVERYONE: set()}
  80. # Mapping of paths to authorized owners.
  81. self.owners_for = {}
  82. # Set of paths that stop us from looking above them for owners.
  83. # (This is implicitly true for the root directory).
  84. self.stop_looking = set([''])
  85. def reviewers_for(self, files, author):
  86. """Returns a suggested set of reviewers that will cover the files.
  87. files is a sequence of paths relative to (and under) self.root.
  88. If author is nonempty, we ensure it is not included in the set returned
  89. in order avoid suggesting the author as a reviewer for their own changes."""
  90. self._check_paths(files)
  91. self._load_data_needed_for(files)
  92. suggested_owners = self._covering_set_of_owners_for(files, author)
  93. if EVERYONE in suggested_owners:
  94. if len(suggested_owners) > 1:
  95. suggested_owners.remove(EVERYONE)
  96. else:
  97. suggested_owners = set(['<anyone>'])
  98. return suggested_owners
  99. def files_not_covered_by(self, files, reviewers):
  100. """Returns the files not owned by one of the reviewers.
  101. Args:
  102. files is a sequence of paths relative to (and under) self.root.
  103. reviewers is a sequence of strings matching self.email_regexp.
  104. """
  105. self._check_paths(files)
  106. self._check_reviewers(reviewers)
  107. self._load_data_needed_for(files)
  108. covered_objs = self._objs_covered_by(reviewers)
  109. uncovered_files = [f for f in files
  110. if not self._is_obj_covered_by(f, covered_objs)]
  111. return set(uncovered_files)
  112. def _check_paths(self, files):
  113. def _is_under(f, pfx):
  114. return self.os_path.abspath(self.os_path.join(pfx, f)).startswith(pfx)
  115. _assert_is_collection(files)
  116. assert all(not self.os_path.isabs(f) and
  117. _is_under(f, self.os_path.abspath(self.root)) for f in files)
  118. def _check_reviewers(self, reviewers):
  119. _assert_is_collection(reviewers)
  120. assert all(self.email_regexp.match(r) for r in reviewers)
  121. def _objs_covered_by(self, reviewers):
  122. objs = self.owned_by[EVERYONE]
  123. for r in reviewers:
  124. objs = objs | self.owned_by.get(r, set())
  125. return objs
  126. def _stop_looking(self, objname):
  127. return objname in self.stop_looking
  128. def _is_obj_covered_by(self, objname, covered_objs):
  129. while not objname in covered_objs and not self._stop_looking(objname):
  130. objname = self.os_path.dirname(objname)
  131. return objname in covered_objs
  132. def _enclosing_dir_with_owners(self, objname):
  133. """Returns the innermost enclosing directory that has an OWNERS file."""
  134. dirpath = objname
  135. while not dirpath in self.owners_for:
  136. if self._stop_looking(dirpath):
  137. break
  138. dirpath = self.os_path.dirname(dirpath)
  139. return dirpath
  140. def _load_data_needed_for(self, files):
  141. for f in files:
  142. dirpath = self.os_path.dirname(f)
  143. while not dirpath in self.owners_for:
  144. self._read_owners_in_dir(dirpath)
  145. if self._stop_looking(dirpath):
  146. break
  147. dirpath = self.os_path.dirname(dirpath)
  148. def _read_owners_in_dir(self, dirpath):
  149. owners_path = self.os_path.join(self.root, dirpath, 'OWNERS')
  150. if not self.os_path.exists(owners_path):
  151. return
  152. lineno = 0
  153. for line in self.fopen(owners_path):
  154. lineno += 1
  155. line = line.strip()
  156. if line.startswith('#') or line == '':
  157. continue
  158. if line == 'set noparent':
  159. self.stop_looking.add(dirpath)
  160. continue
  161. m = re.match("per-file (.+)=(.+)", line)
  162. if m:
  163. glob_string = m.group(1).strip()
  164. directive = m.group(2).strip()
  165. full_glob_string = self.os_path.join(self.root, dirpath, glob_string)
  166. if '/' in glob_string or '\\' in glob_string:
  167. raise SyntaxErrorInOwnersFile(owners_path, lineno,
  168. 'per-file globs cannot span directories or use escapes: "%s"' %
  169. line)
  170. baselines = self.glob(full_glob_string)
  171. for baseline in (self.os_path.relpath(b, self.root) for b in baselines):
  172. self._add_entry(baseline, directive, "per-file line",
  173. owners_path, lineno)
  174. continue
  175. if line.startswith('set '):
  176. raise SyntaxErrorInOwnersFile(owners_path, lineno,
  177. 'unknown option: "%s"' % line[4:].strip())
  178. self._add_entry(dirpath, line, "line", owners_path, lineno)
  179. def _add_entry(self, path, directive, line_type, owners_path, lineno):
  180. if directive == "set noparent":
  181. self.stop_looking.add(path)
  182. elif self.email_regexp.match(directive) or directive == EVERYONE:
  183. self.owned_by.setdefault(directive, set()).add(path)
  184. self.owners_for.setdefault(path, set()).add(directive)
  185. else:
  186. raise SyntaxErrorInOwnersFile(owners_path, lineno,
  187. ('%s is not a "set" directive, "*", '
  188. 'or an email address: "%s"' % (line_type, directive)))
  189. def _covering_set_of_owners_for(self, files, author):
  190. dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
  191. all_possible_owners = self._all_possible_owners(dirs_remaining, author)
  192. suggested_owners = set()
  193. while dirs_remaining:
  194. owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
  195. suggested_owners.add(owner)
  196. dirs_to_remove = set(el[0] for el in all_possible_owners[owner])
  197. dirs_remaining -= dirs_to_remove
  198. return suggested_owners
  199. def _all_possible_owners(self, dirs, author):
  200. """Returns a list of (potential owner, distance-from-dir) tuples; a
  201. distance of 1 is the lowest/closest possible distance (which makes the
  202. subsequent math easier)."""
  203. all_possible_owners = {}
  204. for current_dir in dirs:
  205. dirname = current_dir
  206. distance = 1
  207. while True:
  208. for owner in self.owners_for.get(dirname, []):
  209. if author and owner == author:
  210. continue
  211. all_possible_owners.setdefault(owner, [])
  212. # If the same person is in multiple OWNERS files above a given
  213. # directory, only count the closest one.
  214. if not any(current_dir == el[0] for el in all_possible_owners[owner]):
  215. all_possible_owners[owner].append((current_dir, distance))
  216. if self._stop_looking(dirname):
  217. break
  218. dirname = self.os_path.dirname(dirname)
  219. distance += 1
  220. return all_possible_owners
  221. @staticmethod
  222. def lowest_cost_owner(all_possible_owners, dirs):
  223. # We want to minimize both the number of reviewers and the distance
  224. # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
  225. # an arbitrarily-selected scaling factor that seems to work well - it
  226. # will select one reviewer in the parent directory over three reviewers
  227. # in subdirs, but not one reviewer over just two.
  228. total_costs_by_owner = {}
  229. for owner in all_possible_owners:
  230. total_distance = 0
  231. num_directories_owned = 0
  232. for dirname, distance in all_possible_owners[owner]:
  233. if dirname in dirs:
  234. total_distance += distance
  235. num_directories_owned += 1
  236. if num_directories_owned:
  237. total_costs_by_owner[owner] = (total_distance /
  238. pow(num_directories_owned, 1.75))
  239. # Return the lowest cost owner. In the case of a tie, pick one randomly.
  240. lowest_cost = min(total_costs_by_owner.itervalues())
  241. lowest_cost_owners = filter(
  242. lambda owner: total_costs_by_owner[owner] == lowest_cost,
  243. total_costs_by_owner)
  244. return random.Random().choice(lowest_cost_owners)