/src/Phan/Ordering.php

https://github.com/phan/phan · PHP · 167 lines · 85 code · 25 blank · 57 comment · 7 complexity · f9d71a71b0141038d89283e1933be48e MD5 · raw file

  1. <?php
  2. declare(strict_types=1);
  3. namespace Phan;
  4. use InvalidArgumentException;
  5. use Phan\Library\Hasher\Consistent;
  6. use Phan\Library\Hasher\Sequential;
  /**
   * Decides which files each analysis process handles, and in which order.
   *
   * The outcome is affected by the `consistent_hashing_file_order` and
   * `randomize_file_order` settings.
   * By default, files are analyzed in the same order as `.phan/config.php`
   */
  class Ordering
  {
      /**
       * @var CodeBase
       * The code base whose user-defined classes are used to group files.
       */
      private $code_base;

      /**
       * @param CodeBase $code_base
       * The code base whose user-defined classes are used to group files.
       */
      public function __construct(CodeBase $code_base)
      {
          $this->code_base = $code_base;
      }

      /**
       * Splits the files to analyze into per-process work lists.
       *
       * @param int $process_count
       * The number of processes the work should be divided amongst.
       *
       * @param list<string> $analysis_file_list
       * The files that should be analyzed. Classes defined in files
       * outside of this list are ignored, and listed files that were
       * not picked up through a class are assigned in a final pass.
       *
       * @return associative-array<int,list<string>>
       * A map from process_id to the list of files to analyze on that
       * process. A process_id only has an entry if at least one file
       * was assigned to it.
       *
       * @throws InvalidArgumentException if $process_count isn't positive.
       */
      public function orderForProcessCount(
          int $process_count,
          array $analysis_file_list
      ): array {
          if ($process_count <= 0) {
              throw new InvalidArgumentException("The process count must be greater than zero.");
          }

          if (Config::getValue('randomize_file_order')) {
              return self::distributeRandomly($process_count, $analysis_file_list);
          }

          // Pick the Hasher implementation requested by the config.
          // The file list is sorted only for consistent hashing.
          if (!Config::getValue('consistent_hashing_file_order')) {
              $hasher = new Sequential($process_count);
          } else {
              \sort($analysis_file_list, \SORT_STRING);
              $hasher = new Consistent($process_count);
          }

          // Set of files still waiting to be assigned (file name => true).
          $unassigned_files = [];
          foreach ($analysis_file_list as $listed_file) {
              $unassigned_files[$listed_file] = true;
          }

          // For each listed file, remember the first user-defined class
          // found in it. The file leaves the unassigned set at that point,
          // so later classes from the same file are skipped.
          $class_by_file = [];
          foreach ($this->code_base->getUserDefinedClassMap() as $class) {
              $defining_file = $class->getContext()->getFile();
              if (isset($unassigned_files[$defining_file])) {
                  unset($unassigned_files[$defining_file]);
                  $class_by_file[$defining_file] = $class;
              }
          }

          if (Config::getValue('consistent_hashing_file_order')) {
              \ksort($class_by_file, \SORT_STRING);
          }

          // Group {file,depth} pairs under the FQSEN found at the root of
          // each class's hierarchy.
          $files_by_root = [];
          foreach ($class_by_file as $file_name => $class) {
              $depth = $class->getHierarchyDepth($this->code_base);
              $root_key = (string)$class->getHierarchyRootFQSEN($this->code_base);
              $files_by_root[$root_key][] = [
                  'file' => $file_name,
                  'depth' => $depth,
              ];
          }

          // The resulting map from process_id to the files for that process.
          $files_by_process = [];
          foreach ($files_by_root as $root_fqsen => $entries) {
              \usort(
                  $entries,
                  /**
                   * Order by depth in the hierarchy; equal depths are
                   * ordered by file name so that the result never depends
                   * on how the sort treats ties.
                   * @param array{depth:int,file:string} $left
                   * @param array{depth:int,file:string} $right
                   */
                  static function (array $left, array $right): int {
                      $by_depth = $left['depth'] <=> $right['depth'];
                      if ($by_depth !== 0) {
                          return $by_depth;
                      }
                      return \strcmp($left['file'], $right['file']);
                  }
              );

              // Every file of one hierarchy goes to the same process.
              // Array keys may have been converted to integers, hence the casts.
              $process_id = $hasher->getGroup((string)$root_fqsen);
              foreach ($entries as $entry) {
                  $files_by_process[$process_id][] = (string)$entry['file'];
              }
          }

          // Reset the hasher, then assign the listed files that were not
          // reached through any class.
          $hasher->reset();
          foreach ($unassigned_files as $file => $_) {
              $file = (string)$file;
              $files_by_process[$hasher->getGroup($file)][] = $file;
          }

          return $files_by_process;
      }

      /**
       * Shuffles the files and deals them out to the processes in turn.
       *
       * @param int $process_count
       * A positive number of processes.
       *
       * @param list<string> $file_list
       * The files to distribute.
       *
       * @return associative-array<int,list<string>>
       * A map from process_id to the files dealt to that process.
       */
      private static function distributeRandomly(int $process_count, array $file_list): array
      {
          \shuffle($file_list);
          $files_by_process = [];
          foreach ($file_list as $position => $file) {
              $files_by_process[$position % $process_count][] = $file;
          }
          return $files_by_process;
      }
  }