PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/app/models/namespaces/traversal/linear.rb

https://gitlab.com/523/gitlab-ce
Ruby | 267 lines | 145 code | 55 blank | 67 comment | 13 complexity | 791ca3fe3f5aac59f19b796bc8e98820 MD5 | raw file
  1. # frozen_string_literal: true
  2. #
  3. # Query a recursively defined namespace hierarchy using linear methods through
  4. # the traversal_ids attribute.
  5. #
  6. # Namespace is a nested hierarchy of one parent to many children. A search
  7. # using only the parent-child relationships is a slow operation. This process
  8. # was previously optimized using Postgresql recursive common table expressions
  9. # (CTE) with acceptable performance. However, it led to slower-than-possible
  10. # performance and resulted in complicated queries that were difficult to make
  11. # performant.
  12. #
  13. # Instead of searching the hierarchy recursively, we store a `traversal_ids`
  14. # attribute on each node. The `traversal_ids` is an ordered array of Namespace
  15. # IDs that define the traversal path from the root Namespace to the current
  16. # Namespace.
  17. #
  18. # For example, suppose we have the following Namespaces:
  19. #
  20. # GitLab (id: 1) > Engineering (id: 2) > Manage (id: 3) > Access (id: 4)
  21. #
  22. # Then `traversal_ids` for group "Access" is [1, 2, 3, 4]
  23. #
  24. # And we can match against other Namespace `traversal_ids` such that:
  25. #
  26. # - Ancestors are [1], [1, 2], [1, 2, 3]
  27. # - Descendants are [1, 2, 3, 4, *]
  28. # - Root is [1]
  29. # - Hierarchy is [1, *]
  30. #
  31. # Note that this search method works so long as the IDs are unique and the
  32. # traversal path is ordered from root to leaf nodes.
  33. #
  34. # We implement this in the database using Postgresql arrays, indexed by a
  35. # generalized inverted index (gin).
  36. module Namespaces
  37. module Traversal
  38. module Linear
  39. extend ActiveSupport::Concern
  40. include LinearScopes
  41. UnboundedSearch = Class.new(StandardError)
  42. included do
  43. before_update :lock_both_roots, if: -> { parent_id_changed? }
  44. after_update :sync_traversal_ids, if: -> { saved_change_to_parent_id? }
  45. # This uses rails internal before_commit API to sync traversal_ids on namespace create, right before transaction is committed.
  46. # This helps reduce the time during which the root namespace record is locked to ensure updated traversal_ids are valid
  47. before_commit :sync_traversal_ids, on: [:create]
  48. end
  49. class_methods do
  50. # This method looks into a list of namespaces trying to optimise a returned traversal_ids
  51. # into a list of shortest prefixes, due to fact that the shortest prefixes include all childrens.
  52. # Example:
  53. # INPUT: [[4909902], [4909902,51065789], [4909902,51065793], [7135830], [15599674, 1], [15599674, 1, 3], [15599674, 2]]
  54. # RESULT: [[4909902], [7135830], [15599674, 1], [15599674, 2]]
  55. def shortest_traversal_ids_prefixes
  56. raise ArgumentError, 'Feature not supported since the `:use_traversal_ids` is disabled' unless use_traversal_ids?
  57. prefixes = []
  58. # The array needs to be sorted (O(nlogn)) to ensure shortest elements are always first
  59. # This allows to do O(n) search of shortest prefixes
  60. all_traversal_ids = all.order('namespaces.traversal_ids').pluck('namespaces.traversal_ids')
  61. last_prefix = [nil]
  62. all_traversal_ids.each do |traversal_ids|
  63. next if last_prefix == traversal_ids[0..(last_prefix.count - 1)]
  64. last_prefix = traversal_ids
  65. prefixes << traversal_ids
  66. end
  67. prefixes
  68. end
  69. end
  70. def use_traversal_ids?
  71. return false unless Feature.enabled?(:use_traversal_ids)
  72. traversal_ids.present?
  73. end
  74. def use_traversal_ids_for_self_and_hierarchy?
  75. return false unless use_traversal_ids?
  76. return false unless Feature.enabled?(:use_traversal_ids_for_self_and_hierarchy, root_ancestor)
  77. traversal_ids.present?
  78. end
  79. def use_traversal_ids_for_ancestors?
  80. return false unless use_traversal_ids?
  81. return false unless Feature.enabled?(:use_traversal_ids_for_ancestors, root_ancestor)
  82. traversal_ids.present?
  83. end
  84. def use_traversal_ids_for_ancestors_upto?
  85. return false unless use_traversal_ids?
  86. return false unless Feature.enabled?(:use_traversal_ids_for_ancestors_upto, root_ancestor)
  87. traversal_ids.present?
  88. end
  89. def use_traversal_ids_for_root_ancestor?
  90. return false unless Feature.enabled?(:use_traversal_ids_for_root_ancestor)
  91. traversal_ids.present?
  92. end
  93. def root_ancestor
  94. return super unless use_traversal_ids_for_root_ancestor?
  95. strong_memoize(:root_ancestor) do
  96. if parent_id.nil?
  97. self
  98. else
  99. Namespace.find_by(id: traversal_ids.first)
  100. end
  101. end
  102. end
  103. def self_and_descendants
  104. return super unless use_traversal_ids?
  105. lineage(top: self)
  106. end
  107. def self_and_descendant_ids
  108. return super unless use_traversal_ids?
  109. self_and_descendants.as_ids
  110. end
  111. def descendants
  112. return super unless use_traversal_ids?
  113. self_and_descendants.where.not(id: id)
  114. end
  115. def self_and_hierarchy
  116. return super unless use_traversal_ids_for_self_and_hierarchy?
  117. self_and_descendants.or(ancestors)
  118. end
  119. def ancestors(hierarchy_order: nil)
  120. return super unless use_traversal_ids_for_ancestors?
  121. return self.class.none if parent_id.blank?
  122. lineage(bottom: parent, hierarchy_order: hierarchy_order)
  123. end
  124. def ancestor_ids(hierarchy_order: nil)
  125. return super unless use_traversal_ids_for_ancestors?
  126. hierarchy_order == :desc ? traversal_ids[0..-2] : traversal_ids[0..-2].reverse
  127. end
  128. # Returns all ancestors upto but excluding the top.
  129. # When no top is given, all ancestors are returned.
  130. # When top is not found, returns all ancestors.
  131. #
  132. # This copies the behavior of the recursive method. We will deprecate
  133. # this behavior soon.
  134. def ancestors_upto(top = nil, hierarchy_order: nil)
  135. return super unless use_traversal_ids_for_ancestors_upto?
  136. # We can't use a default value in the method definition above because
  137. # we need to preserve those specific parameters for super.
  138. hierarchy_order ||= :desc
  139. # Get all ancestor IDs inclusively between top and our parent.
  140. top_index = top ? traversal_ids.find_index(top.id) : 0
  141. ids = traversal_ids[top_index...-1]
  142. ids_string = ids.map { |id| Integer(id) }.join(',')
  143. # WITH ORDINALITY lets us order the result to match traversal_ids order.
  144. from_sql = <<~SQL
  145. unnest(ARRAY[#{ids_string}]::bigint[]) WITH ORDINALITY AS ancestors(id, ord)
  146. INNER JOIN namespaces ON namespaces.id = ancestors.id
  147. SQL
  148. self.class
  149. .from(Arel.sql(from_sql))
  150. .order('ancestors.ord': hierarchy_order)
  151. end
  152. def self_and_ancestors(hierarchy_order: nil)
  153. return super unless use_traversal_ids_for_ancestors?
  154. return self.class.where(id: id) if parent_id.blank?
  155. lineage(bottom: self, hierarchy_order: hierarchy_order)
  156. end
  157. def self_and_ancestor_ids(hierarchy_order: nil)
  158. return super unless use_traversal_ids_for_ancestors?
  159. hierarchy_order == :desc ? traversal_ids : traversal_ids.reverse
  160. end
  161. private
  162. # Update the traversal_ids for the full hierarchy.
  163. #
  164. # NOTE: self.traversal_ids will be stale. Reload for a fresh record.
  165. def sync_traversal_ids
  166. # Clear any previously memoized root_ancestor as our ancestors have changed.
  167. clear_memoization(:root_ancestor)
  168. Namespace::TraversalHierarchy.for_namespace(self).sync_traversal_ids!
  169. end
  170. # Lock the root of the hierarchy we just left, and lock the root of the hierarchy
  171. # we just joined. In most cases the two hierarchies will be the same.
  172. def lock_both_roots
  173. parent_ids = [
  174. parent_id_was || self.id,
  175. parent_id || self.id
  176. ].compact
  177. roots = Gitlab::ObjectHierarchy
  178. .new(Namespace.where(id: parent_ids))
  179. .base_and_ancestors
  180. .reorder(nil)
  181. .where(parent_id: nil)
  182. Namespace.lock.select(:id).where(id: roots).order(id: :asc).load
  183. end
  184. # Search this namespace's lineage. Bound inclusively by top node.
  185. def lineage(top: nil, bottom: nil, hierarchy_order: nil)
  186. raise UnboundedSearch, 'Must bound search by either top or bottom' unless top || bottom
  187. skope = self.class
  188. if top
  189. skope = skope.where("traversal_ids @> ('{?}')", top.id)
  190. end
  191. if bottom
  192. skope = skope.where(id: bottom.traversal_ids)
  193. end
  194. # The original `with_depth` attribute in ObjectHierarchy increments as you
  195. # walk away from the "base" namespace. This direction changes depending on
  196. # if you are walking up the ancestors or down the descendants.
  197. if hierarchy_order
  198. depth_sql = "ABS(#{traversal_ids.count} - array_length(traversal_ids, 1))"
  199. skope = skope.select(skope.default_select_columns, "#{depth_sql} as depth")
  200. # The SELECT includes an extra depth attribute. We wrap the SQL in a
  201. # standard SELECT to avoid mismatched attribute errors when trying to
  202. # chain future ActiveRelation commands, and retain the ordering.
  203. skope = self.class
  204. .from(skope, self.class.table_name)
  205. .select(skope.arel_table[Arel.star])
  206. .order(depth: hierarchy_order)
  207. end
  208. skope
  209. end
  210. end
  211. end
  212. end