PageRenderTime 53ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/gitlab/background_migration/populate_untracked_uploads.rb

https://gitlab.com/griest/gitlab-ce
Ruby | 259 lines | 197 code | 41 blank | 21 comment | 5 complexity | 41de98ba9cc6050cb1d0740325256493 MD5 | raw file
  1. # frozen_string_literal: true
  2. module Gitlab
  3. module BackgroundMigration
  4. # This class processes a batch of rows in `untracked_files_for_uploads` by
  5. # adding each file to the `uploads` table if it does not exist.
  6. class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength
  7. # This class is responsible for producing the attributes necessary to
  8. # track an uploaded file in the `uploads` table.
  9. class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
  10. self.table_name = 'untracked_files_for_uploads'
  # Matches the tail of a FileUploader path: /:random_hex/:filename
  FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}

  # Captures (group 1) everything that precedes the FileUploader tail.
  FULL_PATH_CAPTURE = /\A(.+)#{FILE_UPLOADER_PATH}/

  # These regex patterns are tested against a relative path, relative to
  # the upload directory.
  # For convenience, if there exists a capture group in the pattern, then
  # it indicates the model_id.
  #
  # Order matters: the FileUploader pattern also matches system paths such
  # as ".../avatar/1/file.png" (a numeric ID is valid hex), so it must stay
  # last for lookups that take the first matching entry.
  #
  # Every entry is frozen along with the list itself, so the shared
  # constant cannot be modified through a returned entry.
  PATH_PATTERNS = [
    {
      pattern: %r{\A-/system/appearance/logo/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Appearance'
    },
    {
      pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Appearance'
    },
    {
      pattern: %r{\A-/system/note/attachment/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Note'
    },
    {
      pattern: %r{\A-/system/user/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'User'
    },
    {
      pattern: %r{\A-/system/group/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'Namespace'
    },
    {
      pattern: %r{\A-/system/project/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'Project'
    },
    {
      pattern: FILE_UPLOADER_PATH,
      uploader: 'FileUploader',
      model_type: 'Project'
    }
  ].each(&:freeze).freeze
  # Attributes describing this file as a row for the `uploads` table.
  # The hash is built on the first call and cached in @upload_hash, so the
  # size and checksum are only read from disk once per record.
  def to_h
    return @upload_hash if @upload_hash

    attributes = {}
    attributes[:path] = upload_path
    attributes[:uploader] = uploader
    attributes[:model_type] = model_type
    attributes[:model_id] = model_id
    attributes[:size] = file_size
    attributes[:checksum] = checksum

    @upload_hash = attributes
  end
  # Path to store in the `uploads` row; its shape depends on the uploader.
  # FileUploader files are reduced to ":random_hex/:filename"; every other
  # uploader keeps this record's `path` unchanged. The result is memoized.
  def upload_path
    return @upload_path if @upload_path

    unless uploader == 'FileUploader'
      return @upload_path = path
    end

    # Path relative to project directory in uploads
    tail = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)[0]
    @upload_path = tail.sub(%r{\A/}, '') # remove leading slash
  end
  # Uploader class name taken from the PATH_PATTERNS entry matching this
  # file's path.
  def uploader
    pattern_entry = matching_pattern_map
    pattern_entry[:uploader]
  end
  # Model class name taken from the PATH_PATTERNS entry matching this
  # file's path.
  def model_type
    pattern_entry = matching_pattern_map
    pattern_entry[:model_type]
  end
  # ID of the model owning this file. Memoized with `defined?` so that a
  # nil result is cached too and never recomputed.
  def model_id
    return @model_id if defined?(@model_id)

    matched_pattern = matching_pattern_map[:pattern]
    captured_id = path_relative_to_upload_dir.match(matched_pattern)[1]

    # A captured group is the model_id. Only the FileUploader pattern has
    # no capture group, in which case the ID is resolved from the path.
    @model_id =
      if captured_id
        captured_id.to_i
      else
        file_uploader_model_id
      end
  end
  # Size in bytes of the file on disk.
  def file_size
    location = absolute_path
    File.size(location)
  end
  # Hex-encoded SHA256 digest of the file on disk.
  def checksum
    digest = Digest::SHA256.file(absolute_path)
    digest.hexdigest
  end
  96. private
  # First PATH_PATTERNS entry whose pattern matches this file's path
  # relative to the upload directory. Memoized once found.
  #
  # Raises a RuntimeError when no pattern matches.
  def matching_pattern_map
    return @matching_pattern_map if @matching_pattern_map

    @matching_pattern_map = PATH_PATTERNS.find do |candidate|
      path_relative_to_upload_dir.match(candidate[:pattern])
    end
    return @matching_pattern_map if @matching_pattern_map

    raise "Unknown upload path pattern \"#{path}\""
  end
  # Resolves the project ID for a FileUploader path by looking up the
  # project whose full_path is the part of the path before
  # "/:random_hex/:filename".
  #
  # Returns nil when no such project is found. Raises a RuntimeError when
  # the full_path cannot be captured from the path.
  def file_uploader_model_id
    captured = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)

    unless captured
      raise <<~MSG
        Could not capture project full_path from a FileUploader path:
        "#{path_relative_to_upload_dir}"
      MSG
    end

    owner = Project.find_by_full_path(captured[1])
    owner.id if owner
  end
  # This file's `path` with the leading "<RELATIVE_UPLOAD_DIR>/" prefix
  # removed, so the result does not include a leading slash.
  #
  # The constant lookup and the regex are only evaluated on the first call;
  # later calls return the memoized string directly.
  def path_relative_to_upload_dir
    @path_relative_to_upload_dir ||= begin
      upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
      path.sub(%r{\A#{Regexp.escape(upload_dir)}/}, '')
    end
  end
  # Location of the file on disk: this record's `path` joined onto the
  # configured uploads storage path.
  def absolute_path
    storage_root = Gitlab.config.uploads.storage_path
    File.join(storage_root, path)
  end
  127. end
  # Minimal model bound to the `uploads` table, used here only to check
  # that the table exists and to look up paths that are already tracked.
  class Upload < ActiveRecord::Base
    self.table_name = 'uploads'
  end
  # Processes the untracked files whose IDs fall within start_id..end_id.
  # Rows for missing uploads are inserted, the processed rows are deleted
  # from the untracked files table, and that table is dropped once it is
  # empty. Does nothing when either table is missing.
  def perform(start_id, end_id)
    if migrate?
      batch = UntrackedFile.where(id: start_id..end_id)
      insert_uploads_if_needed(batch).delete_all

      drop_temp_table_if_finished
    end
  end
  139. private
  # Whether both the untracked files table and the uploads table exist.
  def migrate?
    return false unless UntrackedFile.table_exists?

    Upload.table_exists?
  end
  # Inserts `uploads` rows for the files that can be parsed, are not
  # tracked yet and whose model still exists.
  #
  # Returns the relation of processed files: every file in `files` except
  # those whose attributes could not be built, which stay in the table.
  def insert_uploads_if_needed(files)
    parsable, unparsable = filter_error_files(files)
    insertable = filter_deleted_models(filter_existing_uploads(parsable))

    insert(insertable)

    files.where.not(id: unparsable.map(&:id))
  end
  # Splits files into [files whose attributes could be built, files that
  # raised while building them].
  def filter_error_files(files)
    files.partition { |file| upload_attributes_built?(file) }
  end

  # Builds the upload attributes for one file. Returns true on success;
  # on any StandardError the failure is logged and false is returned.
  def upload_attributes_built?(file)
    file.to_h
    true
  rescue => e
    Rails.logger.error(<<~MSG)
      Error parsing path "#{file.path}":
      #{e.message}
      #{e.backtrace.join("\n ")}
    MSG
    false
  end
  # Drops the files whose upload path is already present in the `uploads`
  # table. All candidate paths are looked up with a single query.
  def filter_existing_uploads(files)
    candidate_paths = files.map(&:upload_path)
    tracked_paths = Upload.where(path: candidate_paths).pluck(:path).to_set

    files.select { |file| !tracked_paths.include?(file.upload_path) }
  end
  # Some files on disk are missing from the uploads table because their
  # model was deleted while the file itself was kept. Such files are
  # dropped here so no upload is created for a model that is gone.
  def filter_deleted_models(files)
    deleted_by_type = deleted_model_ids(files)

    files.select do |file|
      !deleted_by_type[file.model_type].include?(file.model_id)
    end
  end
  # Maps each supported model type to the model IDs referenced by `files`
  # that no longer exist in that model's table.
  #
  # The returned hash always contains all five model types. A model class
  # is only queried when the batch references at least one ID of that type,
  # so batches touching a single type do not trigger the other lookups.
  def deleted_model_ids(files)
    ids_by_type = {
      'Appearance' => [],
      'Namespace' => [],
      'Note' => [],
      'Project' => [],
      'User' => []
    }

    # group model IDs by model type
    files.each do |file|
      ids_by_type[file.model_type] << file.model_id
    end

    ids_by_type.each_with_object({}) do |(model_type, model_ids), deleted|
      if model_ids.empty?
        # Nothing referenced for this type: nothing can be missing.
        deleted[model_type] = []
        next
      end

      model_class = Object.const_get(model_type)
      found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
      deleted[model_type] = model_ids - found_ids
    end
  end
  # Bulk-inserts one `uploads` row per file. created_at is sent as the
  # literal 'NOW()' with quoting disabled for that column, presumably so
  # the database evaluates it; confirm against Gitlab::Database.bulk_insert.
  def insert(files)
    timestamp = { created_at: 'NOW()' }
    rows = files.map { |file| file.to_h.merge(timestamp) }

    Gitlab::Database.bulk_insert('uploads', rows, disable_quote: :created_at)
  end
  # Drops the untracked files table once it holds no more rows.
  def drop_temp_table_if_finished
    return unless UntrackedFile.all.empty?

    UntrackedFile.connection.drop_table(:untracked_files_for_uploads, if_exists: true)
  end
  216. end
  217. end
  218. end