PageRenderTime 57ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/app/models/importers/link_resolver.rb

https://github.com/instructure/canvas-lms
Ruby | 224 lines | 173 code | 21 blank | 30 comment | 37 complexity | 5a6c62452a37c35f84011db834fdbb41 MD5 | raw file
  1. # frozen_string_literal: true
  2. #
  3. # Copyright (C) 2015 - present Instructure, Inc.
  4. #
  5. # This file is part of Canvas.
  6. #
  7. # Canvas is free software: you can redistribute it and/or modify it under
  8. # the terms of the GNU Affero General Public License as published by the Free
  9. # Software Foundation, version 3 of the License.
  10. #
  11. # Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
  12. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  13. # A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  14. # details.
  15. #
  16. # You should have received a copy of the GNU Affero General Public License along
  17. # with this program. If not, see <http://www.gnu.org/licenses/>.
  18. module Importers
  19. class LinkResolver
  20. include LinkParser::Helpers
  # Builds a resolver bound to a single content migration.
  #
  # migration - kept in @migration; find_file_in_context reads its
  #             attachment path lookups from it.
  def initialize(migration)
    @migration = migration
  end
  # Resolves every link found in link_map, in place.
  #
  # link_map - a hash whose values are hashes whose values are arrays of
  #            link hashes; each link hash is handed to resolve_link!.
  #
  # Returns link_map itself.
  def resolve_links!(link_map)
    link_map.each_value do |links_by_field|
      links_by_field.each_value { |unresolved| unresolved.each { |entry| resolve_link!(entry) } }
    end
  end
  # Finds the replacement for one link placeholder and stores it in
  # link[:new_value].
  #
  # link[:link_type] selects the strategy:
  #   :wiki_page, :discussion_topic, :module_item
  #                 - look the target up in the context by link[:migration_id]
  #   :object       - generic lookup through the context collection named by link[:type]
  #   :media_object - re-parses link[:old_value] as HTML and points that node
  #                   at the resolved media/file URL
  #   :file         - resolves link[:rel_path] to a file URL
  #   :file_ref     - looks an attachment up by link[:migration_id]
  #
  # When a lookup finds nothing, link[:new_value] is left untouched. The :file
  # and :media_object strategies instead fall back to a "missing file" URL,
  # which is also recorded in link[:missing_url].
  #
  # Raises RuntimeError for any other link_type.
  def resolve_link!(link)
    case link[:link_type]
    when :wiki_page
      page_url = context.wiki_pages.where(migration_id: link[:migration_id]).limit(1).pluck(:url).first
      link[:new_value] = "#{context_path}/pages/#{page_url}#{link[:query]}" if page_url
    when :discussion_topic
      topic_id = context.discussion_topics.where(migration_id: link[:migration_id]).limit(1).pluck(:id).first
      link[:new_value] = "#{context_path}/discussion_topics/#{topic_id}#{link[:query]}" if topic_id
    when :module_item
      item_id = context.context_module_tags.where(migration_id: link[:migration_id]).limit(1).pluck(:id).first
      link[:new_value] = "#{context_path}/modules/items/#{item_id}#{link[:query]}" if item_id
    when :object
      collection = link[:type]
      migration_id = link[:migration_id]
      # the URL keeps the type exactly as it was given, even when the
      # collection name used for the lookup is remapped below
      url_segment = collection
      collection = "context_modules" if collection == "modules"
      collection = "pages" if collection == "wiki"

      case collection
      when "pages"
        # the migration id is placed directly in the page URL
        page_query = resolve_module_item_query(context, link[:query])
        link[:new_value] = "#{context_path}/pages/#{migration_id}#{page_query}"
      when "attachments"
        attachment_id = context.attachments.where(migration_id: migration_id).limit(1).pluck(:id).first
        link[:new_value] = "#{context_path}/files/#{attachment_id}/preview" if attachment_id
      else
        if context.respond_to?(collection) && context.send(collection).respond_to?(:scope)
          scope = context.send(collection).scope
          # only query collections whose model actually has a migration_id column
          record_id = scope.klass.columns_hash["migration_id"] &&
                      scope.where(migration_id: migration_id).limit(1).pluck(:id).first
          if record_id
            item_query = resolve_module_item_query(context, link[:query])
            link[:new_value] = "#{context_path}/#{url_segment}/#{record_id}#{item_query}"
          end
        end
      end
    when :media_object
      # This one is trickier because the node itself may change: the whole
      # node was replaced by the placeholder (see LinkParser for details),
      # so it is re-parsed, updated and serialized back.
      media_path = link[:rel_path]
      element = Nokogiri::HTML5.fragment(link[:old_value]).children.first
      target_url = resolve_media_comment_data(element, media_path) || resolve_relative_file_url(media_path)
      unless target_url
        target_url = missing_relative_file_url(media_path)
        link[:missing_url] = target_url
      end
      url_attribute = (element.name == "iframe") ? "src" : "href"
      element[url_attribute] = target_url
      link[:new_value] = element.to_s
    when :file
      file_path = link[:rel_path]
      file_url = resolve_relative_file_url(file_path)
      unless file_url
        file_url = missing_relative_file_url(file_path)
        link[:missing_url] = file_url
      end
      link[:new_value] = file_url
    when :file_ref
      attachment_id = context.attachments.where(migration_id: link[:migration_id]).limit(1).pluck(:id).first
      if attachment_id
        suffix = link[:rest].presence || "/preview"
        direct_url = "#{context_path}/files/#{attachment_id}#{suffix}"
        link[:new_value] = direct_url
        link[:new_value] = "/media_objects_iframe?mediahref=#{direct_url}" if link[:in_media_iframe]
      end
    else
      raise "unrecognized link_type in unresolved link"
    end
  end
  # Rewrites the module_item_id parameter of a query string so that it carries
  # the id of the matching module tag instead of a migration id.
  #
  # context - object whose context_module_tags are searched by migration_id
  # query   - query string (may be nil), e.g. "?module_item_id=abc&x=1"
  #
  # Returns query unchanged when it is nil, has no module_item_id parameter,
  # or no tag matches; otherwise a copy with that parameter replaced.
  def resolve_module_item_query(context, query)
    return query unless query&.include?("module_item_id=")

    stale_pair = query.sub("?", "").split("&").find { |pair| pair.include?("module_item_id=") }
    tag = context.context_module_tags.where(migration_id: stale_pair.split("=").last).first
    tag ? query.sub(stale_pair, "module_item_id=#{tag.id}") : query
  end
  # Builds the fallback URL used when a relative file path cannot be matched
  # to a file in the context.
  #
  # rel_path - relative file path; expected to be escaped already, apart from
  #            literal spaces, which are turned into %20 here.
  #
  # Returns "<context_path>/file_contents/<first root folder name>/<rel_path>"
  # with the leading part URI-escaped.
  def missing_relative_file_url(rel_path)
    root_folder_name = Folder.root_folders(context).first.name
    # URI.escape was removed in Ruby 3.0; the default parser's #escape
    # applies the same RFC 2396 escaping.
    escaped_base = URI::DEFAULT_PARSER.escape("#{context_path}/file_contents/#{root_folder_name}")
    File.join(escaped_base, rel_path.gsub(" ", "%20"))
  end
  # Looks up the context attachment that a relative path from the import
  # refers to.
  #
  # The path is tried against @migration.attachment_path_id_lookup first and
  # then, lower-cased, against @migration.attachment_path_id_lookup_lower.
  # Each lookup is tried with the path as given and with "+" turned into " ".
  #
  # Returns the first attachment in the context with the matching migration
  # id, or a falsy value when the path is unknown.
  def find_file_in_context(rel_path)
    # This is for backward-compatibility: canvas attachment filenames are
    # escaped with '+' for spaces and older exports have files with that
    # instead of %20
    plus_as_space = rel_path.tr("+", " ")

    migration_id = nil
    if (exact_lookup = @migration.attachment_path_id_lookup)
      migration_id = exact_lookup[rel_path] || exact_lookup[plus_as_space]
    end
    if !migration_id && (folded_lookup = @migration.attachment_path_id_lookup_lower)
      migration_id = folded_lookup[rel_path.downcase] || folded_lookup[plus_as_space.downcase]
    end
    return migration_id unless migration_id

    context.attachments.where(migration_id: migration_id).first
  end
  # Resolves a relative file path, optionally followed by a query string, to
  # a file URL in the context.
  #
  # Since a "?" may be part of the filename or start a query string, the text
  # after the last "?" is first treated as a query string; if that finds
  # nothing, the whole rel_path is retried as a plain filename.
  #
  # Returns the URL, or nil when neither reading matches a file.
  def resolve_relative_file_url(rel_path)
    segments = rel_path.split("?")
    query_string = (segments.length > 1) ? segments.pop : nil
    path_only = segments.join("?")

    resolved = resolve_relative_file_url_with_qs(path_only, query_string)
    return resolved if resolved || !query_string.present?

    resolve_relative_file_url_with_qs(rel_path, "")
  end
  # Resolves rel_path to a file URL, dropping leading directories one at a
  # time until a file is found: "a/b/c.txt", then "b/c.txt", then "c.txt".
  #
  # qs is the exported query string (may be nil or empty). Its parameters
  # carry pieces of the original URL (see CCHelper::file_query_string):
  #   canvas_qs_<name>=<value> - re-emitted as "<name>=<value>" in the query string
  #   canvas_<action>          - appended to the path as "/<action>"
  # Other parameters are dropped. Without any canvas_<action> parameter the
  # URL ends in "/preview".
  #
  # Returns the URL string, or nil when no suffix of the path matches a file.
  def resolve_relative_file_url_with_qs(rel_path, qs)
    path_parts = Pathname.new(rel_path).each_filename.to_a

    path_parts.each_index do |offset|
      file = find_file_in_context(File.join(path_parts.drop(offset)))
      next unless file

      exported_params = Rack::Utils.parse_nested_query(qs.presence || "")
      query_pairs = []
      action = ""
      exported_params.each do |key, value|
        case key
        when /canvas_qs_(.*)/
          query_pairs << "#{Rack::Utils.escape(Regexp.last_match(1))}=#{Rack::Utils.escape(value)}"
        when /canvas_(.*)/
          action += "/#{Regexp.last_match(1)}"
        end
      end

      resolved = "#{context_path}/files/#{file.id}#{action.presence || "/preview"}"
      resolved += "?#{query_pairs.join("&")}" if query_pairs.present?
      return resolved
    end

    nil
  end
  # Builds the iframe URL for a media object.
  #
  # media_id   - id placed in the path
  # media_type - optional; when present it is appended as "?type=<media_type>"
  #
  # Returns the URL string.
  def media_iframe_url(media_id, media_type = nil)
    type_query = media_type.present? ? "?type=#{media_type}" : ""
    "/media_objects_iframe/#{media_id}#{type_query}"
  end
  # Works out the media URL for a media link node, updating the node's
  # attributes along the way.
  #
  # node     - parsed HTML node of the link (read via #name and #[], changed
  #            via #[]= and #delete)
  # rel_path - relative file path of the media, possibly followed by a query string
  #
  # Resolution order:
  #   1. A file in the context that matches the path part of rel_path and has
  #      a usable media id: the node is tagged with that id (data-media-id on
  #      an iframe, id="media_comment_<id>" otherwise).
  #   2. A media id already on the node (id="media_comment_<id>", or data-media-id).
  #   3. Nothing found: class, id and style are removed from the node.
  #
  # Returns the media URL, or nil in case 3.
  def resolve_media_comment_data(node, rel_path)
    # Strip the query string for this search. The path part is nil when
    # rel_path is empty or starts with "?"; skip the file lookup then rather
    # than pass nil on to find_file_in_context.
    path_only = rel_path[/\A[^?]+/]
    file = path_only && find_file_in_context(path_only)

    if file
      media_id = file.media_object&.media_id || file.media_entry_id
      # NOTE(review): "maybe" looks like a placeholder media_entry_id rather
      # than a real id - confirm against the attachment model
      if media_id && media_id != "maybe"
        if node.name == "iframe"
          node["data-media-id"] = media_id
          return media_iframe_url(media_id, node["data-media-type"])
        end

        node["id"] = "media_comment_#{media_id}"
        return "/media_objects/#{media_id}"
      end
    end

    existing_comment = node["id"]&.match(/\Amedia_comment_(.+)\z/)
    if existing_comment
      "/media_objects/#{existing_comment[1]}"
    elsif node["data-media-id"].present?
      media_iframe_url(node["data-media-id"], node["data-media-type"])
    else
      node.delete("class")
      node.delete("id")
      node.delete("style")
      nil
    end
  end
  202. end
  203. end