
/lib/sis/sis_csv.rb

https://github.com/kidakaka/canvas-lms
Ruby | 389 lines | 319 code | 40 blank | 30 comment
#
# Copyright (C) 2011 Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#

require 'faster_csv'
require 'zip/zip'

module SIS
  class SisCsv
    attr_accessor :verify, :root_account, :batch, :errors, :warnings, :finished, :counts, :updates_every

    IGNORE_FILES = /__macosx|desktop d[bf]|\A\..*/i

    # The order of this array is important:
    # * Account must be imported before Term and Course
    # * Course must be imported before Section
    # * Course and Section must be imported before Xlist
    # * Course, Section, and User must be imported before Enrollment
    IMPORTERS = [:account, :term, :abstract_course, :course, :section, :xlist, :user, :enrollment, :group, :group_membership, :grade_publishing_results]
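
    # Recognized options (all optional): :files (paths to .csv and/or .zip files),
    # :batch (the SisBatch record being processed), :logger, :allow_printing,
    # :parallelism, and :progress_multiplier / :progress_offset for scaling the
    # progress reported back to the batch.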
    def initialize(root_account, opts = {})
      opts = opts.with_indifferent_access
      @root_account = root_account
      @csvs = {}
      IMPORTERS.each { |importer| @csvs[importer] = [] }
      @rows = {}
      IMPORTERS.each { |importer| @rows[importer] = 0 }
      @headers = {}
      IMPORTERS.each { |importer| @headers[importer] = Set.new }
      @files = opts[:files] || []
      @batch = opts[:batch]
      @logger = opts[:logger]
      @counts = {}
      IMPORTERS.each { |importer| @counts[importer.to_s.pluralize.to_sym] = 0 }
      @total_rows = 1
      @current_row = 0
      @rows_since_progress_update = 0
      @progress_multiplier = opts[:progress_multiplier] || 1
      @progress_offset = opts[:progress_offset] || 0
      @errors = []
      @warnings = []
      @pending = false
      @finished = false
      settings = PluginSetting.settings_for_plugin('sis_import')
      @allow_printing = opts[:allow_printing].nil? ? true : opts[:allow_printing]
      @parallelism = opts[:parallelism]
      @parallelism ||= settings[:parallelism].to_i
      @parallelism = 1 if @parallelism < 1
      @parallelism = 1 unless @batch
      @minimum_rows_for_parallel = settings[:minimum_rows_for_parallel].to_i
      @minimum_rows_for_parallel = 1000 if @minimum_rows_for_parallel < 1
      @parallel_queue = settings[:queue_for_parallel_jobs]
      @parallel_queue = nil if @parallel_queue.blank?
      update_pause_vars
    end
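
    # Convenience wrapper: build an importer and run it in one call. A minimal
    # usage sketch (the root_account and sis_batch lookups are illustrative, not
    # part of this file):
    #
    #   importer = SIS::SisCsv.process(root_account,
    #     :files => ['/path/to/users.csv', '/path/to/enrollments.csv'],
    #     :batch => sis_batch)
    #   importer.finished  # true once every importer has completed
    #   importer.errors    # [[filename, message], ...]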
    def self.process(root_account, opts = {})
      importer = SisCsv.new(root_account, opts)
      importer.process
      importer
    end
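
    # Main driver: expands any .zip files into temp dirs, classifies each CSV by
    # its header row, counts rows, runs every importer's verify pass, and then
    # either processes the CSVs inline or (when parallelism > 1) fans them out as
    # delayed jobs and waits for the batch to reach a terminal state.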
    def process
      @tmp_dirs = []
      @files.each do |file|
        if File.file?(file)
          if File.extname(file).downcase == '.zip'
            tmp_dir = Dir.mktmpdir
            @tmp_dirs << tmp_dir
            unzip_file(file, tmp_dir)
            Dir[File.join(tmp_dir, "**/**")].each do |fn|
              process_file(tmp_dir, fn[tmp_dir.size+1 .. -1])
            end
          elsif File.extname(file).downcase == '.csv'
            process_file(File.dirname(file), File.basename(file))
          end
        end
      end
      @files = nil
      IMPORTERS.each do |importer|
        @csvs[importer].each do |csv|
          rows = (%x{wc -l '#{csv[:fullpath]}'}.split.first.to_i rescue 0)
          @rows[importer] += rows
          @total_rows += rows
        end
      end
      @parallelism = 1 if @total_rows <= @minimum_rows_for_parallel
      @verify = {}
      IMPORTERS.each do |importer|
        importerObject = SIS.const_get(importer.to_s.camelcase + 'Importer').new(self)
        @csvs[importer].each { |csv| importerObject.verify(csv, @verify) }
        @verify[:user_rows] = nil if importer == :user
      end
      @verify = nil
      return unless @errors.empty?
      # calculate how often we should update progress to get 1% resolution
      # but don't leave us hanging for more than 500 rows at a time
      # and don't do it more often than we have work to do
      @updates_every = [ [ @total_rows / @parallelism / 100, 500 ].min, 10 ].max
      if (@parallelism > 1)
        # re-balance the CSVs
        @batch.data[:importers] = {}
        IMPORTERS.each do |importer|
          if (importer != :account)
            rebalance_csvs(importer)
          end
          @batch.data[:importers][importer] = @csvs[importer].length
        end
        @batch.save!
        @rows = nil
        @headers = nil
        run_next_importer(IMPORTERS.first)
        @batch.reload
        while @batch.workflow_state.to_sym == :importing
          sleep(0.5)
          @batch.reload
        end
        @finished = [:imported, :imported_with_messages].include?(@batch.workflow_state.to_sym)
      else
        IMPORTERS.each do |importer|
          importerObject = SIS.const_get(importer.to_s.camelcase + 'Importer').new(self)
          @csvs[importer].each { |csv| importerObject.process(csv) }
        end
        @finished = true
      end
    rescue => e
      if @batch
        error_report = ErrorReport.log_exception(:sis_import, e,
          :message => "Importing CSV for account: #{@root_account.id} (#{@root_account.name}) sis_batch_id: #{@batch.id}: #{e.to_s}",
          :during_tests => false
        )
        add_error(nil, "Error while importing CSV. Please contact support. (Error report #{error_report.id})")
      else
        add_error(nil, "#{e.message}\n#{e.backtrace.join "\n"}")
        raise e
      end
    ensure
      @tmp_dirs.each do |tmp_dir|
        FileUtils.rm_rf(tmp_dir, :secure => true) if File.directory?(tmp_dir)
      end
      if @batch && @parallelism == 1
        @batch.data[:counts] = @counts
        @batch.processing_errors = @errors
        @batch.processing_warnings = @warnings
        @batch.save
      end
      if @allow_printing and !@errors.empty? and !@batch
        # If there's no batch, then we must be working via the console and we should just error out
        @errors.each { |w| puts w.join ": " }
      end
    end

    def logger
      @logger ||= Rails.logger
    end

    def add_error(csv, message)
      @errors << [ csv ? csv[:file] : "", message ]
    end

    def add_warning(csv, message)
      @warnings << [ csv ? csv[:file] : "", message ]
    end
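
    # Called by the individual importers as they consume rows. Progress is only
    # written back to the SisBatch every @updates_every rows; in parallel mode the
    # running row count is merged into the batch record inside a transaction.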
    def update_progress(count = 1)
      @current_row += count
      return unless @batch
      @rows_since_progress_update += count
      if @rows_since_progress_update >= @updates_every
        if @parallelism > 1
          SisBatch.transaction do
            @batch.reload(:select => 'data, progress', :lock => true)
            @current_row += @batch.data[:current_row] if @batch.data[:current_row]
            @batch.data[:current_row] = @current_row
            @batch.progress = (((@current_row.to_f/@total_rows) * @progress_multiplier) + @progress_offset) * 100
            @batch.save
            @current_row = 0
            @rows_since_progress_update = 0
          end
        else
          @batch.fast_update_progress( (((@current_row.to_f/@total_rows) * @progress_multiplier) + @progress_offset) * 100)
        end
      end
      if @current_row.to_i % @pause_every == 0
        sleep(@pause_duration)
        update_pause_vars
      end
    end
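
    # Runs one importer over one CSV (possibly inside a delayed job). If the CSV
    # was rebalanced into an attachment, it is opened as a local file first; any
    # exception marks the batch failed_with_messages.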
    def run_single_importer(importer, csv)
      begin
        importerObject = SIS.const_get(importer.to_s.camelcase + 'Importer').new(self)
        if csv[:attachment]
          file = csv[:attachment].open
          csv[:fullpath] = file.path
        end
        importerObject.process(csv)
        run_next_importer(IMPORTERS[IMPORTERS.index(importer) + 1]) if complete_importer(importer)
      rescue => e
        error_report = ErrorReport.log_exception(:sis_import, e,
          :message => "Importing CSV for account: #{@root_account.id} (#{@root_account.name}) sis_batch_id: #{@batch.id}: #{e.to_s}",
          :during_tests => false
        )
        add_error(nil, "Error while importing CSV. Please contact support. (Error report #{error_report.id})")
        @batch.processing_errors ||= []
        @batch.processing_warnings ||= []
        @batch.processing_errors.concat(@errors)
        @batch.processing_warnings.concat(@warnings)
        @batch.workflow_state = :failed_with_messages
        @batch.save!
      ensure
        file.close if file
      end
    end

    private
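
    # Kicks off all CSVs for the given importer type. Accounts are processed
    # inline; every other type is queued as separate delayed jobs.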
    def run_next_importer(importer)
      return finish if importer.nil?
      return run_next_importer(IMPORTERS[IMPORTERS.index(importer) + 1]) if @csvs[importer].empty?
      if (importer == :account)
        @csvs[importer].each { |csv| run_single_importer(importer, csv) }
        return
      end
      # logger doesn't serialize well
      @logger = nil
      @csvs[importer].each { |csv| self.send_later_enqueue_args(:run_single_importer, { :queue => @parallel_queue, :priority => Delayed::LOW_PRIORITY }, importer, csv) }
    end
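
    # Decrements the pending-CSV counter for this importer type inside a locked
    # transaction, folds local counts/errors/progress into the batch, and returns
    # true when the last CSV of that type has finished.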
    def complete_importer(importer)
      return unless @batch
      SisBatch.transaction do
        @batch.reload(:lock => true)
        @batch.data[:importers][importer] -= 1
        @batch.data[:counts] ||= {}
        @counts.each do |k, v|
          @batch.data[:counts][k] ||= 0
          @batch.data[:counts][k] += v
          @counts[k] = 0
        end
        @current_row += @batch.data[:current_row] if @batch.data[:current_row]
        @batch.data[:current_row] = @current_row
        @batch.progress = (((@current_row.to_f/@total_rows) * @progress_multiplier) + @progress_offset) * 100
        @batch.processing_errors ||= []
        @batch.processing_warnings ||= []
        @batch.processing_errors.concat(@errors)
        @batch.processing_warnings.concat(@warnings)
        @current_row = 0
        @batch.save
        return @batch.data[:importers][importer] == 0
      end
    end

    def finish
      @batch.finish(true)
      @finished = true
    end

    def update_pause_vars
      return unless @batch
      # throttling can be set on individual SisBatch instances, and also
      # site-wide in the Setting table.
      @batch.reload(:select => 'data') # update to catch changes to pause vars
      @pause_every = (@batch.data[:pause_every] || Setting.get('sis_batch_pause_every', 100)).to_i
      @pause_duration = (@batch.data[:pause_duration] || Setting.get('sis_batch_pause_duration', 0)).to_f
    end

    def unzip_file(file, dest)
      Zip::ZipFile.open(file) do |zip_file|
        zip_file.each do |f|
          f_path = File.join(dest, f.name)
          FileUtils.mkdir_p(File.dirname(f_path))
          zip_file.extract(f, f_path) unless File.exist?(f_path)
        end
      end
    end
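
    # Splits the CSVs for one importer type into roughly equal shards of
    # rows_per_batch rows each, saving every shard as an Attachment on the batch
    # so that parallel jobs can re-open and process them independently.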
    def rebalance_csvs(importer)
      rows_per_batch = (@rows[importer].to_f / @parallelism).ceil.to_i
      new_csvs = []
      out_csv = nil
      tmp_dir = Dir.mktmpdir
      @tmp_dirs << tmp_dir
      temp_file = 0
      headers = @headers[importer].to_a
      path = nil
      begin
        Attachment.skip_scribd_submits
        @csvs[importer].each do |csv|
          remaining_in_batch = 0
          FasterCSV.foreach(csv[:fullpath], SisImporter::PARSE_ARGS) do |row|
            if remaining_in_batch == 0
              temp_file += 1
              if out_csv
                out_csv.close
                out_csv = nil
                att = Attachment.new
                att.context = @batch
                att.uploaded_data = ActionController::TestUploadedFile.new(path, Attachment.mimetype(path))
                att.display_name = new_csvs.last[:file]
                att.save!
                new_csvs.last.delete(:fullpath)
                new_csvs.last[:attachment] = att
              end
              path = File.join(tmp_dir, "#{importer}#{temp_file}.csv")
              out_csv = FasterCSV.open(path, "wb", {:headers => headers, :write_headers => true})
              new_csvs << {:file => csv[:file]}
              remaining_in_batch = rows_per_batch
            end
            out_row = FasterCSV::Row.new(headers, [])
            headers.each { |header| out_row[header] = row[header] }
            out_csv << out_row
            remaining_in_batch -= 1
          end
        end
        if out_csv
          out_csv.close
          out_csv = nil
          att = Attachment.new
          att.context = @batch
          att.uploaded_data = ActionController::TestUploadedFile.new(path, Attachment.mimetype(path))
          att.display_name = new_csvs.last[:file]
          att.save!
          new_csvs.last.delete(:fullpath)
          new_csvs.last[:attachment] = att
        end
      ensure
        out_csv.close if out_csv
        Attachment.skip_scribd_submits(false)
      end
      @csvs[importer] = new_csvs
    end
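
    # Reads the first row of a single file and asks each importer class whether it
    # recognizes the columns; unrecognized CSVs produce an error, and other
    # non-CSV, non-ignored files are skipped with a warning.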
    def process_file(base, file)
      csv = { :base => base, :file => file, :fullpath => File.join(base, file) }
      if File.file?(csv[:fullpath]) && File.extname(csv[:fullpath]).downcase == '.csv'
        FasterCSV.foreach(csv[:fullpath], SisImporter::PARSE_ARGS) do |row|
          importer = IMPORTERS.index do |importer|
            if SIS.const_get(importer.to_s.camelcase + 'Importer').send('is_' + importer.to_s + '_csv?', row)
              @csvs[importer] << csv
              @headers[importer].merge(row.headers)
              true
            else
              false
            end
          end
          add_error(csv, "Couldn't find Canvas CSV import headers") if importer.nil?
          break
        end
      elsif !File.directory?(csv[:fullpath]) && !(csv[:fullpath] =~ IGNORE_FILES)
        add_warning(csv, "Skipping unknown file type")
      end
    end
  end
end