PageRenderTime 52ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/jwa_ingestor.rb

https://bitbucket.org/mediashelf/jwa_fedora
Ruby | 128 lines | 122 code | 6 blank | 0 comment | 4 complexity | 63b2bd2c69bdccf5aff90eebca34533b MD5 | raw file
Possible License(s): GPL-2.0, IPL-1.0
  1. require 'rubygems'
  2. gem 'fastercsv'
  3. require 'fastercsv'
  4. require 'active_fedora'
  5. class JWAIngestor
  6. def initialize(rootdir, job)
  7. Fedora::Repository.register(FEDORA_URL)
  8. @bnmap = Hash.new
  9. ActiveFedora::SolrService.register(SOLR_URL)
  10. @rootdir=rootdir
  11. @job = job
  12. self
  13. end
  14. def run(dir=@rootdir, parent=nil)
  15. process(dir)
  16. end
  17. def process(file, parent=nil)
  18. o=nil
  19. if(parent)
  20. o = ActiveFedora::Base.new
  21. o.add_relationship(:is_member_of, parent)
  22. else
  23. o = OralHistory.new
  24. Dir["#{file}/*"].each do |f|
  25. @job.log("processing file #{f}")
  26. if f =~ /(.*)\.(mp3|wav)/
  27. base = File.basename($1)
  28. a=@bnmap[base]
  29. unless a
  30. a = AudioRecord.new
  31. a.add_relationship(:is_part_of, o)
  32. @bnmap[base]=a
  33. end
  34. @job.log("processing file as audiorecord ds")
  35. a.add_datastream(create_file_ds(f))
  36. a.save
  37. else
  38. unless File.directory?(f)
  39. process_metadata(f,o) if f =~/metadata.csv/
  40. o.add_datastream(create_file_ds(f))
  41. else
  42. process(f, o)
  43. end
  44. end
  45. end
  46. end
  47. o.save
  48. o.pid
  49. end
  50. def process_metadata(file, oral)
  51. @job.log("processing metadata file #{File.basename(file)}")
  52. pstream = oral.datastreams['properties']
  53. dc = oral.datastreams['dublin_core']
  54. sen_pass = oral.datastreams['sensitive_passages']
  55. sig_pass = oral.datastreams['significant_passages']
  56. field_map = {
  57. 'contributor.narrator'=>[pstream, 'narrator'],
  58. 'publisher'=>[dc, 'publisher'],
  59. 'creator'=>[dc, 'creator'],
  60. 'format.extent'=>[dc, 'extent'],
  61. 'format.medium'=>[dc, 'medium'],
  62. 'format.mimetype'=>[dc, 'format'],
  63. 'type'=>[dc, 'type'],
  64. 'rights'=>[dc, 'rights'],
  65. 'language'=>[dc, 'language'],
  66. 'significant_passage'=>[sig_pass, 'significant_passage'],
  67. 'description.bio'=>[pstream, 'bio'],
  68. 'description.abstract'=>[dc, 'description'],
  69. 'Significant passages'=>[sig_pass, 'significant_passage'],
  70. 'sensitive_passage'=>[sen_pass, 'sensitive_passage'],
  71. 'Sensitive passages'=>[sen_pass, 'sensitive_passage'],
  72. 'format.medium.available'=>[pstream, 'hard_copy_availability'],
  73. 'format.medium.where'=>[pstream, 'hard_copy_location'],
  74. 'contributor.other'=>[pstream, 'other_contributor'],
  75. 'contributor.transcripteditor'=>[pstream, 'transcript_editor'],
  76. 'title'=>[dc, 'title'],
  77. 'subject'=>[dc, 'subject'],
  78. 'date'=>[dc, 'date'],
  79. 'title.alternative'=>[dc, 'alternative'],
  80. 'contributor.interviewer'=>[pstream, 'interviewer'],
  81. 'location.recording'=>[pstream, 'location'],
  82. 'coverage.temporal'=>[dc, 'temporal'],
  83. 'coverage.spatial'=>[dc, 'spatial'],
  84. 'coverage.spacial'=>[dc, 'spatial'],
  85. 'subject.lcsh' =>[dc, 'subject_heading'],
  86. 'notes'=>[pstream, 'notes']
  87. }
  88. FasterCSV::foreach(file) do |row|
  89. row.compact!
  90. d = field_map[row.first.strip]
  91. if d
  92. d.first.send("#{d.last}_append", demoronize(row.last.strip)) if row.size >1
  93. else
  94. @job.log "unknown field #{row.inspect}" if RAILS_ENV=='production'
  95. end
  96. end
  97. end
  98. private
  99. def demoronize(str)
  100. s = str.dup
  101. s.gsub!(/\x82/,',')
  102. s.gsub!(/\x84/,',,')
  103. s.gsub!(/\x85/,'...')
  104. s.gsub!(/\x88/,'^')
  105. s.gsub!(/\x89/,'o/oo')
  106. s.gsub!(/\x8b/,'<')
  107. s.gsub!(/\x8c/,'OE')
  108. s.gsub!(/\x91|\x92/,"'")
  109. s.gsub!(/\x93|\x94/,'"')
  110. s.gsub!(/\x95/,'*')
  111. s.gsub!(/\x96/,'-')
  112. s.gsub!(/\x97/,'--')
  113. s.gsub!(/\x98/,'~')
  114. s.gsub!(/\x99/,'TM')
  115. s.gsub!(/\x9b/,'>')
  116. s.gsub(/\x9c/,'oe')
  117. end
  118. def create_file_ds(f)
  119. @job.log("creating file ds ")
  120. ActiveFedora::Datastream.new(:dsID=>File.basename(f), :controlGroup=>'M', :blob=>File.open(f), :dsLabel=>File.basename(f))
  121. end
  122. end