jwa_ingestor.rb | searchcode

/lib/jwa_ingestor.rb

https://bitbucket.org/mediashelf/jwa_fedora · Ruby · 128 lines · 122 code · 6 blank · 0 comment · 4 complexity · 63b2bd2c69bdccf5aff90eebca34533b MD5 · raw file

require 'rubygems'
gem 'fastercsv'
require 'fastercsv'
require 'active_fedora'
class JWAIngestor
  def initialize(rootdir, job)
    Fedora::Repository.register(FEDORA_URL)
    @bnmap = Hash.new
    ActiveFedora::SolrService.register(SOLR_URL)
    @rootdir=rootdir
    @job = job
    self
  end

  def run(dir=@rootdir, parent=nil)
    process(dir)
  end

  def process(file, parent=nil)
    o=nil
    if(parent)
      o = ActiveFedora::Base.new
      o.add_relationship(:is_member_of, parent)
    else
      o = OralHistory.new

      Dir["#{file}/*"].each do |f|
        @job.log("processing file #{f}")
        if f =~ /(.*)\.(mp3|wav)/
          base = File.basename($1)
          a=@bnmap[base]
          unless  a
            a = AudioRecord.new
            a.add_relationship(:is_part_of, o)
            @bnmap[base]=a
          end
          @job.log("processing file as audiorecord ds")
          a.add_datastream(create_file_ds(f))
          a.save
        else
          unless File.directory?(f)
            process_metadata(f,o) if f =~/metadata.csv/
            o.add_datastream(create_file_ds(f))
          else
            process(f, o)
          end
        end
      end
    end
    o.save
    o.pid
  end
  def process_metadata(file, oral)
    @job.log("processing metadata file #{File.basename(file)}")
    pstream = oral.datastreams['properties']
    dc = oral.datastreams['dublin_core']
    sen_pass = oral.datastreams['sensitive_passages']
    sig_pass = oral.datastreams['significant_passages']

    field_map  = {
      'contributor.narrator'=>[pstream, 'narrator'],
      'publisher'=>[dc, 'publisher'],
      'creator'=>[dc, 'creator'],
      'format.extent'=>[dc, 'extent'],
      'format.medium'=>[dc, 'medium'],
      'format.mimetype'=>[dc, 'format'],
      'type'=>[dc, 'type'],
      'rights'=>[dc, 'rights'],
      'language'=>[dc, 'language'],
      'significant_passage'=>[sig_pass, 'significant_passage'],
      'description.bio'=>[pstream, 'bio'],
      'description.abstract'=>[dc, 'description'],
      'Significant passages'=>[sig_pass, 'significant_passage'],
      'sensitive_passage'=>[sen_pass, 'sensitive_passage'],
      'Sensitive passages'=>[sen_pass, 'sensitive_passage'],
      'format.medium.available'=>[pstream, 'hard_copy_availability'],
      'format.medium.where'=>[pstream, 'hard_copy_location'],
      'contributor.other'=>[pstream, 'other_contributor'],
      'contributor.transcripteditor'=>[pstream, 'transcript_editor'],
      'title'=>[dc, 'title'],
      'subject'=>[dc, 'subject'],
      'date'=>[dc, 'date'],
      'title.alternative'=>[dc, 'alternative'],
      'contributor.interviewer'=>[pstream, 'interviewer'],
      'location.recording'=>[pstream, 'location'],
      'coverage.temporal'=>[dc, 'temporal'],
      'coverage.spatial'=>[dc, 'spatial'],
      'coverage.spacial'=>[dc, 'spatial'],
      'subject.lcsh' =>[dc, 'subject_heading'],
      'notes'=>[pstream, 'notes']
    }


    FasterCSV::foreach(file) do |row|
      row.compact!
      d = field_map[row.first.strip]
      if d
        d.first.send("#{d.last}_append", demoronize(row.last.strip)) if row.size >1
      else
        @job.log "unknown field #{row.inspect}" if RAILS_ENV=='production'
      end
    end
  end
  private 
  def demoronize(str)
    s = str.dup
    s.gsub!(/\x82/,',') 
    s.gsub!(/\x84/,',,') 
    s.gsub!(/\x85/,'...') 
    s.gsub!(/\x88/,'^') 
    s.gsub!(/\x89/,'o/oo') 
    s.gsub!(/\x8b/,'<') 
    s.gsub!(/\x8c/,'OE') 
    s.gsub!(/\x91|\x92/,"'") 
    s.gsub!(/\x93|\x94/,'"') 
    s.gsub!(/\x95/,'*') 
    s.gsub!(/\x96/,'-') 
    s.gsub!(/\x97/,'--') 
    s.gsub!(/\x98/,'~') 
    s.gsub!(/\x99/,'TM') 
    s.gsub!(/\x9b/,'>') 
    s.gsub(/\x9c/,'oe') 
  end
  def create_file_ds(f)
    @job.log("creating file ds ")
    ActiveFedora::Datastream.new(:dsID=>File.basename(f), :controlGroup=>'M', :blob=>File.open(f), :dsLabel=>File.basename(f))
  end
end