PageRenderTime 181ms CodeModel.GetById 15ms RepoModel.GetById 2ms app.codeStats 0ms

/lib/rdfmake.rb

https://bitbucket.org/jeffreycwitt/rdfbuilder
Ruby | 205 lines | 162 code | 34 blank | 9 comment | 21 complexity | be6aff269d87a8f081d1ef188c2dba03 MD5 | raw file
  1. #!/usr/bin/env ruby
  2. #required dependencies
  3. require 'rubygems'
  4. require 'nokogiri'
  5. require_relative 'rdfconfig.rb'
  # Configuration shared by every method in this script.
  # TODO: move these settings into a separate config file.
  #
  # $base_location is the directory prefix prepended to "<filestem>/..." when
  # transcription files are looked up; it is read from ConfigInfo
  # (presumably defined in rdfconfig.rb -- confirm).
  t = ConfigInfo.new
  $base_location = t.base_location
  # Hard-coded alternative kept for reference:
  #$base_location = "/Users/JCWitt/Documents/PlaoulTranscriptions/"

  # Directory prefix of the translation files.
  $trans_base_location = '/Users/JCWitt/Documents/PlaoulTranslations/'
  # Reads a single value out of one transcription file under $base_location.
  #
  # filestem - name of the text; also the name of its directory under
  #            $base_location.
  # xpath    - XPath expression evaluated against the parsed document.
  # prefix   - witness prefix of the file ("<prefix>_<filestem>.xml"), or
  #            'none' to read "<filestem>.xml".
  #
  # Returns the content of the LAST node matched by xpath, or "" when nothing
  # matches. Errors from opening the file propagate to the caller.
  def extract_info(filestem, xpath, prefix)
    basename = prefix == 'none' ? "#{filestem}.xml" : "#{prefix}_#{filestem}.xml"
    full_filename = "#{$base_location}#{filestem}/#{basename}"

    # Block form guarantees the handle is closed even if parsing raises.
    doc = File.open(full_filename, "r") { |f| Nokogiri::XML(f) }

    last_match = doc.xpath(xpath.to_s).last
    last_match ? last_match.content : ""
  end
  # Builds the RDF record for the edited text of one item.
  #
  # date     - creation date; only written into the header comment.
  # filestem - name of the text. Used to look up the title (via extract_info),
  #            to probe for witness files under $base_location, and in every
  #            generated URI.
  #
  # A dcterms:hasPart link is emitted for each witness transcription
  # ("<witness>_<filestem>.xml") that exists in the item's directory.
  #
  # Returns the Nokogiri::XML::Builder holding the document.
  def make_rdf_xml(date, filestem)
    document_title = extract_info(filestem.to_s, "//xmlns:titleStmt/xmlns:title", 'none')
    bitfs = filestem.to_s.downcase
    item_dir = "#{$base_location}#{filestem}/"
    header = "This RDF file was created on #{date} by RDFbuilder: a ruby script written by Jeffrey C. Witt\n" \
             "RDFbuilder is publically available at http://bitbucket.org/jeffreycwitt/rdfbuilder\n" \
             "Please contact Jeffrey C. Witt at jcwitt@loyola.edu if you would like to help develop this script for a wider audience"

    Nokogiri::XML::Builder.new do |xml|
      xml.comment(header)
      xml.RDF("xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
              "xmlns:role" => "http://www.loc.gov/loc.terms/relators/",
              "xmlns:rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
              "xmlns:plaoul" => "http://petrusplaoul.org",
              "xmlns:collex" => "http://www.collex.org/schema#",
              "xmlns:dcterms" => "http://purl.org/dc/terms/",
              "xmlns:dc" => "http://purl.org/dc/elements/1.1/") {
        # Needed so that the root element itself carries the rdf: prefix.
        xml.parent.namespace = xml.parent.namespace_definitions.find { |ns| ns.prefix == "rdf" }
        xml['plaoul'].project("rdf:about" => "http://petrusplaoul.org/text/uri/edited/#{filestem}") {
          xml['dc'].title document_title.to_s
          xml['dc'].language "Latin"
          xml['dc'].date "1392"
          xml['dc'].type "Codex"
          xml['dc'].subject "Philosophy"
          xml['dc'].subject "Theology"
          xml['role'].AUT "Petrus Plaoul"
          xml['role'].EDT "Witt, Jeffrey"
          xml['collex'].genre "Philosophy"
          xml['collex'].genre "Religion"
          xml['collex'].discipline "Philosophy"
          xml['collex'].discipline "Religious Studies"
          xml['collex'].federation "MESA"
          xml['collex'].archive "Plaoul"
          xml['collex'].source_xml("rdf:resource" => "https://bitbucket.org/jeffreycwitt/#{bitfs}/raw/master/#{filestem}.xml")
          xml['collex'].text_("rdf:resource" => "http://petrusplaoul.org/plaintext/index.php?fs=#{filestem}&ms=edited")
          xml['dcterms'].isPartOf("rdf:resource" => "http://petrusplaoul.org")
          # File.exist? is used because File.exists? no longer exists in Ruby >= 3.2.
          %w[reims vat svict sorb].each do |witness|
            next unless File.exist?("#{item_dir}#{witness}_#{filestem}.xml")
            xml['dcterms'].hasPart("rdf:resource" => "http://petrusplaoul.org/text/uri/#{witness}/#{filestem}")
          end
          # A hasPart link to the English translation (files under
          # $trans_base_location) is not emitted yet.
          xml['rdfs'].seeAlso("rdf:resource" => "http://petrusplaoul.org/text/textdisplay.php?fs=#{filestem}")
        }
      }
    end
  end
  # Writes RDF files for every text listed in the project data file.
  #
  # For each filestem found in the project file this writes the RDF of the
  # edited text and then one RDF file per witness transcription
  # ("<witness>_<filestem>.xml") present in the item's directory under
  # $base_location.
  #
  # date         - creation date passed through to write_rdf.
  # project_file - path of the project XML listing the filestems; defaults to
  #                the location this script has always used.
  #
  # Returns nil.
  def write_rdf_all(date, project_file = "/Users/JCWitt/WebPages/petrusplaoul-mirror/projectfiles/projectdata.xml")
    print "test"

    # Parse inside the block so the handle is closed even if a later step raises.
    doc = File.open(project_file, "r") { |f| Nokogiri::XML(f) }

    doc.xpath("//div[@id='body']//fileName/@filestem").each do |attr|
      filestem = attr.to_s
      print filestem
      write_rdf(date.to_s, filestem, "none")

      # Paths are built exactly as the other functions in this file build them.
      # File.exist? is used because File.exists? no longer exists in Ruby >= 3.2.
      %w[reims vat sorb svict].each do |witness|
        next unless File.exist?("#{$base_location}#{filestem}/#{witness}_#{filestem}.xml")
        write_rdf(date.to_s, filestem, witness)
      end
    end
    nil
  end
  # Builds the RDF for one text and writes it to disk.
  #
  # date       - creation date passed through to the builder.
  # filestem   - name of the text.
  # prefix     - witness prefix, or 'none' for the edited text. Selects the
  #              builder (make_rdf_dipl_xml vs. make_rdf_xml) and the file
  #              name ("rdf_<prefix>_<filestem>.xml" vs. "rdf_<filestem>.xml").
  # output_dir - directory the file is written to; defaults to the location
  #              this script has always used.
  #
  # Returns nil (the result of the final puts).
  def write_rdf(date, filestem, prefix, output_dir = "/Users/JCWitt/Documents/PlaoulRDFFiles")
    if prefix != 'none'
      builder = make_rdf_dipl_xml(date.to_s, filestem.to_s, prefix.to_s)
      target = File.join(output_dir, "rdf_#{prefix}_#{filestem}.xml")
    else
      builder = make_rdf_xml(date.to_s, filestem.to_s)
      target = File.join(output_dir, "rdf_#{filestem}.xml")
    end

    # Serialise first so a failure here does not leave a truncated file behind.
    xml = builder.to_xml(:indent => 3, :encoding => 'UTF-8')

    # Block form closes the handle even if the write raises.
    File.open(target, "w") { |out| out.write(xml) }
    puts "it seems to have worked"
  end
  # Builds the RDF record for one witness transcription of an item.
  #
  # date     - creation date; only written into the header comment.
  # filestem - name of the text, used in every generated URI.
  # prefix   - witness prefix ('sorb', 'reims', 'vat' or 'svict'). It selects
  #            the transcription whose title is read (via extract_info) and
  #            appears in the generated URIs. For any other value the "ms="
  #            query parameter of the generated links is left empty.
  #
  # Returns the Nokogiri::XML::Builder holding the document.
  def make_rdf_dipl_xml(date, filestem, prefix)
    # Only the witness's own title is needed, so only that file is parsed.
    document_title = extract_info(filestem.to_s, "//xmlns:titleStmt/xmlns:title", prefix.to_s)
    bitfs = filestem.to_s.downcase
    # The abbreviation used in links is the prefix itself for every known witness.
    msabbrev = %w[sorb reims vat svict].include?(prefix) ? prefix : nil
    header = "This RDF file was created on #{date} by RDFbuilder: a ruby script written by Jeffrey C. Witt\n" \
             "RDFbuilder is publically available at http://bitbucket.org/jeffreycwitt/rdfbuilder\n" \
             "Please contact Jeffrey C. Witt at jcwitt@loyola.edu if you would like to help develop this script for a wider audience"

    Nokogiri::XML::Builder.new do |xml|
      xml.comment(header)
      xml.RDF("xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
              "xmlns:role" => "http://www.loc.gov/loc.terms/relators/",
              "xmlns:rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
              "xmlns:plaoul" => "http://petrusplaoul.org",
              "xmlns:collex" => "http://www.collex.org/schema#",
              "xmlns:dcterms" => "http://purl.org/dc/terms/",
              "xmlns:dc" => "http://purl.org/dc/elements/1.1/") {
        # Needed so that the root element itself carries the rdf: prefix.
        xml.parent.namespace = xml.parent.namespace_definitions.find { |ns| ns.prefix == "rdf" }
        xml['plaoul'].project("rdf:about" => "http://petrusplaoul.org/text/uri/#{prefix}/#{filestem}") {
          xml['dc'].title document_title.to_s
          xml['dc'].language "Latin"
          xml['dc'].date "1392"
          xml['dc'].type "Manuscript"
          xml['dc'].subject "Philosophy"
          xml['dc'].subject "Theology"
          xml['role'].AUT "Petrus Plaoul"
          xml['role'].EDT "Witt, Jeffrey"
          xml['collex'].genre "Philosophy"
          xml['collex'].genre "Religion"
          xml['collex'].discipline "Philosophy"
          xml['collex'].discipline "Religious Studies"
          xml['collex'].federation "MESA"
          xml['collex'].archive "Plaoul"
          xml['collex'].source_xml("rdf:resource" => "https://bitbucket.org/jeffreycwitt/#{bitfs}/raw/master/#{prefix}_#{filestem}.xml")
          xml['collex'].text_("rdf:resource" => "http://petrusplaoul.org/plaintext/index.php?fs=#{filestem}&ms=#{msabbrev}")
          xml['dcterms'].isPartOf("rdf:resource" => "http://petrusplaoul.org/text/uri/edited/#{filestem}")
          xml['rdfs'].seeAlso("rdf:resource" => "http://petrusplaoul.org/text/textdisplay.php?fs=#{filestem}&ms=#{msabbrev}")
        }
      }
    end
  end