PageRenderTime 47ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/marc_document.rb

https://github.com/rsinger/The-Visible-Kilgour
Ruby | 316 lines | 280 code | 36 blank | 0 comment | 15 complexity | c9545cce2fcf2cbe835ce79047d63a20 MD5 | raw file
  # Core extension used when turning MARC subfield text into display labels.
  class String
    # Returns a copy of the string with a single trailing '.', ';' or '/'
    # (and any whitespace that follows it) removed from the end.
    #
    # The pattern is anchored with \z rather than $: $ also matches at every
    # line break, so for a multi-line value the old pattern stripped the
    # punctuation at the end of the FIRST line and left the real trailing
    # punctuation in place. Strings that do not end in one of the three
    # characters are returned unchanged (as a new String). Whitespace that
    # precedes the punctuation is kept.
    def strip_trailing_punctuation
      sub(/[.;\/]\s*\z/, '')
    end
  end
  # A Ferret::Document populated from one MARC authority record.
  #
  # The constructor reads the record's 010 (LCCN), 053 (LC classification),
  # 005 (last modified), the first 1xx heading it finds, and the 4xx / 5xx
  # tracings, and stores the formatted values as document fields.
  #
  # The record is expected to respond to [](tag), find_all and to_marc, and
  # its data fields to [](code), tag and each (yielding objects with code
  # and value) -- presumably ruby-marc's MARC::Record / MARC::DataField;
  # confirm against the caller.
  class MARCDocument < Ferret::Document
    # Subfield codes treated as subdivisions; each is joined with '--' in
    # labels and also stored under :<code>_str by subdivision_strings.
    SUBDIVISION_CODES = %w[v x y z].freeze

    # Subfield codes that make up the main part of each kind of heading.
    PERSONAL_CODES      = %w[a b c d e f g h i j k l m n o p q r s t].freeze
    CORPORATE_CODES     = %w[a b c d e f g h i j l m n o p r s t].freeze
    MEETING_CODES       = %w[a c d e f g h i j k l n p q s t].freeze
    TITLE_CODES         = %w[a d f g h i k l m n o p r s t].freeze
    CHRONOLOGICAL_CODES = %w[a i].freeze
    TOPICAL_CODES       = %w[a b].freeze
    GEOGRAPHIC_CODES    = %w[a].freeze

    # 1xx heading tags in lookup order: [tag, method that fills the document
    # fields for that heading, value stored in :heading_type].
    HEADING_TYPES = [
      ['100', :format_personal_name,             'Personal Name'],
      ['110', :format_corporate_name,            'Corporate Name'],
      ['111', :format_meeting,                   'Meeting'],
      ['130', :format_uniform_title,             'Uniform Title'],
      ['148', :format_chronological_term,        'Chronological Term'],
      ['150', :format_topical_term,              'Topical Term'],
      ['151', :format_geographical_name,         'Geographical Name'],
      ['155', :format_genre_term,                'Genre/Form Term'],
      ['180', :format_general_subdivision,       'General Subdivision'],
      ['181', :format_geographic_subdivision,    'Geographic Subdivision'],
      ['182', :format_chronological_subdivision, 'Chronological Subdivision'],
      ['185', :format_form_subdivision,          'Form Subdivision']
    ].freeze

    # Formatter for a 4xx / 5xx tracing, keyed by the last two characters of
    # the tag (so '400' and '500' are both formatted as personal names).
    TRACING_FORMATTERS = {
      '00' => :format_personal_field,
      '10' => :format_corporate_field,
      '11' => :format_meeting_field,
      '30' => :format_title_field,
      '48' => :format_chronological_field,
      '50' => :format_topical_field,
      '51' => :format_geographic_field,
      '55' => :format_genre_field,
      '80' => :format_general_subd_field,
      '81' => :format_geographic_subd_field,
      '82' => :format_chronological_subd_field,
      '85' => :format_form_subd_field
    }.freeze

    # Joins the values of a field's subfields into one string.
    #
    # field         - object whose each yields subfields (code, value); nil
    #                 is allowed.
    # separator     - string placed between the values.
    # include_codes - subfield codes to keep; an empty list keeps them all.
    #
    # Returns the joined string with trailing punctuation stripped, or nil
    # when field is nil.
    def format_field(field, separator=' ', include_codes=[])
      return nil unless field
      values = []
      field.each do |subfield|
        if include_codes.empty? || include_codes.include?(subfield.code)
          values << subfield.value
        end
      end
      values.join(separator).strip_trailing_punctuation
    end

    # Parses the record's 005 field into a DateTime.
    # Raises NoMethodError when the record has no 005 field.
    def date_modified(marc)
      DateTime.parse(marc['005'].value)
    end

    # Returns an Array with the formatted $a of every 053 field (empty when
    # the record has none).
    #
    # Previously the formatted strings were built and then discarded, and
    # the raw 053 field objects were returned instead.
    def lcc(marc)
      lcc_fields = marc.find_all { |f| f.tag == '053' }
      lcc_fields.map { |f| format_field(f, '', ['a']) }
    end

    # Returns 010 $a with all whitespace removed.
    # Raises NoMethodError when the 010 field or its $a is missing.
    def normalize_lccn(marc)
      lccn = marc['010']['a']
      lccn.gsub(/\s/, '')
    end

    # Returns the field's subdivision subfields (v, x, y, z) joined with
    # '--'; an empty string when the field has none.
    def format_subdivisions(f)
      format_field(f, '--', SUBDIVISION_CODES)
    end

    # The format_*_field methods below all return the heading's main
    # subfields joined with spaces, followed by '--' and the subdivisions
    # when the field has any. They differ only in which subfield codes
    # count as the main part.

    def format_personal_field(f)
      heading_with_subdivisions(f, PERSONAL_CODES)
    end

    def format_corporate_field(f)
      heading_with_subdivisions(f, CORPORATE_CODES)
    end

    def format_meeting_field(f)
      heading_with_subdivisions(f, MEETING_CODES)
    end

    def format_title_field(f)
      heading_with_subdivisions(f, TITLE_CODES)
    end

    def format_chronological_field(f)
      heading_with_subdivisions(f, CHRONOLOGICAL_CODES)
    end

    def format_topical_field(f)
      heading_with_subdivisions(f, TOPICAL_CODES)
    end

    def format_geographic_field(f)
      heading_with_subdivisions(f, GEOGRAPHIC_CODES)
    end

    # Genre headings are formatted exactly like geographic ones ($a only).
    def format_genre_field(f)
      format_geographic_field(f)
    end

    # The four *_subd_field formatters produce only the '--'-joined
    # subdivision subfields.

    def format_general_subd_field(f)
      format_subdivisions(f)
    end

    def format_geographic_subd_field(f)
      format_subdivisions(f)
    end

    def format_chronological_subd_field(f)
      format_subdivisions(f)
    end

    def format_form_subd_field(f)
      format_subdivisions(f)
    end

    # Stores every subdivision subfield of f under :v_str, :x_str, :y_str or
    # :z_str. The first value for a code is stored as a plain value; later
    # values for the same code turn the entry into an Array and append.
    def subdivision_strings(f)
      f.each do |sub|
        next unless SUBDIVISION_CODES.include?(sub.code)
        key = "#{sub.code}_str".to_sym
        existing = self[key]
        self[key] = existing ? ([*existing] << sub.value) : sub.value
      end
    end

    # The format_* methods below fill the document from a 1xx heading field:
    # :label always, :top_term_str for most heading kinds, and the per-code
    # subdivision fields via subdivision_strings.

    def format_personal_name(f)
      self[:label] = format_personal_field(f)
      self[:top_term_str] = format_field(f, ' ', %w[a b c d q])
      subdivision_strings(f)
    end

    def format_corporate_name(f)
      self[:label] = format_corporate_field(f)
      self[:top_term_str] = format_field(f, ' ', %w[a b c d])
      subdivision_strings(f)
    end

    def format_meeting(f)
      self[:label] = format_meeting_field(f)
      subdivision_strings(f)
    end

    def format_uniform_title(f)
      self[:label] = format_title_field(f)
      self[:top_term_str] = format_field(f, ' ', %w[a d f g n o p r s t])
      subdivision_strings(f)
    end

    def format_chronological_term(f)
      self[:label] = format_chronological_field(f)
      subdivision_strings(f)
    end

    # The label is built with the topical formatter ($a, $b). It used to be
    # built with the chronological one ($a, $i), which dropped $b from the
    # label even though :top_term_str and the 450/550 tracings include it.
    def format_topical_term(f)
      self[:label] = format_topical_field(f)
      self[:top_term_str] = format_field(f, ' ', TOPICAL_CODES)
      subdivision_strings(f)
    end

    def format_geographical_name(f)
      self[:label] = format_geographic_field(f)
      self[:top_term_str] = format_field(f, ' ', GEOGRAPHIC_CODES)
      subdivision_strings(f)
    end

    def format_genre_term(f)
      self[:label] = format_genre_field(f)
      self[:top_term_str] = format_field(f, ' ', ['a'])
      subdivision_strings(f)
    end

    def format_general_subdivision(f)
      self[:label] = format_general_subd_field(f)
      subdivision_strings(f)
    end

    def format_geographic_subdivision(f)
      self[:label] = format_geographic_subd_field(f)
      subdivision_strings(f)
    end

    def format_chronological_subdivision(f)
      self[:label] = format_chronological_subd_field(f)
      subdivision_strings(f)
    end

    def format_form_subdivision(f)
      self[:label] = format_form_subd_field(f)
      subdivision_strings(f)
    end

    # Returns an Array of formatted labels, one per 4xx field that has a
    # known formatter. 4xx fields with any other tag are skipped (they used
    # to contribute a nil entry).
    def get_alt_labels(marc)
      alts = marc.find_all { |f| f.tag =~ /\A4../ }
      alts.map { |f| format_tracing(f) }.compact
    end

    # Sorts the record's 5xx tracings into :broader_str, :narrower_str and
    # :related_str and stores each non-empty list on the document.
    # 5xx fields without a known formatter are skipped (they used to add a
    # nil entry to one of the lists).
    #
    # Returns the Hash of all three lists, including empty ones.
    def related_terms(marc)
      tracings = {:broader_str => [], :narrower_str => [], :related_str => []}
      marc.find_all { |f| f.tag =~ /\A5../ }.each do |field|
        term = format_tracing(field)
        next unless term
        tracings[relationship_key(field)] << term
      end
      tracings.each_pair do |key, terms|
        self[key] = terms unless terms.empty?
      end
    end

    # Builds the document from a MARC authority record (see the class
    # comment for the interface the record must provide).
    #
    # :heading_type is nil and :label is left unset when the record has none
    # of the 1xx tags listed in HEADING_TYPES.
    def initialize(marc)
      super()
      self[:lccn] = normalize_lccn(marc)
      self[:lcc] = lcc(marc)
      self[:marc_last_modified] = date_modified(marc)
      self[:heading_type] = index_heading(marc)
      self[:label_str] = self[:label]
      self[:alt_labels] = get_alt_labels(marc)
      self[:alt_labels_str] = self[:alt_labels]
      related_terms(marc)
      self[:marc_record] = marc.to_marc
      self[:last_modified] = DateTime.now
    end

    # Compares this document's :marc_last_modified with other's.
    #
    # Returns :less_than when other's value is greater than this one's,
    # :equal when they are the same, and :greater_than otherwise.
    def compare_last_modified(other)
      theirs = other[:marc_last_modified]
      mine = self[:marc_last_modified]
      return :less_than if theirs > mine
      return :equal if theirs == mine
      :greater_than
    end

    private

    # Shared body of the format_*_field methods: the subfields listed in
    # codes joined with spaces, plus '--' and the subdivisions when present.
    def heading_with_subdivisions(f, codes)
      main = format_field(f, ' ', codes)
      sub = format_subdivisions(f)
      return main if sub.empty?
      "#{main}--#{sub}"
    end

    # Formats a 4xx / 5xx field with the formatter matching the part of its
    # tag after the first character; nil when there is none for that tag.
    def format_tracing(field)
      formatter = TRACING_FORMATTERS[field.tag[1..-1]]
      formatter && send(formatter, field)
    end

    # Picks the tracing list for a 5xx field from its $w subfield:
    # 'g' => broader, 'h' => narrower, anything else (or no $w) => related.
    #
    # Only the first character is inspected. In MARC 21 authority data $w
    # is a multi-position control subfield whose first position carries the
    # relationship code, so a value such as 'gnnn' is still a broader term
    # (see the MARC 21 tracings documentation).
    def relationship_key(field)
      case field['w'].to_s[0, 1]
      when 'g' then :broader_str
      when 'h' then :narrower_str
      else :related_str
      end
    end

    # Finds the first tag in HEADING_TYPES present on the record, runs its
    # formatter (which sets :label and related fields) and returns the
    # heading type name; returns nil when no listed tag is present.
    def index_heading(marc)
      HEADING_TYPES.each do |tag, formatter, type_name|
        field = marc[tag]
        next unless field
        send(formatter, field)
        # dup so every document owns its own String rather than sharing the
        # constant's, as it did when this was a string literal.
        return type_name.dup
      end
      nil
    end
  end