PageRenderTime 62ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/src/test/clojure/topoged/test/entity.clj

https://github.com/m0smith/topoged
Clojure | 292 lines | 266 code | 26 blank | 0 comment | 6 complexity | c7b87ebbd02199f5667be0cf4548b383 MD5 | raw file
  1. (ns topoged.test.entity
  2. (:use [clojure.java.io :only [reader writer input-stream output-stream]]
  3. [clojure.contrib.seq-utils :only (find-first)]
  4. [clojure.pprint :only (pprint)]
  5. [topoged.file :only (copy-md5)]
  6. [topoged.gedcom :only [gedcom-seq]]))
  7. (defn conjv
  8. ( [coll x] (conj (if coll coll #{}) x))
  9. ( [coll x & xs]
  10. (if xs
  11. (recur (conjv coll x) (first xs) (rest xs))
  12. (conjv coll x)))
  13. )
  14. (defstruct TYPE :TYPE_ID :TYPE_NAME)
  15. (defstruct SOURCE :SOURCE_ID :TYPE_ID)
  16. (defstruct SOURCE_WITHIN_SOURCE :PARENT_SOURCE_ID :MEMBER_SOURCE_ID :ORDER)
  17. (defstruct REPRESENTAITON :SOURCE_ID :TYPE_ID :CONTENT :COMMENTS)
  18. (defstruct REPOSITORY :REPOSITORY_ID :TYPE_ID )
  19. (defstruct REPOSITORY_SOURCE :REPOSITORY_ID :SOURCE_ID :CALL_NUMBER :DESCRIPTION)
  20. (defstruct ATTRIBUTE_OWNER :OWNER_ID :OWNER_TYPE_ID :ATTRIBUTE_ID)
  21. (defstruct PERSONA :PERSONA_ID)
  22. (defstruct PERSONA_SOURCE :PERSONA_ID :SOURCE_ID :ID_IN_SOURCE)
  23. (defstruct GROUP :GROUP_ID :TYPE_ID)
  24. (defstruct GROUP_SOURCE :GROUP_ID :SOURCE_ID :ID_IN_SOURCE)
  25. (defstruct ATTRIBUTE_ATTRIBUTE :PARENT_ATTRIBUTE_ID :MEMBER_ATTRIBUTE_ID)
  26. (defstruct ATTRIBUTE :ATTRIBUTE_ID :TYPE_ID :VALUE)
  27. (defn update-state [state key value]
  28. (update-in state [key] conjv value))
  29. (defn find-type [state name]
  30. (let [types (:type state)]
  31. (first (filter #(= (:id %) name) types))))
  32. (def initial-state
  33. {
  34. :type, #{
  35. (struct TYPE "GEDCOM" "GEDCOM")
  36. (struct TYPE "REPOSITORY" "REPOSITORY")
  37. (struct TYPE "PERSONA" "PERSONA")
  38. (struct TYPE "GROUP" "GROUP")
  39. (struct TYPE "MD5" "MD5")
  40. (struct TYPE "text/plain" "text/plain")
  41. }
  42. })
  43. (defn handler-factory [m]
  44. (fn [state rec]
  45. (if-let [func (get m (:tag rec))]
  46. (func state rec)
  47. state)))
  48. (defn match-in-fn [ [& ks] val]
  49. #(= val (get-in % ks)))
  50. (defn subm-handler [source-zero]
  51. (fn [state rec]
  52. (let [repo-id (:value rec)
  53. state (-> state
  54. (update-state :repository
  55. (struct REPOSITORY repo-id "HOME"))
  56. (update-state :repository_source
  57. (struct REPOSITORY_SOURCE repo-id (:SOURCE_ID source-zero))))]
  58. (reduce #(let [line-number (get-in %2 [:attrs :line-number])
  59. rep (get-in %2 [:attrs :representation])
  60. tag (:tag %2)
  61. source-id (str "SOURCE" line-number)]
  62. (-> %1
  63. (update-state :source
  64. (struct SOURCE
  65. source-id
  66. "LINE"))
  67. (update-state :source_within_source
  68. (struct SOURCE_WITHIN_SOURCE
  69. (:SOURCE_ID source-zero)
  70. source-id
  71. line-number))
  72. (update-state :representation
  73. (struct REPRESENTAITON
  74. source-id
  75. "text/plain"
  76. rep))
  77. (update-state :attribute
  78. (struct ATTRIBUTE
  79. (str "ATTR" line-number)
  80. (name tag)
  81. (:value %2)
  82. ))
  83. (update-state :type
  84. (struct TYPE (name tag) (name tag)))
  85. (update-state :attribute_owner
  86. (struct ATTRIBUTE_OWNER repo-id "REPOSITORY"
  87. (str "ATTR" line-number)))))
  88. state (:content rec)))))
  89. (defn indi-handler [source-zero]
  90. (fn [state rec]
  91. (let [persona-id (:value rec)
  92. line-number (get-in rec [:attrs :line-number])
  93. source-id (str "SOURCE" line-number)
  94. rep (get-in rec [:attrs :representation])
  95. state (-> state
  96. (update-state :persona (struct PERSONA persona-id ))
  97. (update-state :source (struct SOURCE source-id "LINE"))
  98. (update-state :source_within_source
  99. (struct SOURCE_WITHIN_SOURCE
  100. (:SOURCE_ID source-zero)
  101. source-id line-number))
  102. (update-state :representation
  103. (struct REPRESENTAITON
  104. source-id
  105. "text/plain"
  106. rep))
  107. (update-state :persona_source
  108. (struct PERSONA_SOURCE persona-id source-id persona-id)))]
  109. (letfn [(persona-attribute [state line-number]
  110. (update-state state :attribute_owner
  111. (struct ATTRIBUTE_OWNER persona-id "PERSONA"
  112. (str "ATTR" line-number))))
  113. (attr-attribute [attr-id]
  114. (fn [state line-number]
  115. (update-state state :attribute_attribute
  116. (struct ATTRIBUTE_ATTRIBUTE attr-id
  117. (str "ATTR" line-number)))))
  118. (content-handler [attr-parent-func]
  119. (fn [state rec]
  120. (let [line-number (get-in rec [:attrs :line-number])
  121. rep (get-in rec [:attrs :representation])
  122. tag (:tag rec)
  123. source-id (str "SOURCE" line-number)]
  124. (-> state
  125. (update-state :source
  126. (struct SOURCE
  127. source-id
  128. "LINE"))
  129. (update-state :source_within_source
  130. (struct SOURCE_WITHIN_SOURCE
  131. (:SOURCE_ID source-zero)
  132. source-id
  133. line-number))
  134. (update-state :representation
  135. (struct REPRESENTAITON
  136. source-id
  137. "text/plain"
  138. rep))
  139. (update-state :attribute
  140. (struct ATTRIBUTE
  141. (str "ATTR" line-number)
  142. (name tag)
  143. (:value rec)
  144. ))
  145. (update-state :type
  146. (struct TYPE (name tag) (name tag)))
  147. (attr-parent-func line-number)
  148. (reduce-content rec (attr-attribute (str "ATTR" line-number)))
  149. ))))
  150. (reduce-content [state rec func]
  151. (if-let [content (seq (:content rec))]
  152. (reduce (content-handler func) state content)
  153. state))]
  154. (reduce-content state rec persona-attribute)))))
  155. (defn fam-handler [source-zero]
  156. (fn [state rec]
  157. (let [group-id (:value rec)
  158. line-number (get-in rec [:attrs :line-number])
  159. source-id (str "SOURCE" line-number)
  160. rep (get-in rec [:attrs :representation])
  161. state (-> state
  162. (update-state :group (struct GROUP group-id "FAMILY"))
  163. (update-state :source (struct SOURCE source-id "LINE"))
  164. (update-state :source_within_source
  165. (struct SOURCE_WITHIN_SOURCE
  166. (:SOURCE_ID source-zero)
  167. source-id line-number))
  168. (update-state :representation
  169. (struct REPRESENTAITON
  170. source-id
  171. "text/plain"
  172. rep))
  173. (update-state :group_source
  174. (struct GROUP_SOURCE group-id source-id group-id)))]
  175. (letfn [(group-attribute [state line-number]
  176. (update-state state :attribute_owner
  177. (struct ATTRIBUTE_OWNER group-id "GROUP"
  178. (str "ATTR" line-number))))
  179. (attr-attribute [attr-id]
  180. (fn [state line-number]
  181. (update-state state :attribute_attribute
  182. (struct ATTRIBUTE_ATTRIBUTE attr-id
  183. (str "ATTR" line-number)))))
  184. (content-handler [attr-parent-func]
  185. (fn [state rec]
  186. (let [line-number (get-in rec [:attrs :line-number])
  187. rep (get-in rec [:attrs :representation])
  188. tag (:tag rec)
  189. source-id (str "SOURCE" line-number)]
  190. (-> state
  191. (update-state :source
  192. (struct SOURCE
  193. source-id
  194. "LINE"))
  195. (update-state :source_within_source
  196. (struct SOURCE_WITHIN_SOURCE
  197. (:SOURCE_ID source-zero)
  198. source-id
  199. line-number))
  200. (update-state :representation
  201. (struct REPRESENTAITON
  202. source-id
  203. "text/plain"
  204. rep))
  205. (update-state :attribute
  206. (struct ATTRIBUTE
  207. (str "ATTR" line-number)
  208. (name tag)
  209. (:value rec)
  210. ))
  211. (update-state :type
  212. (struct TYPE (name tag) (name tag)))
  213. (attr-parent-func line-number)
  214. (reduce-content rec (attr-attribute (str "ATTR" line-number)))
  215. ))))
  216. (reduce-content [state rec func]
  217. (if-let [content (seq (:content rec))]
  218. (reduce (content-handler func) state content)
  219. state))]
  220. (reduce-content state rec group-attribute)))))
  221. (defn process-gedcom [f]
  222. (let [out-name "/tmp/f.ged"
  223. source-zero (struct SOURCE "SOURCE0" "GEDCOM")
  224. md5 (let [in f
  225. out out-name]
  226. (copy-md5 in out))
  227. state (merge initial-state
  228. { :source,
  229. #{
  230. source-zero
  231. }
  232. :representation
  233. #{
  234. (struct REPRESENTAITON "GEDCOM" "MD5" md5)
  235. (struct REPRESENTAITON "GEDCOM" "text/plain" (slurp out-name))
  236. }
  237. })
  238. handler (handler-factory
  239. {
  240. :HEAD #(merge %1 {:forward
  241. {
  242. (-> (find-first (match-in-fn [:tag] :SUBM) (:content %2) )
  243. :value)
  244. source-zero}})
  245. :SUBM (subm-handler source-zero)
  246. :INDI (indi-handler source-zero)
  247. :FAM (fam-handler source-zero)
  248. })]
  249. (with-open [rdr (reader out-name)]
  250. (reduce handler state (gedcom-seq (line-seq rdr))))))
  251. (defn seq-csv [s]
  252. (let [q "\""]
  253. (if (seq s)
  254. (str q (apply str (interpose (str q "," q) s)) q \newline)
  255. (println s))))
  256. (defn to-csv [m]
  257. (apply str
  258. (map seq-csv
  259. (mapcat identity
  260. (for [key (sort (filter (complement #{:forward}) (keys m)))]
  261. (let [recs (m key)
  262. kys (keys (first recs))]
  263. (concat [[ ""] [ ""] [ (str "TABLE: " (.toUpperCase (name key)))] (map name kys)]
  264. (for [rec (sort-by #(vec (map % kys)) recs)]
  265. (map get (repeat rec) kys)))))))))
  266. (defn simp [] (spit "doc/simple.csv" (to-csv (process-gedcom "src/test/resources/simple.ged"))))