PageRenderTime 58ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 0ms

/src/main/clojure/incanter/io.clj

https://github.com/purcell/incanter
Clojure | 327 lines | 242 code | 60 blank | 25 comment | 32 complexity | 81cdbd2f98042dfe826fa6e823a1b46d MD5 | raw file
  1. ;;; io.clj -- Data I/O library for Clojure built on CSVReader
  2. ;; by David Edgar Liebke http://incanter.org
  3. ;; March 11, 2009
  4. ;; Copyright (c) David Edgar Liebke, 2009. All rights reserved. The use
  5. ;; and distribution terms for this software are covered by the Eclipse
  6. ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
  7. ;; which can be found in the file epl-v10.html at the root of this
  8. ;; distribution. By using this software in any fashion, you are
  9. ;; agreeing to be bound by the terms of this license. You must not
  10. ;; remove this notice, or any other, from this software.
  11. ;; CHANGE LOG
  12. ;; March 11, 2009: First version
  13. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  14. ;; DATA IO FUNCTIONS
  15. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  16. (ns
  17. #^{:doc
  18. "
  19. lib for handy io goodness.
  20. -ability to pretty print reports of various models
  21. -read in clojure and json literals in different ways; files, classpath resources, etc.
  22. -support for csv and sql
  23. http://stackoverflow.com/questions/613929/how-do-i-connect-to-a-mysql-database-from-clojure
  24. ;;example sql transformation
  25. (def stuff (sql-select
  26. (sql-unique \"something\")
  27. (sql-where (str \"something = \"something\"))
  28. (sql-from \"mytable\"))
  29. (sql-order-by \"foo, bar\")))
  30. (defn transform-query [t q] #(with-query-results res [(q %)] (t res)))
  31. (defn sql-transformer [key-query
  32. transform
  33. view-query
  34. output-file]
  35. (with-data key-query
  36. #(with-out-writer output-file
  37. (dorun (do-for :something % (transform-query transform view-query))))))
  38. (defn sql-to-hadoop [transform]
  39. (sql-transformer stuff transform myview \"/target/dir/preprocessed.pre\"))
  40. (defn transform-dates [] (sql-to-hadoop #(binding [*print-dup* true] (prn (preprocess %)))))
  41. "}
  42. incanter.io
  43. ;(:gen-class)
  44. (:import (java.io FileReader FileWriter File)
  45. (au.com.bytecode.opencsv CSVReader))
  46. (:use [incanter.core :only (dataset save)])
  47. (:use [org.danlarkin.json
  48. :only [decode-from-reader decode-from-str encode-to-str]])
  49. (:use [clojure.contrib.duck-streams :only [reader read-lines spit]])
  50. (:use [incanter.chrono :only [joda-date]])
  51. (:use [clojure.contrib.pprint :only [pprint]])
  52. (:use [incanter.classification :only [model-from-maps]])
  53. (:use [incanter.transformations :only [sort-map-of-maps all-keys]])
  54. (:use [clojure.contrib.java-utils :only [file]])
  55. (:use [clojure.contrib.sql
  56. :only [with-connection with-query-results]])
  57. (:use [clojure.contrib.str-utils :only [str-join]])
  58. (:use [incanter.core :only (dataset save)]))
  (defn- parse-string
    "Converts a raw cell value to the most specific type that fits it.

    A string is tried as an Integer first, then as a Double; if neither
    parse succeeds the original value is returned unchanged. Surrounding
    whitespace is ignored for both numeric attempts, so \" 42\" yields the
    integer 42 (Double/parseDouble trims its input but Integer/parseInt does
    not, which previously turned padded integers into doubles).

    Anything that is not a string, including nil, is returned as-is instead
    of failing inside the Java parsers."
    [value]
    (if-not (string? value)
      value
      (let [text (.trim #^String value)]
        (try (Integer/parseInt text)
             (catch NumberFormatException _
               (try (Double/parseDouble text)
                    ;; not numeric at all: hand back the untouched input
                    (catch NumberFormatException _ value)))))))
  (defn- get-input-reader
    "Opens a character reader for location.

    location is first treated as a URL and its stream is wrapped in an
    InputStreamReader. When the string is not a well-formed URL
    (MalformedURLException) it is treated as a local file path and opened
    with a FileReader instead. The caller is responsible for closing the
    returned reader."
    [location]
    (try
      (let [url (java.net.URL. location)
            stream (.openStream url)]
        (java.io.InputStreamReader. stream))
      (catch java.net.MalformedURLException _
        ;; not a URL, so fall back to the local file system
        (java.io.FileReader. location))))
  (defn read-dataset
    "
    Returns a dataset read from a file or a URL.

    Options:
      :delim (default \\,), other options (\\tab \\space \\|  etc)
      :quote (default \\\") character used for quoting strings
      :skip (default 0) the number of lines to skip at the top of the file.
      :header (default false) indicates the file has a header line

    Every cell is run through parse-string, so numeric-looking cells become
    numbers. Empty cells are dropped from their row and rows with no
    remaining cells are dropped entirely. Without a header line the columns
    are named col0, col1, ... based on the width of the first row.
    "
    ([filename & options]
       (let [{delim-opt :delim
              quote-opt :quote
              skip-opt :skip
              header-opt :header} (when options (apply assoc {} options))
             delim (or delim-opt \,) ; comma is the default delimiter
             quote-char (or quote-opt \")
             skip (or skip-opt 0)]
         (with-open [#^CSVReader csv (CSVReader. (get-input-reader filename)
                                                 delim
                                                 quote-char
                                                 skip)]
           (let [cells-per-line (map (fn [line] (remove #(= "" %) (seq line)))
                                     (.readAll csv))
                 non-blank (remove empty? cells-per-line)
                 ;; realise everything as vectors before the reader is closed
                 parsed (vec (map #(vec (map parse-string %)) non-blank))]
             (if header-opt
               ;; first row supplies the column names
               (dataset (first parsed) (rest parsed))
               ;; no header row, so generate col0..colN names
               (let [width (count (first parsed))
                     default-names (vec (map #(str "col" %) (range width)))]
                 (dataset default-names parsed))))))))
  ;; save for incanter.Matrix: writes mat to filename as delimited text.
  ;;
  ;; Options (keyword/value pairs after filename):
  ;;   :delim  cell delimiter (default \,)
  ;;   :header sequence of column names written as the first line; skipped
  ;;           when absent or when appending
  ;;   :append when exactly true, rows are appended to an existing file
  ;;
  ;; Rows that are plain numbers (as produced when iterating the matrix
  ;; yields numbers rather than rows) are written one per line.
  ;; The writer is managed by with-open so it is closed even if a write fails.
  (defmethod save incanter.Matrix [mat filename & options]
    (let [opts (when options (apply assoc {} options))
          delim (or (:delim opts) \,)
          header (:header opts)
          append? (true? (:append opts))]
      (with-open [file-writer (java.io.FileWriter. filename append?)]
        (when (and header (not append?))
          (.write file-writer (str (apply str (interpose delim header)) \newline)))
        (doseq [row mat]
          (if (number? row)
            (.write file-writer (str row \newline))
            (.write file-writer (str (apply str (interpose delim row)) \newline)))))))
  ;; save for :incanter.core/dataset: writes the dataset to filename as
  ;; delimited text.
  ;;
  ;; Options (keyword/value pairs after filename):
  ;;   :delim  cell delimiter (default \,)
  ;;   :header sequence of names for the header line (defaults to the
  ;;           dataset's :column-names); the header is skipped when appending
  ;;   :append when exactly true, rows are appended to an existing file
  ;;
  ;; Each row is looked up by the dataset's :column-names, in that order.
  ;; The writer is managed by with-open so it is closed even if a write fails.
  (defmethod save :incanter.core/dataset [dataset filename & options]
    (let [opts (when options (apply assoc {} options))
          delim (or (:delim opts) \,)
          header (or (:header opts) (:column-names dataset))
          append? (true? (:append opts))
          rows (:rows dataset)
          columns (:column-names dataset)]
      (with-open [file-writer (java.io.FileWriter. filename append?)]
        (when (and header (not append?))
          (.write file-writer (str (apply str (interpose delim header)) \newline)))
        (doseq [row rows]
          (.write file-writer
                  (str (apply str (interpose delim (map row columns))) \newline))))))
  (defn read-map
    "Returns a map from each of the given keys to a function that reads a
    string as Clojure code and evaluates it.

    NOTE: the functions use eval on whatever string they are given, so they
    must only be applied to trusted input."
    [& keys]
    (reduce (fn [readers k]
              (assoc readers k (comp eval read-string)))
            {}
            keys))
  (defn string-date-read-map
    "Returns a map from each of the given keys to the joda-date function."
    [& keys]
    (zipmap keys (repeat joda-date)))
  (defn read-json-file
    "Decodes the JSON content of f and returns the resulting data.

    f is anything the duck-streams reader function accepts. The reader is
    opened with with-open so it is closed once decoding finishes or fails,
    instead of being left open."
    [f]
    (with-open [in (reader f)]
      (decode-from-reader in)))
  ;;todo if we want this process to be lazy we can remove the doall.
  (defn read-json-lines
    "Decodes every line of f as a separate JSON document and returns the
    fully realised sequence of decoded values."
    [f]
    (doall (map decode-from-str (read-lines f))))
  ;;TODO: switch back to stream impl?
  (defn clj-to-json-file
    "Encodes the Clojure value c as a JSON string and writes it to the file
    at path f."
    [c f]
    (let [target (File. f)
          json (encode-to-str c)]
      (spit target json)))
  ;;doesn't work in maven builds. must use fn below.
  ;;(ClassLoader/getSystemResource f)))))
  (defn load-resource
    "Returns an input stream for the classpath resource named f, or nil when
    the resource cannot be found.

    The lookup goes through the class loader of the class of the current
    namespace object (*ns*)."
    [f]
    (let [loader (.getClassLoader (class *ns*))]
      (.getResourceAsStream loader f)))
  (defn read-from-classpath
    "Returns a reader over the classpath resource named f, located with
    load-resource."
    [f]
    (-> f load-resource reader))
  (defn json-from-classpath
    "Decodes the JSON content of the classpath resource named f and returns
    the resulting data.

    The reader is opened with with-open so the underlying stream is closed
    once decoding finishes or fails, instead of being left open."
    [f]
    (with-open [in (read-from-classpath f)]
      (decode-from-reader in)))
  (defn report-model
    "Builds a model from the given arguments with model-from-maps, orders it
    with sort-map-of-maps and pretty-prints the result."
    [& args]
    (pprint (sort-map-of-maps (apply model-from-maps args))))
  (defn package-model
    "Builds a model from prob-map-tuple with model-from-maps and writes it
    as JSON to the path given by file."
    [file prob-map-tuple]
    (let [model (model-from-maps prob-map-tuple)]
      (clj-to-json-file model file)))
  (defn unpackage-model
    "Reads a JSON file via read-json-file and returns the decoded data;
    the counterpart of package-model."
    [file]
    (-> file read-json-file))
  (defn into-file
    "Writes stuff to the file named filename."
    [filename stuff]
    (spit (file filename) stuff))
  (defn csv-line
    "Turns a vector into a single csv line: the elements are joined with
    \", \" (comma followed by a space) and the line ends with a newline.
    An empty vector yields just the newline. Values are not quoted or
    escaped."
    [v]
    (str (apply str (interpose ", " v)) "\n"))
  (defn csv-table
    "Turns a 2-level map into a csv table, returned as one string.

    Each entry of m becomes a line: the outer key followed by the inner
    map's value for every column, with 0 written where the inner map has no
    (or a false/nil) value. The first line is the header, starting with an
    empty cell. Column names come from all-keys applied to the inner maps
    (presumably the union of their keys; confirm against all-keys)."
    [m]
    (let [column-names (all-keys (vals m))
          header (cons "" column-names)
          body (map (fn [[row-key row]]
                      (cons row-key (map #(or (row %) 0) column-names)))
                    m)]
      (apply str (map csv-line (cons header body)))))
  (defn with-mysql-results
    "
    Takes dbinfo, a query and a fn, and applies the fn to the query results.

    The connection :subname is derived from the :host, :port and :name
    entries of dbinfo. f is called while the connection is still open.

    example dbinfo:
    {:host \"localhost\"
     :port 3306
     :name \"testimport\"
     :classname \"com.mysql.jdbc.Driver\"
     :subprotocol \"mysql\"
     :user \"root\"
     :password \"12345\"}
    "
    [dbinfo query f]
    (let [subname (str "//" (:host dbinfo) ":" (:port dbinfo) "/" (:name dbinfo))
          db (assoc dbinfo :subname subname)]
      (with-connection db
        (with-query-results rows [query]
          (f rows)))))
  (defn sql-query
    "Runs query q against the database described by d (see
    with-mysql-results) and prints one line per result row."
    [d q]
    ;; NOTE(review): the expression below looks :internaluage up in the
    ;; keyword :iso_code with the row as the not-found value, so it appears
    ;; to always print the whole row. It looks like a garbled column
    ;; lookup; confirm which column was intended.
    (let [print-row (fn [row] (println (:internaluage :iso_code row)))]
      (with-mysql-results d q
        (fn [rows]
          (doseq [row rows]
            (print-row row))))))
  (defn query
    "Builds the text of a select statement that reads the given columns
    from table, limited to sample rows. The pieces are concatenated as-is,
    without any escaping."
    [table sample & columns]
    (let [column-list (apply str (interpose ", " columns))]
      (str "select " column-list " from " table " limit " sample)))
  (defn sql-select
    "Builds a select statement by joining the word \"select\" and the given
    clause strings with single spaces."
    [& x]
    (apply str (interpose " " (cons "select" x))))
  (defn sql-from
    "Returns a from clause for the given table expression."
    [table]
    (str "from " table))

  (defn sql-unique
    "Returns a distinct clause for the given column expression."
    [column]
    (str "distinct " column))

  (defn sql-limit
    "Returns a limit clause for the given row count."
    [n]
    (str "limit " n))

  ;; order-by clause text that orders rows by rand()
  (def random-row "order by rand()")
  (defn sql-order-by
    "Returns an order by clause for the column expression c."
    [c]
    (str "order by " c))
  (defn sql-where
    "Returns a where clause for the predicate text pred."
    [pred]
    (str "where " pred))
  (defn columns
    "Joins the given column names into a single comma-separated string."
    [& x]
    (apply str (interpose ", " x)))