/test/tech/v3/dataset/smile/data_test.clj

https://github.com/techascent/tech.ml.dataset · Clojure · 45 lines · 37 code · 4 blank · 4 comment · 4 complexity · 03aac486b60cf75ae7e3908c7b6b095d MD5 · raw file

  1. (ns tech.v3.dataset.smile.data-test
  2. (:require [tech.v3.dataset :as ds]
  3. [tech.v3.datatype.functional :as dfn]
  4. [tech.v3.libs.smile.data :as smile-data]
  5. [clojure.test :refer [deftest is]])
  6. (:import [smile.data DataFrame]))
  (deftest stocks-test
    ;; Converts the stocks CSV dataset to a smile DataFrame and back again,
    ;; then compares the "date", "symbol" and "price" columns of the result
    ;; against the source dataset.
    (let [src-ds (ds/->dataset "test/data/stocks.csv")
          smile-df (smile-data/dataset->smile-dataframe src-ds)
          round-trip (smile-data/smile-dataframe->dataset smile-df)]
      ;; The forward conversion must yield a real smile DataFrame.
      (is (instance? DataFrame smile-df))
      ;; Datetime types included
      (is (= (-> ((ds/ensure-array-backed src-ds) "date") vec)
             (-> (round-trip "date") vec)))
      (is (= (-> ((ds/ensure-array-backed src-ds) "symbol") vec)
             (-> (round-trip "symbol") vec)))
      ;; Prices are compared with dfn/equals rather than plain `=`.
      (is (dfn/equals (src-ds "price")
                      (round-trip "price")))))
  (deftest ames-test
    ;; Takes the first 10 rows of the Ames house-price training CSV, converts
    ;; them to a smile DataFrame and back, and checks that column datatypes,
    ;; the result of ds/missing, and the "SalePrice" values are preserved.
    (let [ames-src (-> (ds/->dataset "test/data/ames-house-prices/train.csv")
                       (ds/select-rows (range 10)))
          ames-ary (ds/ensure-array-backed ames-src)
          df-ames (smile-data/dataset->smile-dataframe ames-ary)
          new-val (smile-data/smile-dataframe->dataset df-ames)
          ;; Sequence of the :datatype metadata entry of every column.
          col-datatypes (fn [dataset]
                          (map (comp :datatype meta) (vals dataset)))]
      ;; Each element below is a [source array-backed round-tripped] datatype
      ;; triple. `(every? = triples)` would call `=` with the triple as its
      ;; only argument, which is always true, so the triple is spread with
      ;; `apply` to actually compare the three datatypes.
      (is (every? #(apply = %)
                  (map vector
                       (col-datatypes ames-src)
                       (col-datatypes ames-ary)
                       (col-datatypes new-val))))
      (is (java.util.Objects/equals (ds/missing ames-src)
                                    (ds/missing ames-ary)))
      ;;Missing for booleans gets lost in the translation with inference turned on.
      #_(is (java.util.Objects/equals (ds/missing ames-src)
                                      (ds/missing new-val)))
      (is (instance? DataFrame df-ames))
      ;; The "SalePrice" column values must survive the round trip unchanged.
      (is (= (vec (ames-src "SalePrice"))
             (vec (new-val "SalePrice"))))
      ;;Missing for booleans gets lost in the translation with inference turned on.
      #_(is (= (vec (ames-src "PoolQC"))
               (vec (new-val "PoolQC"))))))