PageRenderTime 42ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/test/utest.rb

https://github.com/dsturnbull/mrtoolkit
Ruby | 471 lines | 380 code | 66 blank | 25 comment | 0 complexity | 441ce3be721acb3acb5b59ff269bfb4f MD5 | raw file
  1. require 'test/unit'
  2. require 'mrtoolkit'
  3. require 'regression'
  4. ##############################
  5. # Log example.
  6. #
  7. # Reformats the date and time into one field.
  8. # Reducer adds an extra column
  # Switch the toolkit's testing flag on before any job class is declared.
  # NOTE(review): the precise effect of testing(true) lives in mrtoolkit — confirm there.
  JobBase.testing true
  # Mapper for the log example: merges the separate date and time
  # columns into a single date_time column and passes url through.
  class LogMap < MapBase
    # Declares the input columns (date, time, url) and the emitted
    # columns (date_time, url), in that order.
    def declare
      [:date, :time, :url].each { |name| field name }
      [:date_time, :url].each { |name| emit name }
    end

    # Builds "<date>T<time>" and copies url onto the output record.
    # Returns the populated output record.
    def process(input, output)
      stamp = input.date + "T" + input.time
      output.date_time = stamp
      output.url = input.url
      output
    end
  end
  # Reducer for the log example: forwards each record and appends a
  # constant "junk" column.
  class LogReduce < ReduceBase
    # Input columns: date_time, url. Emitted columns: date_time, url, junk.
    def declare
      [:date_time, :url].each { |name| field name }
      [:date_time, :url, :junk].each { |name| emit name }
    end

    # Copies the input into the output via copy_struct, then sets
    # junk to "x" on the result and returns it.
    def process(input, output)
      result = copy_struct(input, output)
      result.junk = "x"
      result
    end
  end
  # Job definition tying LogMap and LogReduce to the test1 fixture.
  class LogJob < JobBase
    # Declares mapper, reducer, input path and output path for the job.
    def job
      mapper(LogMap)
      reducer(LogReduce)
      infiles("test-in/test1-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs LogJob and checks the rows written to "test-out".
    def test_log
      LogJob.run_command
      expected_rows = [
        "2008-10-01T10:30:00\t1.2.3.4\tx\n",
        "2008-10-02T11:30:00\t1.2.3.5\tx\n"
      ]
      actual = File.read("test-out")
      assert_equal(expected_rows.join, actual)
    end
  end
  55. ##########################################
  56. #
  57. # Computes count, total, and sum of squares.
  # Mapper that turns each input value into a (count, total,
  # sum_of_squares) triple for later aggregation.
  class SumMap < MapBase
    # Input column: value. Emitted columns: count, total, sum_of_squares.
    def declare
      field :value
      [:count, :total, :sum_of_squares].each { |name| emit name }
    end

    # Converts the value to a Float and emits 1, the value, and its square.
    def process(input, output)
      number = input.value.to_f
      output.count = 1
      output.total = number
      output.sum_of_squares = number * number
      output
    end
  end
  73. # This could be done with canned reducer
  # Hand-written reducer that accumulates count, total and
  # sum_of_squares over its input and emits one summary record.
  class MySumReduce < ReduceBase
    # Input and emitted columns are the same three: count, total, sum_of_squares.
    def declare
      columns = [:count, :total, :sum_of_squares]
      columns.each { |name| field name }
      columns.each { |name| emit name }
    end

    # Resets the three accumulators to 0; returns nil (no record produced here).
    def process_begin(dummy, output)
      @count = @total = @sum_of_squares = 0
      nil
    end

    # Adds the Float value of each input column to its accumulator;
    # returns nil (no record produced per input).
    def process(input, output)
      @count += input.count.to_f
      @total += input.total.to_f
      @sum_of_squares += input.sum_of_squares.to_f
      nil
    end

    # Writes the accumulated values to the output record and returns it.
    def process_end(dummy, output)
      output.count = @count
      output.total = @total
      output.sum_of_squares = @sum_of_squares
      output
    end
  end
  # Job definition pairing SumMap with MySumReduce on the test2 fixture.
  class SumJob < JobBase
    # Declares mapper, reducer, input path and output path for the job.
    def job
      mapper(SumMap)
      reducer(MySumReduce)
      infiles("test-in/test2-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs SumJob and checks the single summary row written to "test-out".
    def test_sum
      SumJob.run_command
      actual = File.read("test-out")
      assert_equal("4.0\t43.0\t1005.0\n", actual)
    end
  end
  118. ######################################
  119. #
  120. # Groups times into one-minute buckets
  121. # Calculates counts for each bucket
  # Time.parse comes from the 'time' standard library. The 'parsedate'
  # library previously required here was dropped from Ruby's standard
  # library in 1.9, so requiring it raises LoadError on current interpreters.
  require 'time'

  # Mapper that converts a date column and a time column into a
  # minute-of-week index and emits it with a count of 1.
  class MinMap < MapBase
    # Input columns: dt, tm. Emitted columns: minute, count.
    def declare
      field :dt
      field :tm
      emit :minute
      emit :count
    end

    # Parses "<dt> <tm>" as a local time and computes
    #   minute + 60 * (hour + 24 * weekday)
    # where weekday is Time#wday (Sunday = 0).
    #
    # Raises ArgumentError (from Time.parse) when the text cannot be parsed.
    # Returns the populated output record.
    def process(input, output)
      t = Time.parse(input.dt + " " + input.tm)
      min = t.min + 60 * (t.hour + 24 * t.wday)
      output.count = 1
      output.minute = min
      output
    end
  end
  # Reducer that counts the records in each group and emits the
  # group value together with that count.
  class MyMinReduce < ReduceBase
    # Input columns: minute, count. Emitted columns: min, count.
    def declare
      [:minute, :count].each { |name| field name }
      [:min, :count].each { |name| emit name }
    end

    # Resets the counter; returns nil (no record produced here).
    def process_init(input, output)
      @count = 0
      nil
    end

    # Increments the counter by one per record; returns nil.
    def process_each(input, output)
      @count += 1
      nil
    end

    # Emits @last and the accumulated count.
    # NOTE(review): @last is never assigned in this class — presumably
    # ReduceBase maintains it as the current group key; confirm.
    def process_term(input, output)
      output.min = @last
      output.count = @count
      output
    end
  end
  # Job definition pairing MinMap with MyMinReduce on the test3 fixture.
  class MyMinJob < JobBase
    # Declares mapper, reducer, input path and output path for the job.
    def job
      mapper(MinMap)
      reducer(MyMinReduce)
      infiles("test-in/test3-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs MyMinJob and checks the per-minute counts written to "test-out".
    #
    # Named test_my_min rather than test_min: TestMRToolkit is reopened
    # later in this file with another test_min (for MinJob), and a second
    # definition with the same name replaces the first, so under the old
    # name this check was silently never executed.
    def test_my_min
      MyMinJob.run_command
      out = File.read("test-out")
      expected = "8460\t1\n" +
        "8461\t1\n" +
        "8470\t3\n"
      assert_equal(expected, out)
    end
  end
  178. #################################
  179. #
  180. # This is the previous one, but with a standard reducer.
  # Job definition reusing MinMap with the toolkit's CopyReduce.
  class CollectJob < JobBase
    # Declares mapper, reducer (with the extra argument 1, whose meaning
    # is defined by CopyReduce), input path and output path.
    def job
      mapper(MinMap)
      reducer(CopyReduce, 1)
      infiles("test-in/test3-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs CollectJob and checks the rows written to "test-out".
    def test_collect
      CollectJob.run_command
      expected_rows = [
        "8460\n",
        "8461\n",
        "8470\n",
        "8470\n",
        "8470\n"
      ]
      actual = File.read("test-out")
      assert_equal(expected_rows.join, actual)
    end
  end
  201. #################################
  202. #
  203. # This is the previous one, but with a different
  204. # standard reducer. This produces the same output
  205. # as the custom reducer.
  # Job definition reusing MinMap with the toolkit's UniqueReduce.
  class UniqueJob < JobBase
    # Declares mapper, reducer, input path and output path for the job.
    def job
      mapper(MinMap)
      reducer(UniqueReduce)
      infiles("test-in/test3-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs UniqueJob and checks the distinct rows written to "test-out".
    def test_unique
      UniqueJob.run_command
      expected_rows = [
        "8460\n",
        "8461\n",
        "8470\n"
      ]
      actual = File.read("test-out")
      assert_equal(expected_rows.join, actual)
    end
  end
  224. ###############################
  225. #
  226. # Exercises SumReduce, which sums a variable
  227. # set of columns.
  # Job definition exercising SumReduce over three columns.
  class GSumJob < JobBase
    # Declares CopyMap and SumReduce (each given the argument 3),
    # the input path and the output path.
    def job
      mapper(CopyMap, 3)
      reducer(SumReduce, 3)
      infiles("test-in/test6-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs GSumJob and checks the summed columns written to "test-out".
    def test_gsum
      GSumJob.run_command
      actual = File.read("test-out")
      assert_equal("12.0\t9.0\t8.0\n", actual)
    end
  end
  # Job definition exercising SelectMap with a regular-expression filter.
  class SelectJob < JobBase
    # Declares SelectMap with the pattern /^10[23]/, CopyReduce,
    # the input path and the output path.
    def job
      mapper(SelectMap, /^10[23]/)
      reducer(CopyReduce)
      infiles("test-in/test5-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs SelectJob and checks the selected rows written to "test-out".
    def test_select
      SelectJob.run_command
      actual = File.read("test-out")
      assert_equal("102\n102\n102\n102\n103\n", actual)
    end
  end
  # Job definition exercising SampleReduce.
  class SampleJob < JobBase
    # Declares CopyMap (argument 3), SampleReduce (argument 10),
    # the input path and the output path.
    def job
      mapper(CopyMap, 3)
      reducer(SampleReduce, 10)
      infiles("test-in/test7-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Seeds the random number generator, runs SampleJob and checks the
    # rows written to "test-out".
    def test_sample
      # Fixed seed so the expected sample below is reproducible.
      srand(1234)
      SampleJob.run_command
      actual = File.read("test-out")
      assert_equal("5\n20\n7\n12\n2\n8\n3\n16\n17\n18\n", actual)
    end
  end
  # Job definition exercising MaxReduce.
  class MaxJob < JobBase
    # Declares CopyMap and MaxReduce (each given the argument 3),
    # the input path and the output path.
    def job
      mapper(CopyMap, 3)
      reducer(MaxReduce, 3)
      infiles("test-in/test4-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs MaxJob and checks the rows written to "test-out".
    def test_max
      MaxJob.run_command
      actual = File.read("test-out")
      assert_equal("4\t10\n3\t3\n2\t2\n", actual)
    end
  end
  # Job definition exercising MinReduce.
  class MinJob < JobBase
    # Declares CopyMap and MinReduce (each given the argument 3),
    # the input path and the output path.
    def job
      mapper(CopyMap, 3)
      reducer(MinReduce, 3)
      infiles("test-in/test4-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs MinJob and checks the rows written to "test-out".
    def test_min
      MinJob.run_command
      actual = File.read("test-out")
      assert_equal("3\t3\n2\t2\n1\t1\n", actual)
    end
  end
  # Job definition exercising UniqueSumReduce.
  class UniqueSumJob < JobBase
    # Declares CopyMap (argument 2), UniqueSumReduce, the input path
    # and the output path.
    def job
      mapper(CopyMap, 2)
      reducer(UniqueSumReduce)
      infiles("test-in/test5-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs UniqueSumJob and checks the rows written to "test-out".
    def test_unique_sum
      UniqueSumJob.run_command
      actual = File.read("test-out")
      assert_equal("100\t3\n101\t2\n102\t4\n103\t1\n104\t2\n", actual)
    end
  end
  # Job definition exercising UniqueCountReduce.
  class UniqueCountJob < JobBase
    # Declares CopyMap, UniqueCountReduce, the input path and the output path.
    def job
      mapper(CopyMap)
      reducer(UniqueCountReduce)
      infiles("test-in/test5-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs UniqueCountJob and checks the rows written to "test-out".
    def test_unique_count
      UniqueCountJob.run_command
      actual = File.read("test-out")
      assert_equal("100\t3\n101\t2\n102\t4\n103\t1\n104\t2\n", actual)
    end
  end
  # Job definition exercising MaxUniqueSumReduce.
  class MaxUniqueSumJob < JobBase
    # Declares CopyMap and MaxUniqueSumReduce (each given the argument 3),
    # the input path and the output path.
    def job
      mapper(CopyMap, 3)
      reducer(MaxUniqueSumReduce, 3)
      infiles("test-in/test5-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs MaxUniqueSumJob and checks the rows written to "test-out".
    def test_max_unique_sum
      MaxUniqueSumJob.run_command
      actual = File.read("test-out")
      assert_equal("102\t4\n100\t3\n101\t2\n", actual)
    end
  end
  # Job definition exercising UniqueIndexedSumReduce.
  class UniqueIndexedSumJob < JobBase
    # Declares CopyMap and UniqueIndexedSumReduce (each given the
    # argument 3), the input path and the output path.
    def job
      mapper(CopyMap, 3)
      reducer(UniqueIndexedSumReduce, 3)
      infiles("test-in/test8-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs UniqueIndexedSumJob and checks the rows written to "test-out".
    def test_unique_indexed_sum
      UniqueIndexedSumJob.run_command
      actual = File.read("test-out")
      assert_equal("100\t1000\t3\n100\t1001\t1\n200\t1000\t2\n200\t1001\t1\n", actual)
    end
  end
  # Job definition exercising UniqueFirstReduce.
  class UniqueFirstJob < JobBase
    # Declares CopyMap (argument 4), UniqueFirstReduce (arguments 3 and 1),
    # the input path and the output path.
    def job
      mapper(CopyMap, 4)
      reducer(UniqueFirstReduce, 3, 1)
      infiles("test-in/test9-in")
      outfiles("test-out")
    end
  end
  class TestMRToolkit < Test::Unit::TestCase
    # Runs UniqueFirstJob and checks the rows written to "test-out".
    def test_unique_first
      UniqueFirstJob.run_command
      actual = File.read("test-out")
      assert_equal("a\ta\ta\nx1\ty1\tz1\n", actual)
    end
  end
  # Checks LinearRegression (from the 'regression' library) on small data sets.
  class TestRegression < Test::Unit::TestCase
    def test_regress
      # Fitting y = x and evaluating at the same points.
      xs = [1, 2, 3]
      ys = [1, 2, 3]
      identity_model = LinearRegression.new(xs, ys)
      assert_equal([1, 2, 3], identity_model.fit(xs))

      # Slope and offset on a four-point data set. The same check is
      # performed twice with identical data, as in the original test.
      # NOTE(review): the second pass may have been meant to use different data.
      xs = [1, 2, 3, 4]
      2.times do
        ys = [1, 5, 5, 9]
        model = LinearRegression.new(xs, ys)
        assert_equal(2, model.slope)
        assert_equal(0, model.offset)
      end
    end
  end