/test/pig/datafu/test/pig/util/CoalesceTests.java

https://github.com/mbastian/datafu · Java · 513 lines · 260 code · 65 blank · 188 comment · 9 complexity · 2da2cb7ad1b55642569c391c5ab267bb MD5 · raw file

  1. package datafu.test.pig.util;
  2. import java.util.List;
  3. import junit.framework.Assert;
  4. import org.adrianwalker.multilinestring.Multiline;
  5. import org.apache.pig.data.Tuple;
  6. import org.apache.pig.impl.logicalLayer.FrontendException;
  7. import org.apache.pig.pigunit.PigTest;
  8. import org.joda.time.DateTime;
  9. import org.joda.time.DateTimeZone;
  10. import org.testng.annotations.Test;
  11. import datafu.test.pig.PigTests;
  12. public class CoalesceTests extends PigTests
  13. {
  14. /**
  15. register $JAR_PATH
  16. define COALESCE datafu.pig.util.Coalesce();
  17. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:INT,val2:INT,val3:INT);
  18. data2 = FOREACH data GENERATE testcase, COALESCE(val1,val2,val3) as result;
  19. describe data2;
  20. data3 = FOREACH data2 GENERATE testcase, result;
  21. STORE data3 INTO 'output';
  22. */
  23. @Multiline private static String coalesceIntTest;
  24. @Test
  25. public void coalesceIntTest() throws Exception
  26. {
  27. PigTest test = createPigTestFromString(coalesceIntTest);
  28. this.writeLinesToFile("input", "1,1,2,3",
  29. "2,,2,3",
  30. "3,,,3",
  31. "4,,,",
  32. "5,1,,3",
  33. "6,1,,");
  34. test.runScript();
  35. List<Tuple> lines = this.getLinesForAlias(test, "data3");
  36. Assert.assertEquals(6, lines.size());
  37. for (Tuple t : lines)
  38. {
  39. switch((Integer)t.get(0))
  40. {
  41. case 1:
  42. Assert.assertEquals(1, t.get(1)); break;
  43. case 2:
  44. Assert.assertEquals(2, t.get(1)); break;
  45. case 3:
  46. Assert.assertEquals(3, t.get(1)); break;
  47. case 4:
  48. Assert.assertEquals(null, t.get(1)); break;
  49. case 5:
  50. Assert.assertEquals(1, t.get(1)); break;
  51. case 6:
  52. Assert.assertEquals(1, t.get(1)); break;
  53. default:
  54. Assert.fail("Did not expect: " + t.get(1));
  55. }
  56. }
  57. }
  58. /**
  59. register $JAR_PATH
  60. define COALESCE datafu.pig.util.Coalesce();
  61. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:LONG);
  62. data2 = FOREACH data GENERATE testcase, COALESCE(val1,100L) as result;
  63. describe data2;
  64. data3 = FOREACH data2 GENERATE testcase, result;
  65. data4 = FOREACH data3 GENERATE testcase, result*100 as result;
  66. STORE data4 INTO 'output';
  67. */
  68. @Multiline private static String coalesceLongTest;
  69. @Test
  70. public void coalesceLongTest() throws Exception
  71. {
  72. PigTest test = createPigTestFromString(coalesceLongTest);
  73. this.writeLinesToFile("input", "1,5",
  74. "2,");
  75. test.runScript();
  76. List<Tuple> lines = this.getLinesForAlias(test, "data4");
  77. Assert.assertEquals(2, lines.size());
  78. for (Tuple t : lines)
  79. {
  80. switch((Integer)t.get(0))
  81. {
  82. case 1:
  83. Assert.assertEquals(500L, t.get(1)); break;
  84. case 2:
  85. Assert.assertEquals(10000L, t.get(1)); break;
  86. default:
  87. Assert.fail("Did not expect: " + t.get(1));
  88. }
  89. }
  90. }
  91. /**
  92. register $JAR_PATH
  93. define COALESCE datafu.pig.util.Coalesce();
  94. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:LONG);
  95. data2 = FOREACH data GENERATE testcase, COALESCE(val1,100) as result;
  96. describe data2;
  97. data3 = FOREACH data2 GENERATE testcase, result;
  98. data4 = FOREACH data3 GENERATE testcase, result*100 as result;
  99. STORE data4 INTO 'output';
  100. */
  101. @Multiline private static String coalesceCastIntToLongTestFails;
  102. // The first parameter is a long and the fixed value is an int.
  103. // They cannot be merged without the lazy option.
  104. @Test(expectedExceptions=FrontendException.class)
  105. public void coalesceCastIntToLongTestFails() throws Exception
  106. {
  107. PigTest test = createPigTestFromString(coalesceCastIntToLongTestFails);
  108. this.writeLinesToFile("input", "1,5",
  109. "2,");
  110. test.runScript();
  111. List<Tuple> lines = this.getLinesForAlias(test, "data4");
  112. Assert.assertEquals(2, lines.size());
  113. for (Tuple t : lines)
  114. {
  115. switch((Integer)t.get(0))
  116. {
  117. case 1:
  118. Assert.assertEquals(500L, t.get(1)); break;
  119. case 2:
  120. Assert.assertEquals(10000L, t.get(1)); break;
  121. default:
  122. Assert.fail("Did not expect: " + t.get(1));
  123. }
  124. }
  125. }
  126. /**
  127. register $JAR_PATH
  128. define COALESCE datafu.pig.util.Coalesce('lazy');
  129. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:LONG);
  130. data2 = FOREACH data GENERATE testcase, COALESCE(val1,100) as result;
  131. describe data2;
  132. data3 = FOREACH data2 GENERATE testcase, result;
  133. data4 = FOREACH data3 GENERATE testcase, result*100 as result;
  134. STORE data4 INTO 'output';
  135. */
  136. @Multiline private static String coalesceIntAndLongTest;
  137. // The first parameter is a long and the fixed value is an int.
  138. // They are merged to a long.
  139. @Test
  140. public void coalesceCastIntToLongTest1() throws Exception
  141. {
  142. PigTest test = createPigTestFromString(coalesceIntAndLongTest);
  143. this.writeLinesToFile("input", "1,5",
  144. "2,");
  145. test.runScript();
  146. List<Tuple> lines = this.getLinesForAlias(test, "data4");
  147. Assert.assertEquals(2, lines.size());
  148. for (Tuple t : lines)
  149. {
  150. switch((Integer)t.get(0))
  151. {
  152. case 1:
  153. Assert.assertEquals(500L, t.get(1)); break;
  154. case 2:
  155. Assert.assertEquals(10000L, t.get(1)); break;
  156. default:
  157. Assert.fail("Did not expect: " + t.get(1));
  158. }
  159. }
  160. }
  161. /**
  162. register $JAR_PATH
  163. define COALESCE datafu.pig.util.Coalesce('lazy');
  164. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:INT);
  165. data2 = FOREACH data GENERATE testcase, COALESCE(val1,100L) as result;
  166. describe data2;
  167. data3 = FOREACH data2 GENERATE testcase, result;
  168. data4 = FOREACH data3 GENERATE testcase, result*100 as result;
  169. STORE data4 INTO 'output';
  170. */
  171. @Multiline private static String coalesceIntAndLongTest2;
  172. // The first parameter is an int, but the fixed parameter is a long.
  173. // They are merged to a long.
  174. @Test
  175. public void coalesceCastIntToLongTest2() throws Exception
  176. {
  177. PigTest test = createPigTestFromString(coalesceIntAndLongTest2);
  178. this.writeLinesToFile("input", "1,5",
  179. "2,");
  180. test.runScript();
  181. List<Tuple> lines = this.getLinesForAlias(test, "data4");
  182. Assert.assertEquals(2, lines.size());
  183. for (Tuple t : lines)
  184. {
  185. switch((Integer)t.get(0))
  186. {
  187. case 1:
  188. Assert.assertEquals(500L, t.get(1)); break;
  189. case 2:
  190. Assert.assertEquals(10000L, t.get(1)); break;
  191. default:
  192. Assert.fail("Did not expect: " + t.get(1));
  193. }
  194. }
  195. }
  196. /**
  197. register $JAR_PATH
  198. define COALESCE datafu.pig.util.Coalesce('lazy');
  199. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:INT);
  200. data2 = FOREACH data GENERATE testcase, COALESCE(val1,100.0) as result;
  201. describe data2;
  202. data3 = FOREACH data2 GENERATE testcase, result;
  203. data4 = FOREACH data3 GENERATE testcase, result*100 as result;
  204. STORE data4 INTO 'output';
  205. */
  206. @Multiline private static String coalesceIntAndDoubleTest;
  207. // The first parameter is an int, but the fixed parameter is a long.
  208. // They are merged to a long.
  209. @Test
  210. public void coalesceCastIntToDoubleTest() throws Exception
  211. {
  212. PigTest test = createPigTestFromString(coalesceIntAndDoubleTest);
  213. this.writeLinesToFile("input", "1,5",
  214. "2,");
  215. test.runScript();
  216. List<Tuple> lines = this.getLinesForAlias(test, "data4");
  217. Assert.assertEquals(2, lines.size());
  218. for (Tuple t : lines)
  219. {
  220. switch((Integer)t.get(0))
  221. {
  222. case 1:
  223. Assert.assertEquals(500.0, t.get(1)); break;
  224. case 2:
  225. Assert.assertEquals(10000.0, t.get(1)); break;
  226. default:
  227. Assert.fail("Did not expect: " + t.get(1));
  228. }
  229. }
  230. }
  231. /**
  232. register $JAR_PATH
  233. define COALESCE datafu.pig.util.Coalesce();
  234. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:LONG);
  235. data = FOREACH data GENERATE testcase, (val1 IS NOT NULL ? ToDate(val1) : (datetime)null) as val1;
  236. data2 = FOREACH data GENERATE testcase, COALESCE(val1,ToDate('1970-01-01T00:00:00.000Z')) as result;
  237. --describe data2;
  238. data3 = FOREACH data2 GENERATE testcase, result;
  239. STORE data3 INTO 'output';
  240. */
  241. @Multiline private static String coalesceCastIntToDatetimeTest;
  242. @Test
  243. public void coalesceCastIntToDatetimeTest() throws Exception
  244. {
  245. PigTest test = createPigTestFromString(coalesceCastIntToDatetimeTest);
  246. this.writeLinesToFile("input", "1,1375826183000",
  247. "2,");
  248. test.runScript();
  249. List<Tuple> lines = this.getLinesForAlias(test, "data3");
  250. Assert.assertEquals(2, lines.size());
  251. for (Tuple t : lines)
  252. {
  253. Integer testcase = (Integer)t.get(0);
  254. Assert.assertNotNull(testcase);
  255. switch(testcase)
  256. {
  257. case 1:
  258. Assert.assertEquals("2013-08-06T21:56:23.000Z", ((DateTime)t.get(1)).toDateTime(DateTimeZone.UTC).toString()); break;
  259. case 2:
  260. Assert.assertEquals("1970-01-01T00:00:00.000Z", t.get(1).toString()); break;
  261. default:
  262. Assert.fail("Did not expect: " + t.get(1));
  263. }
  264. }
  265. }
  266. /**
  267. register $JAR_PATH
  268. define COALESCE datafu.pig.util.Coalesce('lazy');
  269. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:LONG);
  270. data = FOREACH data GENERATE testcase, (val1 IS NOT NULL ? ToDate(val1) : (datetime)null) as val1;
  271. data2 = FOREACH data GENERATE testcase, COALESCE(val1,ToDate('1970-01-01T00:00:00.000Z')) as result;
  272. --describe data2;
  273. data3 = FOREACH data2 GENERATE testcase, result;
  274. STORE data3 INTO 'output';
  275. */
  276. @Multiline private static String coalesceCastIntToDatetimeLazyTest;
  277. @Test
  278. public void coalesceCastIntToDatetimeLazyTest() throws Exception
  279. {
  280. PigTest test = createPigTestFromString(coalesceCastIntToDatetimeLazyTest);
  281. this.writeLinesToFile("input", "1,1375826183000",
  282. "2,");
  283. test.runScript();
  284. List<Tuple> lines = this.getLinesForAlias(test, "data3");
  285. Assert.assertEquals(2, lines.size());
  286. for (Tuple t : lines)
  287. {
  288. Integer testcase = (Integer)t.get(0);
  289. Assert.assertNotNull(testcase);
  290. switch(testcase)
  291. {
  292. case 1:
  293. Assert.assertEquals("2013-08-06T21:56:23.000Z", ((DateTime)t.get(1)).toDateTime(DateTimeZone.UTC).toString()); break;
  294. case 2:
  295. Assert.assertEquals("1970-01-01T00:00:00.000Z", t.get(1).toString()); break;
  296. default:
  297. Assert.fail("Did not expect: " + t.get(1));
  298. }
  299. }
  300. }
  301. /**
  302. register $JAR_PATH
  303. define COALESCE datafu.pig.util.Coalesce();
  304. data = LOAD 'input' using PigStorage(',') AS (testcase:INT,val1:INT,val2:LONG);
  305. data2 = FOREACH data GENERATE testcase, COALESCE(val1,val2) as result;
  306. describe data2;
  307. data3 = FOREACH data2 GENERATE testcase, result;
  308. STORE data3 INTO 'output';
  309. */
  310. @Multiline private static String coalesceBagIncompatibleTypeTest;
  311. @Test(expectedExceptions=FrontendException.class)
  312. public void coalesceBagIncompatibleTypeTest() throws Exception
  313. {
  314. PigTest test = createPigTestFromString(coalesceBagIncompatibleTypeTest);
  315. this.writeLinesToFile("input", "1,1,2L}");
  316. test.runScript();
  317. this.getLinesForAlias(test, "data3");
  318. }
  319. /**
  320. register $JAR_PATH
  321. define COALESCE datafu.pig.util.Coalesce('lazy');
  322. define EmptyBagToNullFields datafu.pig.bags.EmptyBagToNullFields();
  323. input1 = LOAD 'input1' using PigStorage(',') AS (val1:INT,val2:INT);
  324. input2 = LOAD 'input2' using PigStorage(',') AS (val1:INT,val2:INT);
  325. input3 = LOAD 'input3' using PigStorage(',') AS (val1:INT,val2:INT);
  326. data4 = COGROUP input1 BY val1,
  327. input2 BY val1,
  328. input3 BY val1;
  329. dump data4;
  330. data4 = FOREACH data4 GENERATE
  331. FLATTEN(input1),
  332. FLATTEN(EmptyBagToNullFields(input2)),
  333. FLATTEN(EmptyBagToNullFields(input3));
  334. dump data4;
  335. describe data4;
  336. data5 = FOREACH data4 GENERATE input1::val1 as val1, COALESCE(input2::val2,0L) as val2, COALESCE(input3::val2,0L) as val3;
  337. --describe data5;
  338. STORE data5 INTO 'output';
  339. */
  340. @Multiline private static String leftJoinTest;
  341. @Test
  342. public void leftJoinTest() throws Exception
  343. {
  344. PigTest test = createPigTestFromString(leftJoinTest);
  345. this.writeLinesToFile("input1", "1,1",
  346. "2,2",
  347. "5,5");
  348. this.writeLinesToFile("input2", "1,10",
  349. "3,30",
  350. "5,50");
  351. this.writeLinesToFile("input3", "2,100",
  352. "5,500");
  353. test.runScript();
  354. List<Tuple> lines = this.getLinesForAlias(test, "data5");
  355. Assert.assertEquals(3, lines.size());
  356. for (Tuple t : lines)
  357. {
  358. switch((Integer)t.get(0))
  359. {
  360. case 1:
  361. Assert.assertEquals(10L, t.get(1));
  362. Assert.assertEquals(0L, t.get(2));
  363. break;
  364. case 2:
  365. Assert.assertEquals(0L, t.get(1));
  366. Assert.assertEquals(100L, t.get(2));
  367. break;
  368. case 5:
  369. Assert.assertEquals(50L, t.get(1));
  370. Assert.assertEquals(500L, t.get(2));
  371. break;
  372. default:
  373. Assert.fail("Did not expect: " + t.get(0));
  374. }
  375. }
  376. }
  377. }