PageRenderTime 45ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/src/main/java/nl/tudelft/ewi/se/in4355/server/jobs/wordcount/WordCountJob.scala

https://gitlab.com/fptudelft/in4355-fp-project
Scala | 84 lines | 65 code | 19 blank | 0 comment | 6 complexity | 7e51db62c85ce94f54f8bfc0bc798aba MD5 | raw file
  1. package nl.tudelft.ewi.se.in4355.server.jobs.wordcount
  2. import scala.collection.JavaConversions
  3. import com.google.gson.reflect.TypeToken
  4. import nl.tudelft.ewi.se.in4355.server.jobs.MapTask
  5. import nl.tudelft.ewi.se.in4355.server.jobs.TaskTracker
  6. import nl.tudelft.ewi.se.in4355.server.jobs.ReduceTask
  7. import java.util.concurrent.Callable
  /**
   * Word-count job that runs a map phase followed by repeated reduce phases
   * through the shared [[TaskTracker]].
   *
   * The non-empty lines of the classpath resource `inputFile` are split into
   * packages and handed to a `MapTask` together with the script
   * `wordcount-mappercombiner.js`. The collected counts are then reduced with
   * `wordcount-reducer.js` until the number of entries taken from the index is
   * the same in two consecutive rounds.
   *
   * @param inputFile name of the classpath resource (resolved relative to the
   *                  classpath root) whose lines are counted
   */
  class WordCountJob(val inputFile: String) extends Callable[WordIndex] {

    /** Tracker that every map and reduce task of this job is submitted to. */
    val tracker = TaskTracker

    /** Number of input lines bundled into one map data package. */
    private final val MapPackageSize = 500

    /** Number of word counts bundled into one reduce data package. */
    private final val ReducePackageSize = 10000

    /** Pause, in milliseconds, between two checks of a task's completion flag. */
    private final val PollIntervalMillis = 100L

    /**
     * Runs the complete job: map phase first, then reduction to a fixpoint.
     *
     * @return the index holding the reduced word counts
     */
    def call(): WordIndex = reduceAll(map())

    /**
     * Maps the input lines to word counts.
     *
     * Blocks (by polling) until the submitted map task reports completion.
     *
     * @return a fresh index containing every count delivered by the map task
     */
    private def map(): WordIndex = {
      val results = new WordIndex()
      val data = readLines(inputFile)
        .grouped(MapPackageSize)
        .map(lines => JavaConversions.seqAsJavaList(lines))
        .toList
      println("Mapping " + data.size + " data packages")

      // NOTE(review): handleAnswer is presumably invoked from tracker threads while
      // this thread polls; confirm that WordIndex.insert is safe for that usage.
      val mapTask = new MapTask[java.util.List[String], WordCountList](
        read("wordcount-mappercombiner.js"), data, new TypeToken[WordCountList]() {}) {
        def handleAnswer(result: WordCountList): Unit = {
          WordCountJob.this.insertCounts(results, result)
        }
      }
      tracker.submitTask(mapTask)
      awaitCompletion(mapTask.completed)
      results
    }

    /**
     * Repeatedly reduces the contents of `results` until a round takes the same
     * number of entries out of the index as the round before it.
     *
     * @param results index holding the counts to reduce; it is emptied via
     *                `takeAll` and refilled by each reduce round
     * @return the same `results` instance
     */
    private def reduceAll(results: WordIndex): WordIndex = {
      var size = 0
      var prevSize = -1
      while (size != prevSize) {
        // presumably takeAll removes the entries it returns; verify against WordIndex
        val reduceData = results.takeAll
        prevSize = size
        size = reduceData.size
        val groupedData = reduceData
          .grouped(ReducePackageSize)
          .map(counts => JavaConversions.seqAsJavaList(counts))
          .toList
        println("Reducing " + groupedData.size + " data packages (" + size + " words total)")
        reduce(results, groupedData)
      }
      results
    }

    /**
     * Runs one reduce round over `groupedData` and inserts every returned count
     * into `results`. Blocks (by polling) until the reduce task is completed.
     *
     * @param results     index receiving the reduced counts
     * @param groupedData data packages handed to the reduce task
     */
    private def reduce(results: WordIndex, groupedData: List[java.util.List[WordCount]]): Unit = {
      val reduceTask = new ReduceTask[java.util.List[WordCount], WordCountList](
        read("wordcount-reducer.js"), groupedData, new TypeToken[WordCountList]() {}) {
        def handleAnswer(result: WordCountList): Unit = {
          WordCountJob.this.insertCounts(results, result)
        }
      }
      tracker.submitTask(reduceTask)
      awaitCompletion(reduceTask.completed)
    }

    /** Inserts every word count contained in `answer` into `target`, in list order. */
    private def insertCounts(target: WordIndex, answer: WordCountList): Unit = {
      for (index <- 0 until answer.wordCounts.size) {
        target.insert(answer.wordCounts.get(index))
      }
    }

    /**
     * Sleeps in `PollIntervalMillis` steps until `completed` evaluates to true.
     * The flag is passed by name so that it is re-read on every iteration.
     */
    private def awaitCompletion(completed: => Boolean): Unit = {
      while (!completed) {
        Thread.sleep(PollIntervalMillis)
      }
    }

    /**
     * Reads a classpath resource as one string in which every non-empty line is
     * preceded by a newline character (so a non-empty result starts with "\n").
     */
    private def read(fileName: String): String =
      readLines(fileName).map(line => "\n" + line).mkString

    /**
     * Reads the non-empty lines of a classpath resource and closes the
     * underlying stream afterwards.
     *
     * @throws java.io.FileNotFoundException if the resource does not exist
     */
    private def readLines(fileName: String): List[String] = {
      // getResourceAsStream signals a missing resource with null; fail with a clear message.
      val stream = Option(resourceStream(fileName)).getOrElse(
        throw new java.io.FileNotFoundException("Classpath resource not found: /" + fileName))
      val source = scala.io.Source.fromInputStream(stream)
      try {
        // toList forces the lazy line iterator before the source is closed.
        source.getLines().toList.filter((line: String) => !line.isEmpty)
      } finally {
        source.close()
      }
    }

    /** Opens `fileName` relative to the classpath root; null when it does not exist. */
    private def resourceStream(fileName: String): java.io.InputStream =
      getClass.getResourceAsStream("/" + fileName)
  }