in4355-fp-project /src/main/java/nl/tudelft/ewi/se/in4355/server/jobs/wordcount/WordCountJob.scala

Language Scala Lines 85
MD5 Hash 7e51db62c85ce94f54f8bfc0bc798aba
Repository https://gitlab.com/fptudelft/in4355-fp-project.git View Raw File
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package nl.tudelft.ewi.se.in4355.server.jobs.wordcount

import scala.collection.JavaConversions
import com.google.gson.reflect.TypeToken
import nl.tudelft.ewi.se.in4355.server.jobs.MapTask
import nl.tudelft.ewi.se.in4355.server.jobs.TaskTracker
import nl.tudelft.ewi.se.in4355.server.jobs.ReduceTask
import java.util.concurrent.Callable

/**
 * Word-count job: runs one map phase over the lines of a classpath resource and
 * then repeated reduce phases until the intermediate result stops shrinking.
 *
 * The work itself is performed by scripts (`wordcount-mappercombiner.js` and
 * `wordcount-reducer.js`, both loaded from the classpath) that are handed to the
 * [[TaskTracker]] wrapped in a `MapTask` / `ReduceTask`. This class only splits
 * the data into packages, submits the tasks, and collects the answers into a
 * [[WordIndex]].
 *
 * `call()` blocks the calling thread: it polls each submitted task for
 * completion every 100 ms.
 *
 * @param inputFile name of the classpath resource (resolved relative to the
 *                  classpath root) whose non-empty lines are counted
 */
class WordCountJob(val inputFile: String) extends Callable[WordIndex] {

  val tracker = TaskTracker

  /** Number of input lines per map data package. */
  private val MapBatchSize = 500

  /** Number of word-count entries per reduce data package. */
  private val ReduceBatchSize = 10000

  /** Delay between two completion checks of a submitted task, in milliseconds. */
  private val PollIntervalMillis = 100L

  /**
   * Runs the map phase followed by the reduce phases.
   *
   * @return the index holding the reduced word counts
   * @throws java.io.FileNotFoundException if the input file or one of the
   *                                       scripts is not on the classpath
   */
  def call(): WordIndex = reduceAll(map())

  /**
   * Splits the input lines into packages of `MapBatchSize`, submits them as one
   * map task and blocks until that task reports completion.
   *
   * @return a new index containing every word count answered by the map task
   */
  private def map(): WordIndex = {
    val results = new WordIndex()

    val data = readLines(inputFile)
      .grouped(MapBatchSize)
      .map(batch => JavaConversions.seqAsJavaList(batch))
      .toList
    println("Mapping " + data.size + " data packages")

    val mapTask = new MapTask[java.util.List[String], WordCountList](read("wordcount-mappercombiner.js"), data, new TypeToken[WordCountList]() {}) {
      def handleAnswer(result: WordCountList): Unit = insertCounts(results, result)
    }
    tracker.submitTask(mapTask)
    awaitCompletion(mapTask.completed)

    results
  }

  /**
   * Repeatedly takes everything out of `results`, reduces it in packages of
   * `ReduceBatchSize` and lets the answers flow back into `results`.
   *
   * The loop ends as soon as two consecutive rounds start with the same number
   * of entries, i.e. when a reduce round no longer changed the entry count.
   *
   * @param results index produced by the map phase; it is modified in place
   * @return the same `results` instance
   */
  private def reduceAll(results: WordIndex): WordIndex = {
    var size = 0
    var prevSize = -1

    while (size != prevSize) {
      // NOTE(review): takeAll presumably drains the index so that reduce() can
      // refill it with the merged counts - confirm against WordIndex.
      val reduceData = results.takeAll
      prevSize = size
      size = reduceData.size
      val groupedData = reduceData
        .grouped(ReduceBatchSize)
        .map(batch => JavaConversions.seqAsJavaList(batch))
        .toList
      println("Reducing " + groupedData.size + " data packages (" + size + " words total)")

      reduce(results, groupedData)
    }

    results
  }

  /**
   * Submits one reduce task for the given packages and blocks until it reports
   * completion; every answered word count is inserted into `results`.
   *
   * @param results     index receiving the reduced word counts
   * @param groupedData packages of word counts to reduce
   */
  private def reduce(results: WordIndex, groupedData: List[java.util.List[WordCount]]): Unit = {
    val reduceTask = new ReduceTask[java.util.List[WordCount], WordCountList](read("wordcount-reducer.js"), groupedData, new TypeToken[WordCountList]() {}) {
      def handleAnswer(result: WordCountList): Unit = insertCounts(results, result)
    }

    tracker.submitTask(reduceTask)
    awaitCompletion(reduceTask.completed)
  }

  /**
   * Inserts every word count of one task answer into `target`.
   *
   * NOTE(review): this is called from the tasks' `handleAnswer` callbacks; if
   * the tracker invokes them on other threads, `WordIndex.insert` has to be
   * thread-safe - confirm.
   */
  private def insertCounts(target: WordIndex, result: WordCountList): Unit = {
    for (index <- 0 until result.wordCounts.size) {
      target.insert(result.wordCounts.get(index))
    }
  }

  /**
   * Blocks the current thread until `completed` evaluates to true, re-checking
   * it every `PollIntervalMillis` milliseconds.
   *
   * @param completed by-name completion flag, re-evaluated on every check
   */
  private def awaitCompletion(completed: => Boolean): Unit = {
    while (!completed) {
      Thread.sleep(PollIntervalMillis)
    }
  }

  /**
   * Reads a classpath resource into a single string.
   *
   * Every non-empty line is prefixed with a newline, so a non-empty result
   * starts with "\n" and an empty resource yields "".
   */
  private def read(fileName: String): String = {
    readLines(fileName).map(line => "\n" + line).mkString
  }

  /**
   * Reads the non-empty lines of a classpath resource and closes the resource
   * afterwards.
   *
   * @throws java.io.FileNotFoundException if the resource does not exist
   */
  private def readLines(fileName: String): List[String] = {
    // getResourceAsStream signals a missing resource with null; fail with a
    // descriptive error instead of a NullPointerException further down.
    val stream = Option(resourceStream(fileName)).getOrElse(
      throw new java.io.FileNotFoundException("Classpath resource not found: /" + fileName))
    val source = scala.io.Source.fromInputStream(stream)
    try {
      // getLines() is lazy: materialise the list before the source is closed.
      source.getLines().filter(line => !line.isEmpty).toList
    } finally {
      source.close()
    }
  }

  /** Opens `fileName` relative to the classpath root; null when it is missing. */
  private def resourceStream(fileName: String) = {
    getClass.getResourceAsStream("/" + fileName)
  }

}
Back to Top