PageRenderTime 37ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/src/main/scala/updown/data/io/TweetFeatureReader.scala

https://bitbucket.org/speriosu/updown
Scala | 38 lines | 16 code | 8 blank | 14 comment | 1 complexity | 2fcd76a6d4326031bf02972e79ceea23 MD5 | raw file
  1. package updown.data.io
  2. import updown.data._
  /**
   * Reads gold-labeled tweets from a pipe-delimited feature file.
   *
   * Each row is expected to have the form `tweetid|userid|feature1,feature2,...|label`.
   */
  object TweetFeatureReader {

    /**
     * Matches one feature row and captures four pipe-separated fields.
     *
     * The first three groups may not contain a `|`; the fourth group captures
     * everything after the third `|`, so it may itself contain pipes.
     */
    val featureRowRE = """^([^|]*)\|([^|]*)\|([^|]*)\|(.*)$""".r

    /**
     * Parses every line of `inputFile` (decoded as UTF-8) into a tweet.
     *
     * @param inputFile path of the feature file to read
     * @return one [[GoldLabeledTweet]] per line, in file order
     * @throws MatchError if any line does not match [[featureRowRE]]
     */
    def apply(inputFile: String): List[GoldLabeledTweet] = {
      val source = scala.io.Source.fromFile(inputFile, "utf-8")
      try {
        // getLines() is lazy: force it into a List while the file is still open.
        source.getLines().map(parseLine).toList
      } finally {
        // Always release the file handle, even when a line fails to parse.
        source.close()
      }
    }

    /**
     * Parses a single feature row.
     *
     * The feature field is split on commas; each feature is trimmed, and entries
     * that are empty or all whitespace are dropped. The label field is handed to
     * `SentimentLabel.figureItOut` for conversion.
     *
     * @param line one row of the feature file, without its line terminator
     * @return the tweet described by `line`
     * @throws MatchError if `line` does not match [[featureRowRE]]
     */
    def parseLine(line: String): GoldLabeledTweet = {
      // Pattern-binding against the regex throws MatchError on a malformed row.
      val featureRowRE(tweetid, userid, featureString, label) = line
      val features = featureString
        .split(",")
        .toList
        .map(_.trim)
        .filter(_.nonEmpty) // drop features that were empty or only whitespace
      GoldLabeledTweet(tweetid, userid, features, SentimentLabel.figureItOut(label))
    }
  }
  17. /*object RawTweetFeatureReader {
  18. val featureRowRE = """^([^|]*)\|([^|]*)\|([^|]*)\|(.*)$""".r
  19. def apply(inputFile: String): List[GoldLabeledTweet] = {
  20. val lines = scala.io.Source.fromFile(inputFile, "utf-8").getLines.toList
  21. for (line <- lines) yield {
  22. parseLine(line: String): GoldLabeledTweet = {
  23. }
  24. }
  25. }
  26. */