PageRenderTime 79ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/indexer/src/main/scala/output/PolygonIndexer.scala

https://gitlab.com/18runt88/twofishes
Scala | 59 lines | 49 code | 9 blank | 1 comment | 3 complexity | 9c36905f0309e6fe0532d50c33b774a0 MD5 | raw file
  1. package com.foursquare.twofishes.output
  2. import com.foursquare.twofishes.Indexes
  3. import com.foursquare.twofishes.mongo.{MongoGeocodeDAO, PolygonIndex, PolygonIndexDAO}
  4. import com.foursquare.twofishes.util.StoredFeatureId
  5. import com.mongodb.Bytes
  6. import com.mongodb.casbah.Imports._
  7. import com.novus.salat._
  8. import com.novus.salat.annotations._
  9. import com.novus.salat.dao._
  10. import com.novus.salat.global._
  11. import com.vividsolutions.jts.io.WKBReader
  12. import java.io._
  13. import org.apache.hadoop.hbase.util.Bytes._
  14. import scalaj.collection.Implicits._
  /**
   * Indexer that writes the geometry index (`Indexes.GeometryIndex`).
   *
   * It iterates over every geocode record flagged `hasPoly`, looks up the polygon
   * document each record references through `polyId`, and appends
   * (featureId -> parsed geometry) to the map file writer for the index.
   *
   * @param basepath forwarded to the `Indexer` base (overrides its `basepath`)
   * @param fidMap   forwarded to the `Indexer` base (overrides its `fidMap`)
   */
  class PolygonIndexer(override val basepath: String, override val fidMap: FidMap) extends Indexer {
    val index = Indexes.GeometryIndex
    override val outputs = Seq(index)

    /**
     * Writes one entry per `hasPoly` record whose polygon document can be found.
     *
     * Records are read in ascending `_id` order in batches of `groupSize`; each batch
     * triggers a single `$in` query for its polygon documents. Records whose `polyId`
     * has no matching polygon document are skipped.
     */
    def writeIndexImpl(): Unit = {
      val polygonSize = PolygonIndexDAO.collection.count()
      val usedPolygonSize = MongoGeocodeDAO.count(MongoDBObject("hasPoly" -> true))

      val hasPolyCursor =
        MongoGeocodeDAO.find(MongoDBObject("hasPoly" -> true))
          .sort(orderBy = MongoDBObject("_id" -> 1)) // sort by _id asc
      // The scan can be long-running; ask the server not to time the cursor out.
      hasPolyCursor.option = Bytes.QUERYOPTION_NOTIMEOUT

      val writer = buildMapFileWriter(index)
      // Close the writer even if a Mongo read or WKB parse throws part-way through.
      try {
        val wkbReader = new WKBReader()

        var numUsedPolygon = 0
        val groupSize = 1000
        // would be great to unify this with featuresIndex
        hasPolyCursor.grouped(groupSize).zipWithIndex.foreach { case (g, groupIndex) =>
          val group = g.toList

          // Log progress once per batch, regardless of whether the batch's first
          // feature resolves to a polygon. The last two arguments are
          // (seen so far, total) to match the "%d of %d total polys seen" text.
          logger.info("PolygonIndexer: outputted %d of %d used polys, %d of %d total polys seen".format(
            numUsedPolygon, usedPolygonSize, groupIndex * groupSize, polygonSize))

          val polyIds = group.filter(f => f.hasPoly).map(_.polyId)
          val polyMap: Map[ObjectId, PolygonIndex] =
            PolygonIndexDAO.find(MongoDBObject("_id" -> MongoDBObject("$in" -> polyIds)))
              .toList
              .groupBy(_._id)
              .map({ case (k, v) => (k, v.head) })

          for {
            f <- group
            poly <- polyMap.get(f.polyId)
          } {
            numUsedPolygon += 1
            writer.append(f.featureId, wkbReader.read(poly.polygon))
          }
        }
      } finally {
        writer.close()
      }

      logger.info("done")
    }
  }