PageRenderTime 59ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/indexer/src/main/scala/output/RevGeoIndexer.scala

https://gitlab.com/18runt88/twofishes
Scala | 83 lines | 71 code | 10 blank | 2 comment | 9 complexity | 6882988712932302d80fca54fc85fc92 MD5 | raw file
  1. package com.foursquare.twofishes.output
  2. import com.foursquare.twofishes.{CellGeometries, CellGeometry, Indexes, YahooWoeType}
  3. import com.foursquare.twofishes.mongo.{MongoGeocodeDAO, RevGeoIndexDAO}
  4. import com.foursquare.twofishes.util.RevGeoConstants
  5. import com.mongodb.Bytes
  6. import com.mongodb.casbah.Imports._
  7. import com.novus.salat._
  8. import com.novus.salat.annotations._
  9. import com.novus.salat.dao._
  10. import com.novus.salat.global._
  11. import java.io._
  12. import java.nio.ByteBuffer
  13. import org.apache.hadoop.hbase.util.Bytes._
  14. import scala.collection.mutable.ListBuffer
  15. import scalaj.collection.Implicits._
  /**
   * Writes the S2 reverse-geocoding index: one entry per S2 cell id, whose value
   * is the collection of [[CellGeometry]] records that cover that cell.
   *
   * @param basepath   base output path handed to the [[Indexer]] machinery
   * @param fidMap     feature-id map required by [[Indexer]]
   * @param polygonMap for each polygon id, the (feature long id, woe type) pairs
   *                   that should be emitted for cells referencing that polygon
   */
  class RevGeoIndexer(
    override val basepath: String,
    override val fidMap: FidMap,
    polygonMap: Map[ObjectId, List[(Long, YahooWoeType)]]
  ) extends Indexer with RevGeoConstants {
    val index = Indexes.S2Index

    override val outputs = Seq(index)

    // Created lazily so nothing is opened until the first append/close.
    // The S2 level parameters are passed along as metadata to the writer.
    lazy val writer = buildMapFileWriter(
      index,
      Map(
        "minS2Level" -> minS2LevelForRevGeo.toString,
        "maxS2Level" -> maxS2LevelForRevGeo.toString,
        "levelMod" -> defaultLevelModForRevGeo.toString
      )
    )

    /**
     * Streams the rev-geo records matching `restrict` in ascending `cellid`
     * order and appends one grouped entry per cell id to `writer`.
     *
     * Records whose polygon id has no entry in `polygonMap` contribute nothing.
     * If no cell geometry is produced at all, nothing is appended (previously a
     * bogus entry keyed by the `0L` sentinel with an empty value was written).
     *
     * @param restrict Mongo query restricting which rev-geo records are processed
     */
    def writeRevGeoIndex(
      restrict: MongoDBObject
    ): Unit = {
      val total = RevGeoIndexDAO.count(restrict)
      val revGeoCursor = RevGeoIndexDAO.find(restrict)
        .sort(orderBy = MongoDBObject("cellid" -> 1))
      // The scan can be long-running, so ask the server not to time the cursor out.
      revGeoCursor.option = Bytes.QUERYOPTION_NOTIMEOUT

      // Cell id whose geometries are currently being accumulated; only
      // meaningful while `currentCells` is non-empty.
      var currentKey = 0L
      val currentCells = new ListBuffer[CellGeometry]

      // Emits the accumulated group, if any, and resets the buffer.
      // NOTE(review): the buffer is handed to CellGeometries and then cleared
      // for reuse, exactly as before; this presumes `append` serializes the
      // value eagerly -- confirm against the writer implementation.
      def flushCurrentCell(): Unit = {
        if (currentCells.nonEmpty) {
          writer.append(currentKey, CellGeometries(currentCells))
          currentCells.clear()
        }
      }

      for ((revgeoIndexRecord, recordIndex) <- revGeoCursor.zipWithIndex) {
        // Log once per record; doing this per polygon mapping produced
        // duplicate lines and skipped records that have no mappings.
        if (recordIndex % 10000 == 0) {
          logger.info("processed %d of %d revgeo entries for %s".format(recordIndex, total, restrict))
        }

        for ((geoid, woeType) <- polygonMap.getOrElse(revgeoIndexRecord.polyId, Nil)) {
          // The cursor is sorted by cellid, so a key change means the previous
          // cell's group is complete.
          if (currentKey != revgeoIndexRecord.cellid) {
            flushCurrentCell()
            currentKey = revgeoIndexRecord.cellid
          }

          val builder = CellGeometry.newBuilder
            .woeType(woeType)
            .longId(geoid)

          // A record flagged `full` sets the full flag instead of a WKB geometry.
          if (revgeoIndexRecord.full) {
            builder.full(true)
          } else {
            builder.wkbGeometry(revgeoIndexRecord.geom.map(ByteBuffer.wrap))
          }
          currentCells.append(builder.result)
        }
      }

      // Emit the trailing group; a no-op when nothing was accumulated.
      flushCurrentCell()
    }

    /**
     * Writes the whole index in two passes and then closes the writer.
     */
    def writeIndexImpl(): Unit = {
      // in byte order, positives come before negative, so the non-negative
      // cell ids are written first, followed by the negative ones
      writeRevGeoIndex(MongoDBObject("cellid" -> MongoDBObject("$gte" -> 0)))
      writeRevGeoIndex(MongoDBObject("cellid" -> MongoDBObject("$lt" -> 0)))

      writer.close()
    }
  }