PageRenderTime 48ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/indexer/src/main/scala/output/FeatureIndexer.scala

https://gitlab.com/18runt88/twofishes
Scala | 111 lines | 93 code | 15 blank | 3 comment | 6 complexity | 7f826ee22f5fc0c858d575572f4943aa MD5 | raw file
  1. package com.foursquare.twofishes.output
  2. import com.foursquare.twofishes.{GeocodeRecord, GeocodeServingFeature, Indexes, YahooWoeType}
  3. import com.foursquare.twofishes.mongo.{PolygonIndexDAO, PolygonIndex, MongoGeocodeDAO, RevGeoIndexDAO}
  4. import com.foursquare.twofishes.util.{GeoTools, GeometryUtils, StoredFeatureId}
  5. import com.foursquare.twofishes.Identity._
  6. import com.mongodb.Bytes
  7. import com.mongodb.casbah.Imports._
  8. import com.novus.salat._
  9. import com.novus.salat.annotations._
  10. import com.novus.salat.dao._
  11. import com.novus.salat.global._
  12. import com.vividsolutions.jts.io.WKBReader
  13. import java.io._
  14. import org.apache.hadoop.hbase.util.Bytes._
  15. import scalaj.collection.Implicits._
  /**
   * Indexer that writes one entry per geocode record into the
   * `Indexes.FeatureIndex` output, keyed by the record's feature id.
   *
   * @param basepath   base path handed to the parent [[Indexer]]
   * @param fidMap     lookup used to decide which parent ids are kept
   * @param polygonMap for a polygon id, the (feature long id, woe type) pairs
   *                   looked up when reverse geocoding features that have no parents
   */
  class FeatureIndexer(
    override val basepath: String,
    override val fidMap: FidMap,
    polygonMap: Map[ObjectId, List[(Long, YahooWoeType)]]
  ) extends Indexer {
    /** Looks the given feature id up in `fidMap`. */
    def canonicalizeParentId(fid: StoredFeatureId) = fidMap.get(fid)

    val index = Indexes.FeatureIndex
    override val outputs = Seq(index)

    /**
     * Builds the serving feature for `g`, merging in the polygon and polygon
     * source from `poly` when one is supplied, and then blanking the WKB
     * geometry before the parent ids are resolved.
     *
     * @param g    the geocode record to convert
     * @param poly optional polygon whose `polygon` and `source` are copied onto `g`
     * @return the serving feature with `wkbGeometry` set to null
     */
    def makeGeocodeRecordWithoutGeometry(g: GeocodeRecord, poly: Option[PolygonIndex]): GeocodeServingFeature = {
      val fullFeature = poly.map(p =>
        g.copy(
          polygon = Some(p.polygon),
          polygonSource = Some(p.source))
      ).getOrElse(g).toGeocodeServingFeature()

      // wkbGeometry is cleared with null here, exactly as the serving struct's
      // copy method is used elsewhere in this class.
      val partialFeature = fullFeature.copy(
        feature = fullFeature.feature.copy(
          geometry = fullFeature.feature.geometry.copy(wkbGeometry = null)
        )
      )

      makeGeocodeServingFeature(partialFeature)
    }

    /** Converts `g` to a serving feature (geometry kept) and resolves its parent ids. */
    def makeGeocodeRecord(g: GeocodeRecord) = {
      makeGeocodeServingFeature(g.toGeocodeServingFeature())
    }

    val wkbReader = new WKBReader()

    /**
     * Returns a copy of `f` whose `scoringFeatures.parentIds` has been rewritten.
     *
     * - If `f` has no parent ids and is not a COUNTRY, the parents are found by
     *   reverse geocoding the feature's center against the revgeo index.
     * - Otherwise only those existing parent ids that parse as a
     *   [[StoredFeatureId]] and are found by `canonicalizeParentId` are kept.
     *
     * @param f the serving feature whose parents should be resolved
     * @return `f` with the recomputed parent id list
     */
    def makeGeocodeServingFeature(f: GeocodeServingFeature) = {
      val existingParentIds = f.scoringFeatures.parentIds

      val parents =
        if (existingParentIds.isEmpty &&
            f.feature.woeType !=? YahooWoeType.COUNTRY) {
          // take the center and reverse geocode it against the revgeo index!
          val geom = GeoTools.pointToGeometry(f.feature.geometryOrNull.center)
          val cells: Seq[Long] = GeometryUtils.s2PolygonCovering(geom).map(_.id)

          // now for each cell, find the matches in our index
          val candidates = RevGeoIndexDAO.find(MongoDBObject("cellid" -> MongoDBObject("$in" -> cells)))

          // for each candidate, check if it's full or we're in it
          val matches = (for {
            revGeoCell <- candidates
            fidLong <- polygonMap.getOrElse(revGeoCell.polyId, Nil)
            if (revGeoCell.full || revGeoCell.geom.exists(geomBytes =>
              wkbReader.read(geomBytes).contains(geom)))
          } yield { fidLong }).toList

          // polygonMap values are (long id, woe type) pairs; only the id is stored
          matches.map(_._1)
        } else {
          (for {
            parentLongId <- existingParentIds
            parentFid <- StoredFeatureId.fromLong(parentLongId)
            // acts purely as a filter: drop parents that fidMap does not know
            parentId <- canonicalizeParentId(parentFid)
          } yield {
            parentFid
          }).map(_.longId)
        }

      f.copy(
        scoringFeatures = f.scoringFeatures.copy(parentIds = parents)
      )
    }

    /**
     * Streams every record from `MongoGeocodeDAO` in ascending `_id` order, in
     * batches of 1000, looks up the polygons for each batch with a single query,
     * and appends each record (without WKB geometry) to the feature index.
     *
     * The writer is closed even when iteration or an append fails, so a failed
     * run does not leak the underlying output.
     */
    def writeIndexImpl(): Unit = {
      val writer = buildMapFileWriter(index, indexInterval = Some(2))

      try {
        var fidCount = 0
        val fidSize = MongoGeocodeDAO.collection.count()
        val fidCursor = MongoGeocodeDAO.find(MongoDBObject())
          .sort(orderBy = MongoDBObject("_id" -> 1)) // sort by _id asc
        // the full scan can outlive the server's default cursor timeout
        fidCursor.option = Bytes.QUERYOPTION_NOTIMEOUT

        for {
          gCursor <- fidCursor.grouped(1000)
          group = gCursor.toList
          // only records flagged with hasPoly contribute polygon ids to the lookup
          toFindPolys: Map[Long, ObjectId] = group.filter(f => f.hasPoly).map(r => (r._id, r.polyId)).toMap
          polyMap: Map[ObjectId, PolygonIndex] =
            PolygonIndexDAO.find(MongoDBObject("_id" -> MongoDBObject("$in" -> toFindPolys.values.toList)))
              .toList
              .groupBy(_._id).map({case (k, v) => (k, v(0))})
          f <- group
        } {
          val polyOpt = polyMap.get(f.polyId)
          writer.append(
            f.featureId, makeGeocodeRecordWithoutGeometry(f, polyOpt))
          fidCount += 1
          if (fidCount % 100000 == 0) {
            logger.info("processed %d of %d features".format(fidCount, fidSize))
          }
        }
      } finally {
        writer.close()
      }
    }
  }
  96. }