PageRenderTime 27ms CodeModel.GetById 0ms RepoModel.GetById 1ms app.codeStats 0ms

/indexer/src/main/scala/output/IdIndexer.scala

https://gitlab.com/18runt88/twofishes
Scala | 51 lines | 44 code | 7 blank | 0 comment | 1 complexity | 862bda8f81a6ab6677592699a9573654 MD5 | raw file
  1. package com.foursquare.twofishes.output
  2. import com.foursquare.twofishes.{Indexes, SlugEntryMap}
  3. import com.foursquare.twofishes.util.StoredFeatureId
  4. import com.foursquare.twofishes.Identity._
  5. import com.mongodb.Bytes
  6. import com.mongodb.casbah.Imports._
  7. import com.novus.salat._
  8. import com.novus.salat.annotations._
  9. import com.novus.salat.dao._
  10. import com.novus.salat.global._
  11. import java.io._
  12. import org.apache.hadoop.hbase.util.Bytes._
  13. import scalaj.collection.Implicits._
  14. import com.foursquare.twofishes.mongo.MongoGeocodeDAO
  /**
   * Indexer that writes `Indexes.IdMappingIndex`: a map from string keys to
   * [[StoredFeatureId]]s.
   *
   * Keys come from two sources:
   *  - every slug in `slugEntryMap` whose entry id parses as a human-readable
   *    feature id and has a mapping in `fidMap`;
   *  - for every feature returned by `MongoGeocodeDAO`, each id in `ids` other
   *    than the feature's own `_id`, in both its numeric-string form and its
   *    human-readable form.
   *
   * @param basepath     passed through to [[Indexer]]
   * @param fidMap       lookup used to resolve a slug entry's feature id to the id
   *                     that is written to the index
   * @param slugEntryMap slug -> entry map supplying the slug keys
   */
  class IdIndexer(
    override val basepath: String,
    override val fidMap: FidMap,
    slugEntryMap: SlugEntryMap.SlugEntryMap
  ) extends Indexer {
    val index = Indexes.IdMappingIndex
    override val outputs = Seq(index)

    /**
     * Collects all (key, feature id) pairs, de-duplicates them, orders them with
     * `lexicalSort` on the key, and appends them to the index writer.
     *
     * The writer is closed even when sorting or appending throws; the exception
     * still propagates to the caller.
     */
    def writeIndexImpl(): Unit = {
      // Slugs whose entry id does not parse, or is absent from fidMap, yield nothing
      // (presumably both lookups return Option -- confirm against their definitions).
      val slugEntries: List[(String, StoredFeatureId)] = for {
        (slug, entry) <- slugEntryMap.toList
        fid <- StoredFeatureId.fromHumanReadableString(entry.id)
        canonicalFid <- fidMap.get(fid)
      } yield slug -> canonicalFid

      // Scan every stored feature; the cursor is flagged no-timeout before iterating.
      val featureCursor = MongoGeocodeDAO.find(MongoDBObject())
      featureCursor.option = Bytes.QUERYOPTION_NOTIMEOUT

      val extraIds: List[(String, StoredFeatureId)] = featureCursor.flatMap(f => {
        val aliasPairs = for {
          id <- f.ids.filterNot(_ =? f._id)
          extraId <- StoredFeatureId.fromLong(id)
        } yield List(
          id.toString -> f.featureId,
          extraId.humanReadableString -> f.featureId
        )
        aliasPairs.flatten
      }).toList

      val writer = buildMapFileWriter(index)
      try {
        // Entries are appended in lexicalSort key order; exact duplicate pairs are dropped first.
        (slugEntries ++ extraIds)
          .distinct
          .sortWith((a, b) => lexicalSort(a._1, b._1))
          .foreach { case (k, v) => writer.append(k, v) }
      } finally {
        // Release the writer on both the success and the failure path.
        writer.close()
      }
    }
  }