/magda-indexer/src/main/scala/au/csiro/data61/magda/indexer/IndexerApp.scala

https://github.com/magda-io/magda · Scala · 104 lines · 88 code · 14 blank · 2 comment · 4 complexity · 44e7c8512a456bc42bbf9864641fea4e MD5 · raw file

  1. package au.csiro.data61.magda.indexer
  2. import akka.actor.Actor
  3. import akka.actor.ActorLogging
  4. import akka.actor.ActorSystem
  5. import akka.actor.DeadLetter
  6. import akka.actor.Props
  7. import akka.event.Logging
  8. import akka.stream.ActorMaterializer
  9. import au.csiro.data61.magda.AppConfig
  10. import au.csiro.data61.magda.indexer.search.SearchIndexer
  11. import au.csiro.data61.magda.search.elasticsearch.DefaultClientProvider
  12. import au.csiro.data61.magda.search.elasticsearch.DefaultIndices
  13. import akka.http.scaladsl.Http
  14. import au.csiro.data61.magda.indexer.external.registry.RegisterWebhook.{
  15. initWebhook,
  16. ShouldCrawl,
  17. ShouldNotCrawl
  18. }
  19. import au.csiro.data61.magda.indexer.crawler.RegistryCrawler
  20. import au.csiro.data61.magda.client.RegistryExternalInterface
  21. import scala.concurrent.Future
  22. import au.csiro.data61.magda.search.elasticsearch.IndexDefinition
  23. import au.csiro.data61.magda.search.elasticsearch.Indices
  /**
    * Entry point of the indexer service.
    *
    * On startup it:
    *  - creates the actor system and subscribes a [[Listener]] to dead letters;
    *  - builds the registry client, the search indexer, the crawler and the HTTP API;
    *  - binds the API routes to the configured `http.interface` / `http.port`;
    *  - registers a webhook when `registry.registerForWebhooks` is true, then decides
    *    whether a crawl is needed and starts it;
    *  - exits the JVM with status 1 if that initialisation chain fails.
    */
  object IndexerApp extends App {
    implicit val config = AppConfig.conf()
    implicit val system = ActorSystem("indexer", config)
    implicit val executor = system.dispatcher
    implicit val materializer = ActorMaterializer()

    val logger = Logging(system, getClass)

    logger.info("Starting Indexer")
    logger.info("Log level is {}", config.getString("akka.loglevel"))

    // Route dead letters to an actor that writes them to the debug log.
    val listener = system.actorOf(Props(classOf[Listener]))
    system.eventStream.subscribe(listener, classOf[DeadLetter])

    logger.debug("Starting Crawler")
    val registryInterface = new RegistryExternalInterface()
    val indexer = SearchIndexer(new DefaultClientProvider, DefaultIndices)
    val crawler = new RegistryCrawler(registryInterface, indexer)
    val api = new IndexerApi(crawler, indexer)

    // Read the bind address once so the log line and the actual bind cannot disagree.
    private val httpInterface = config.getString("http.interface")
    private val httpPort = config.getInt("http.port")

    logger.info(s"Listening on ${httpInterface}:${httpPort}")

    // Keep the binding future in a val: this lets us report bind failures, and it
    // guarantees the next expression cannot be parsed as an extra argument list of
    // this call (Scala accepts a single newline before an opening `{`).
    private val serverBinding =
      Http().bindAndHandle(api.routes, httpInterface, httpPort)

    serverBinding.failed.foreach { cause =>
      logger.error(cause, "Failed to bind HTTP server to {}:{}", httpInterface, httpPort)
    }

    // Step 1: register the webhook if configured; otherwise a crawl is always required.
    private val webhookOutcome =
      if (config.getBoolean("registry.registerForWebhooks")) {
        initWebhook(registryInterface)
      } else {
        Future.successful(ShouldCrawl)
      }

    // Step 2: even when the webhook says no crawl is needed, an empty datasets
    // index still forces one.
    private val crawlDecision = webhookOutcome.flatMap {
      case ShouldCrawl => Future.successful(ShouldCrawl)
      case ShouldNotCrawl =>
        logger.info("Checking to see if datasets index is empty")
        indexer.isEmpty(Indices.DataSetsIndex).map { indexIsEmpty =>
          if (indexIsEmpty) {
            logger.info("Datasets index is empty, recrawling")
            ShouldCrawl
          } else {
            logger.info("Datasets index is NOT empty. No need to recrawl.")
            ShouldNotCrawl
          }
        }
    }

    // Step 3: act on the decision and crash loudly if anything above failed.
    // NOTE(review): the result of crawler.crawl() is discarded by `map`. If crawl()
    // returns a Future, a failure inside the crawl itself will not reach the
    // `recover` below - confirm its return type and consider `flatMap`.
    crawlDecision
      .map {
        case ShouldCrawl => crawler.crawl()
        case _ => // this means we were able to resume a webhook, so all good now :)
      }
      .recover {
        case e: Throwable =>
          logger.error(e, "Error while initializing")

          // This is a super massive problem - might as well just crash to make it super-obvious and to
          // use K8S' restart logic
          logger.error(
            "Failure to register webhook or perform initial crawl is an unrecoverable and drastic error, crashing"
          )
          System.exit(1)
      }
  }
  /**
    * Actor that writes the payload of every [[akka.actor.DeadLetter]] it receives
    * to its own log at debug level. Messages of any other type are not handled here.
    */
  class Listener extends Actor with ActorLogging {
    override def receive: Receive = {
      // Only the undelivered message itself is logged; sender and recipient are ignored.
      case DeadLetter(undelivered, _, _) =>
        log.debug(undelivered.toString)
    }
  }