PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/dataset/app/controllers/organization/DataSetImport.scala

https://github.com/delving/culture-hub
Scala | 156 lines | 122 code | 23 blank | 11 comment | 5 complexity | 9aa0a4362033d8bfc8436976af8f59a6 MD5 | raw file
  1. package controllers.organization
  2. import play.api.mvc._
  3. import controllers.{ SipCreatorEndPoint, OrganizationController }
  4. import java.util.zip.ZipFile
  5. import scala.collection.JavaConverters._
  6. import scala.io.Source
  7. import models._
  8. import core.CultureHubPlugin
  9. import com.mongodb.BasicDBObject
  10. import models.FormatAccessControl
  11. import core.messages.CollectionCreated
  12. import models.Details
  13. import com.mongodb.casbah.commons.MongoDBObject
  14. import play.api.libs.ws.WS
  15. import scala.concurrent.{ ExecutionContext, Await, Future }
  16. import play.api.libs.Files.TemporaryFile
  17. import org.apache.commons.io.FileUtils
  18. import java.io.File
  19. import ExecutionContext.Implicits.global
  20. import play.api.libs.MimeTypes
  21. import com.escalatesoft.subcut.inject.BindingModule
  /**
   * Importer for SIP zip archives, useful for development and debugging. Use e.g. like this:
   *
   * curl -i -F name=RAEAD_delving.zip -F filedata=@RAEAD_delving.zip http://delving.localhost:9000/admin/dataset/import\?userName\=bob
   *
   * @author Manuel Bernhardt <bernhardt.manuel@gmail.com>
   */
  class DataSetImport(implicit val bindingModule: BindingModule) extends OrganizationController {

    // [dir/]HASH__type[_prefix].extension
    // NOTE(review): the pattern requires the "dir/" part, and the '.' in front of the extension group
    // is not escaped, so it matches any character. Left unchanged because this val is public.
    val FileName = """([^/]*)/([^_]*)__([^._]*)_?([^.]*).(.*)""".r

    /**
     * Imports the SIP zip archive uploaded as the request body.
     *
     * Only files at most one directory deep are considered. Files whose name matches [[FileName]] are
     * grouped by their "type" part and only the entry with the latest timestamp of each group is kept.
     * If no data set with the "spec" from `dataset_facts.txt` exists yet, one is created. Every kept
     * file matching [[FileName]] whose name does not contain "_imported" is then posted to the
     * sip-creator submit endpoint, waiting up to 10 seconds for each response.
     *
     * @param userName the user on whose behalf the import runs; when absent the request is rejected
     * @return 200 with one "file => response body" line per uploaded file, or 400 when `userName` is
     *         missing or the archive contains no `dataset_facts.txt`
     */
    def importSIP(userName: Option[String]) = Root {
      MultitenantAction(parse.temporaryFile) {
        implicit request =>
          import scala.concurrent.duration._
          import scala.util.control.NonFatal

          userName map { user =>
            val zipFile = new ZipFile(request.body.file)
            try {
              // keep the entries themselves and open their streams only when they are actually read
              val allEntries = zipFile.entries.asScala
                .filterNot(_.isDirectory)
                .filterNot(_.getName.split("/").length > 2)
                .map { entry => (entry.getName, entry: java.util.zip.ZipEntry) }
                .toMap

              // per group, only the most recently modified entry survives
              val entries = allEntries.groupBy { case (name, _) => groupKey(name) }.map {
                case (_, candidates) => candidates.maxBy { case (_, entry) => entry.getTime }
              }

              log.info("Importer: found entries\n\n" + entries.keys.mkString("\n"))

              entries.find { case (name, _) => name.contains("dataset_facts.txt") }.map {
                case (_, factsEntry) =>
                  val factsStream = zipFile.getInputStream(factsEntry)
                  val factsMap =
                    try parseFacts(Source.fromInputStream(factsStream, "UTF-8").getLines())
                    finally factsStream.close()

                  // I can haz set?
                  val spec = factsMap("spec")
                  if (DataSet.dao.findOne(MongoDBObject("spec" -> spec)).isEmpty) {
                    val formats = factsMap("schemaVersions").split(",").map { v =>
                      val Array(prefix, version) = v.split("_")
                      prefix -> version
                    }.toMap

                    DataSet.dao.insert(
                      DataSet(
                        spec = spec,
                        orgId = configuration.orgId,
                        userName = user,
                        description = None,
                        state = DataSetState.INCOMPLETE,
                        details = Details(
                          name = factsMap("name"),
                          facts = new BasicDBObject(factsMap.asJava)
                        ),
                        invalidRecords = formats.map(f => (f._1, List.empty)),
                        mappings = formats.map(f => (f._1, Mapping(schemaPrefix = f._1, schemaVersion = f._2))),
                        formatAccessControl = formats.map(f => f._1 -> FormatAccessControl(accessType = "public"))
                      )
                    )
                    log.info("Created set for import " + spec)
                    DataSetEvent ! DataSetEvent.Created(configuration.orgId, spec, connectedUser)
                    CultureHubPlugin.broadcastMessage(CollectionCreated(spec, configuration))
                  }

                  // and now upload the stuff, hacky way
                  // NOTE(review): the target host is hard-coded to the local development instance.
                  val uploads = entries
                    .filter { case (name, _) => FileName.findFirstIn(name).isDefined }
                    .filterNot { case (name, _) => name.contains("_imported") }
                    .map {
                      case (name, entry) =>
                        val cleanName = name.substring(name.indexOf("/") + 1)

                        // why are there no stream utils around here to directly post from a stream?
                        val t = new File(System.getProperty("java.io.tmpdir"), cleanName)
                        try {
                          t.createNewFile()
                        } catch {
                          case NonFatal(e) =>
                            log.warn("Couldn't create temporary location for " + t.getAbsolutePath + ": " + e.getMessage)
                        }
                        // The handle is kept until the responses have arrived; otherwise its
                        // cleanup-on-GC could remove the file while the upload is still reading it.
                        val temp = TemporaryFile(t)
                        FileUtils.copyInputStreamToFile(zipFile.getInputStream(entry), t)

                        val mimeType = {
                          val m = MimeTypes.forFileName(cleanName).getOrElse("unknown/unknown")
                          if (m == "application/x-compressed") "application/x-gzip" else m
                        }
                        log.info(s"Temporary extracted file at ${t.getAbsolutePath} with mimeType $mimeType")

                        val response: Future[String] =
                          WS.url(s"http://delving.localhost:9000/api/sip-creator/submit/${configuration.orgId}/$spec/$cleanName")
                            .withQueryString("userName" -> user)
                            .withHeaders(
                              "Content-Type" -> mimeType
                            )
                            .post(t).map(r => r.body)

                        (name, (temp, response))
                    }

                  val commands: Map[String, Future[String]] =
                    uploads.map { case (name, (_, response)) => (name, response) }

                  try {
                    val responses: Map[String, String] =
                      commands.map { case (name, response) => (name, Await.result(response, 10.seconds)) }

                    Ok(
                      responses.map { case (name, body) => s"$name => $body" }.mkString("\n")
                    )
                  } finally {
                    // the extracted copies are no longer needed once the uploads were awaited
                    uploads.values.foreach { case (temp, _) => temp.clean() }
                  }
              } getOrElse {
                BadRequest("No dataset_facts.txt found in this SIP")
              }
            } finally {
              // closing the archive also closes every stream that was obtained from it
              zipFile.close()
            }
          } getOrElse {
            BadRequest("Nope.")
          }
      }
    }

    /** Key used to group archive entries: the "type" part of names matching [[FileName]], else the name itself. */
    private def groupKey(name: String): String = name match {
      case FileName(_, _, kind, _, _) => kind
      case _ => name
    }

    /**
     * Parses `key=value` lines into a map. Only the first '=' separates key from value, so values may
     * contain '=' or be empty; lines without any '=' (e.g. blank lines) are skipped.
     */
    private def parseFacts(lines: Iterator[String]): Map[String, String] =
      lines.map(_.split("=", 2)).collect { case Array(key, value) => (key, value) }.toMap
  }