PageRenderTime 50ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/tools/migration/0.4/settings04/src/main/scala/io/prediction/tools/migration/Settings04.scala

https://github.com/eddieliu/PredictionIO
Scala | 242 lines | 208 code | 26 blank | 8 comment | 18 complexity | 1a7e4db21a79c46349d57c0a798c72e0 MD5 | raw file
  1. package io.prediction.tools.migration
  2. //import com.mongodb.casbah.Imports._
  3. import com.typesafe.config._
  4. import com.mongodb.casbah.query.Imports._
  5. import com.mongodb.casbah.Imports.{MongoConnection}
  /**
   * Interactive command-line migration of PredictionIO settings stored in MongoDB to the
   * 0.4 layout.
   *
   * The MongoDB location is read from the `db.host`, `db.port` and `db.name` configuration
   * keys, defaulting to `127.0.0.1`, `27017` and `predictionio`. Every migration step first
   * reports how many records it would touch and only proceeds when the operator types `YES`.
   */
  object Settings04 {
    import scala.util.control.NonFatal

    val config = ConfigFactory.load()

    /**
     * Evaluates `read` and returns its value, or `default` when it fails with a non-fatal
     * exception (for example a missing configuration key). Fatal errors still propagate.
     */
    private def settingOr[A](default: A)(read: => A): A =
      try read catch { case NonFatal(_) => default }

    val dbHost: String = settingOr("127.0.0.1") { config.getString("db.host") }
    val dbPort: Int = settingOr(27017) { config.getInt("db.port") }
    val dbName: String = settingOr("predictionio") { config.getString("db.name") }

    val db = MongoConnection(dbHost, dbPort)(dbName)
    val engineColl = db("engines")
    val offlineEvalColl = db("offlineEvals")
    val offlineEvalSplitterColl = db("offlineEvalSplitters")
    val offlineEvalResultsColl = db("offlineEvalResults")
    val algoColl = db("algos")
    val seqColl = db("seq")

    /**
     * Get the next sequence number from the given sequence name.
     *
     * Increments the `next` field of the `seq` document whose `_id` is `name` (upserting it
     * when absent) and returns the value after the increment.
     *
     * @param name the `_id` of the sequence document
     * @return the incremented `next` value, or 0 when the returned document has no `next` field
     * @throws NoSuchElementException if the database returns no document
     */
    def genNext(name: String): Int = {
      val qFind = MongoDBObject("_id" -> name)
      val qField = MongoDBObject("next" -> 1)
      val qSort = MongoDBObject()
      val qRemove = false
      val qModify = $inc("next" -> 1)
      val qReturnNew = true
      val qUpsert = true
      seqColl.findAndModify(qFind, qField, qSort, qRemove, qModify, qReturnNew, qUpsert) match {
        case Some(doc) => doc.getAsOrElse[Number]("next", 0).intValue
        case None =>
          throw new NoSuchElementException(s"findAndModify returned no document for sequence '$name'")
      }
    }

    /**
     * Prints `question`, prompts for confirmation and runs `migrate` only when the answer is
     * exactly `YES`; prints `Done` afterwards, or `Aborted` for any other answer (including
     * end of input).
     */
    private def confirmAndRun(question: String)(migrate: => Unit): Unit = {
      println(question)
      readLine("Enter 'YES' to proceed: ") match {
        case "YES" =>
          migrate
          println("Done")
        case _ => println("Aborted")
      }
    }

    /** Replaces the `enginetype` field of engines lacking `infoid` with an `infoid` field. */
    private def migrateEngineInfoid(): Unit = {
      println("Looking for Engines without infoid...")
      val engines = engineColl.find(MongoDBObject("infoid" -> MongoDBObject("$exists" -> false))).toList
      if (engines.isEmpty) {
        println("None found")
      } else {
        confirmAndRun(s"Found ${engines.length} Engines without infoid. Proceed to convert enginetype to infoid?") {
          engines foreach { engine =>
            engineColl.update(
              MongoDBObject("_id" -> engine.as[Int]("_id")),
              MongoDBObject(
                "$set" -> MongoDBObject("infoid" -> engine.as[String]("enginetype")),
                "$unset" -> MongoDBObject("enginetype" -> 1)))
          }
        }
      }
    }

    /** Inserts a default `trainingtestsplit` splitter for every OfflineEval that has none. */
    private def addMissingSplitters(): Unit = {
      println("Looking for OfflineEvals without OfflineEvalSplitter...")
      // create OfflineEvalSplitter for existing offlineEvalRecords which don't have one
      val evalsWithoutSplitter = offlineEvalColl.find().filter { eval =>
        offlineEvalSplitterColl.find(MongoDBObject("evalid" -> eval.as[Int]("_id"))).count == 0
      }.toList
      if (evalsWithoutSplitter.isEmpty) {
        println("None found")
      } else {
        confirmAndRun(s"Found ${evalsWithoutSplitter.size} OfflineEvals without OfflineEvalSplitter. Proceed to add OfflineEvalSplitter for these records?") {
          evalsWithoutSplitter foreach { eval =>
            val id = genNext("offlineEvalSplitterId")
            val evalid = eval.as[Int]("_id")
            println(s"Insert OfflineEvalSplitter for OfflineEval ID = $evalid")
            offlineEvalSplitterColl.insert(MongoDBObject(
              "_id" -> id,
              "evalid" -> evalid,
              "name" -> s"sim-eval-${evalid}-splitter",
              "infoid" -> "trainingtestsplit",
              "settings" -> Map(
                "trainingPercent" -> 0.8,
                "validationPercent" -> 0.0,
                "testPercent" -> 0.2,
                "timeorder" -> false
              )
            ))
          }
        }
      }
    }

    /** Unsets the obsolete `trainingsize`, `testsize` and `timeorder` fields of OfflineEvals. */
    private def removeObsoleteEvalFields(): Unit = {
      println("Looking for OfflineEvals with obsolete fields trainingsize, testsize, timeorder...")
      val oldFieldExists = $or(("trainingsize" -> MongoDBObject("$exists" -> true)),
        ("testsize" -> MongoDBObject("$exists" -> true)),
        ("timeorder" -> MongoDBObject("$exists" -> true)))
      // Materialise once: a cursor is a one-shot iterator, so counting it and then iterating
      // the same cursor again is not reliable.
      val evals = offlineEvalColl.find(oldFieldExists).toList
      if (evals.isEmpty) {
        println("None found")
      } else {
        confirmAndRun(s"Found ${evals.length} OfflineEvals with obsolete fields. Proceed to remove these obsolete fields from the records?") {
          evals foreach { eval =>
            val evalid = eval.as[Int]("_id")
            println(s"Remove obsolete fields for OfflineEval ID = $evalid")
            offlineEvalColl.update(
              MongoDBObject("_id" -> evalid),
              MongoDBObject("$unset" -> MongoDBObject("trainingsize" -> 1, "testsize" -> 1, "timeorder" -> 1)))
          }
        }
      }
    }

    /** Sets `iterations` to 1 on every OfflineEval that lacks the field. */
    private def addMissingIterations(): Unit = {
      println("Looking for OfflineEvals without iterations...")
      // Materialised for the same reason as above: count first, then iterate the same data.
      val evals = offlineEvalColl.find(MongoDBObject("iterations" -> MongoDBObject("$exists" -> false))).toList
      if (evals.isEmpty) {
        println("None Found")
      } else {
        confirmAndRun(s"Found ${evals.length} OfflineEvals without iterations. Proceed to add the 'iterations' field to these records?") {
          evals foreach { eval =>
            val evalid = eval.as[Int]("_id")
            println(s"Update OfflineEval ID = $evalid with 'iterations'")
            offlineEvalColl.update(
              MongoDBObject("_id" -> evalid),
              MongoDBObject("$set" -> MongoDBObject("iterations" -> 1)))
          }
        }
      }
    }

    /**
     * Rewrites OfflineEvalResults that lack `iteration` or whose `splitset` is neither `test`
     * nor `validation`: missing values default to iteration 1 and splitset `test`, and the
     * record is stored under the id `evalid_metricid_algoid_iteration_splitset`.
     */
    private def rekeyOfflineEvalResults(): Unit = {
      println("Looking for OfflineEvalResults without iteration or splitset...")
      def isKnownSplitset(s: String): Boolean = s == "test" || s == "validation"
      val selected = offlineEvalResultsColl.find().filter { result =>
        result.getAs[Int]("iteration").isEmpty ||
          !result.getAs[String]("splitset").exists(isKnownSplitset)
      }.toList
      if (selected.isEmpty) {
        println("None Found")
      } else {
        confirmAndRun(s"Found ${selected.size} OfflineEvalResults without proper iteration or splitset field. Proceed to update?") {
          selected foreach { result =>
            val resultid = result.as[String]("_id")
            val evalid = result.as[Int]("evalid")
            val metricid = result.as[Int]("metricid")
            val algoid = result.as[Int]("algoid")
            val score = result.as[Double]("score")
            val newIteration: Int = result.getAs[Int]("iteration").getOrElse(1)
            val newSplitset: String =
              result.getAs[String]("splitset").filter(isKnownSplitset).getOrElse("test")
            val newResultid = s"${evalid}_${metricid}_${algoid}_${newIteration}_${newSplitset}"
            println(s"Update OfflineEvalResult ID = $resultid. New ID = $newResultid")
            offlineEvalResultsColl.save(MongoDBObject(
              "_id" -> newResultid,
              "evalid" -> evalid,
              "metricid" -> metricid,
              "algoid" -> algoid,
              "score" -> score,
              "iteration" -> newIteration,
              "splitset" -> newSplitset
            ))
            // When the id is unchanged, save() has already replaced the record in place;
            // removing by the old id would delete the record that was just written.
            if (newResultid != resultid) {
              offlineEvalResultsColl.remove(MongoDBObject("_id" -> resultid))
            }
          }
        }
      }
    }

    /**
     * Adds a `status` field to Algos lacking one: `simeval` when `offlineevalid` is present,
     * otherwise `deployed` or `ready` according to the `deployed` flag.
     */
    private def addMissingAlgoStatus(): Unit = {
      println("Looking for Algo without status...")
      // Materialised for the same reason as above: count first, then iterate the same data.
      val algos = algoColl.find(MongoDBObject("status" -> MongoDBObject("$exists" -> false))).toList
      if (algos.isEmpty) {
        println("None Found")
      } else {
        confirmAndRun(s"Found ${algos.length} Algos without status. Proceed to add the 'status' field to these records?") {
          algos foreach { algo =>
            val offlineevalid = algo.getAs[Int]("offlineevalid")
            val deployed = algo.as[Boolean]("deployed")
            val status: String = (offlineevalid, deployed) match {
              case (Some(_), _) => "simeval"
              case (None, true) => "deployed"
              case (None, false) => "ready"
            }
            val algoid = algo.as[Int]("_id")
            println(s"Update Algo ID = $algoid with 'status'")
            algoColl.update(
              MongoDBObject("_id" -> algoid),
              MongoDBObject("$set" -> MongoDBObject("status" -> status)))
          }
        }
      }
    }

    /**
     * Entry point: prints the connection settings and runs the migration steps in order,
     * each one querying the database only after the previous step has finished.
     *
     * @param args command-line arguments (unused)
     */
    def main(args: Array[String]): Unit = {
      println("PredictionIO 0.4 Migration")
      println("Convert Engine.enginetype to Engine.infoid in MongoDB")
      println()
      println(s"Database host: $dbHost")
      println(s"Database port: $dbPort")
      println(s"Database name: $dbName")
      println()
      migrateEngineInfoid()
      println()
      addMissingSplitters()
      println()
      removeObsoleteEvalFields()
      println()
      addMissingIterations()
      println()
      rekeyOfflineEvalResults()
      println()
      addMissingAlgoStatus()
    }
  }