PageRenderTime 25ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala

https://github.com/shivaram/spark
Scala | 189 lines | 126 code | 22 blank | 41 comment | 12 complexity | 770e27792f7e3d0d157fe01341ed8c57 MD5 | raw file
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package spark.deploy.worker
  18. import java.io._
  19. import java.lang.System.getenv
  20. import akka.actor.ActorRef
  21. import spark.{Utils, Logging}
  22. import spark.deploy.{ExecutorState, ApplicationDescription}
  23. import spark.deploy.DeployMessages.ExecutorStateChanged
  /**
   * Manages the execution of one executor process.
   *
   * `start()` spawns a runner thread that creates the executor's working directory, launches the
   * executor JVM as a child process, redirects its stdout/stderr to files and waits for it to
   * exit. `kill()` interrupts that thread and destroys the child process. State changes are
   * reported to the `worker` actor as `ExecutorStateChanged` messages.
   *
   * @param appId     application id; used in the working directory path and in state messages
   * @param execId    executor id; substituted for `{{EXECUTOR_ID}}` in command arguments
   * @param appDesc   application description supplying the command (main class, arguments,
   *                  environment) to launch
   * @param cores     number of cores; substituted for `{{CORES}}` in command arguments
   * @param memory    heap size passed to the child JVM as `-Xms<memory>M` / `-Xmx<memory>M`
   * @param worker    actor that receives `ExecutorStateChanged` notifications
   * @param workerId  id of the owning worker (not read inside this class)
   * @param hostPort  host:port string, validated on construction; its host part is substituted
   *                  for `{{HOSTNAME}}`
   * @param sparkHome directory containing `bin/compute-classpath(.sh|.cmd)`
   * @param workDir   parent directory under which `<appId>/<execId>` is created
   */
  private[spark] class ExecutorRunner(
      val appId: String,
      val execId: Int,
      val appDesc: ApplicationDescription,
      val cores: Int,
      val memory: Int,
      val worker: ActorRef,
      val workerId: String,
      val hostPort: String,
      val sparkHome: File,
      val workDir: File)
    extends Logging {

    Utils.checkHostPort(hostPort, "Expected hostport")

    val fullId = appId + "/" + execId
    var workerThread: Thread = null
    var process: Process = null
    var shutdownHook: Thread = null

    /** Look up `key` in the application's environment first, then in this JVM's environment. */
    private def getAppEnv(key: String): Option[String] =
      appDesc.command.environment.get(key).orElse(Option(getenv(key)))

    /**
     * Unregister the shutdown hook, if one was registered.
     *
     * `Runtime.removeShutdownHook` throws `IllegalStateException` once JVM shutdown has begun;
     * in that case the hook is already running (or about to), so the failure is ignored.
     * Removing a hook that was already removed is a harmless no-op.
     */
    private def removeShutdownHookQuietly(): Unit = {
      val hook = shutdownHook
      if (hook != null) {
        try {
          Runtime.getRuntime.removeShutdownHook(hook)
        } catch {
          case _: IllegalStateException => ()
        }
      }
    }

    /** Register the shutdown hook and start the thread that launches and monitors the executor. */
    def start(): Unit = {
      // Shutdown hook that kills the child process when this JVM shuts down. It is registered
      // before the runner thread starts so that the runner thread can always unregister it.
      shutdownHook = new Thread() {
        override def run() {
          if (process != null) {
            logInfo("Shutdown hook killing child process.")
            process.destroy()
            process.waitFor()
          }
        }
      }
      Runtime.getRuntime.addShutdownHook(shutdownHook)

      workerThread = new Thread("ExecutorRunner for " + fullId) {
        override def run() { fetchAndRunExecutor() }
      }
      workerThread.start()
    }

    /** Stop this executor runner, including killing the process it launched */
    def kill(): Unit = {
      if (workerThread != null) {
        workerThread.interrupt()
        workerThread = null
        if (process != null) {
          logInfo("Killing process!")
          process.destroy()
          process.waitFor()
        }
        worker ! ExecutorStateChanged(appId, execId, ExecutorState.KILLED, None, None)
        removeShutdownHookQuietly()
      }
    }

    /** Replace variables such as {{EXECUTOR_ID}} and {{CORES}} in a command argument passed to us */
    def substituteVariables(argument: String): String = argument match {
      case "{{EXECUTOR_ID}}" => execId.toString
      case "{{HOSTNAME}}" => Utils.parseHostPort(hostPort)._1
      case "{{CORES}}" => cores.toString
      case other => other
    }

    /**
     * Build the full command line for the executor: the java binary (from `JAVA_HOME` when set,
     * otherwise `java`), the JVM options, the main class and the variable-substituted arguments.
     */
    def buildCommandSeq(): Seq[String] = {
      val command = appDesc.command
      val runner = getAppEnv("JAVA_HOME").map(_ + "/bin/java").getOrElse("java")
      // SPARK-698: do not call the run.cmd script, as process.destroy()
      // fails to kill a process tree on Windows
      Seq(runner) ++ buildJavaOpts() ++ Seq(command.mainClass) ++
        command.arguments.map(substituteVariables)
    }

    /**
     * Assemble the JVM options: classpath, optional `-Djava.library.path`, user options from
     * `SPARK_JAVA_OPTS` and the heap size settings.
     *
     * Attention: this must always be aligned with the environment variables in the run scripts and
     * the way the JAVA_OPTS are assembled there.
     */
    def buildJavaOpts(): Seq[String] = {
      val libraryOpts = getAppEnv("SPARK_LIBRARY_PATH")
        .map(p => List("-Djava.library.path=" + p))
        .getOrElse(Nil)
      val userOpts = getAppEnv("SPARK_JAVA_OPTS").map(Utils.splitCommandString).getOrElse(Nil)
      val memoryOpts = Seq("-Xms" + memory + "M", "-Xmx" + memory + "M")

      // Figure out our classpath with the external compute-classpath script
      val ext = if (System.getProperty("os.name").startsWith("Windows")) ".cmd" else ".sh"
      val classPath = Utils.executeAndGetOutput(
        Seq(sparkHome + "/bin/compute-classpath" + ext),
        extraEnvironment=appDesc.command.environment)

      Seq("-cp", classPath) ++ libraryOpts ++ userOpts ++ memoryOpts
    }

    /** Spawn a thread that will redirect a given stream to a file */
    def redirectStream(in: InputStream, file: File): Unit = {
      val out = new FileOutputStream(file)
      new Thread("redirect output to " + file) {
        override def run() {
          try {
            // NOTE(review): the `true` flag presumably asks copyStream to close both streams
            // when done -- confirm against Utils.copyStream.
            Utils.copyStream(in, out, true)
          } catch {
            case e: IOException =>
              logInfo("Redirection to " + file + " closed: " + e.getMessage)
          }
        }
      }.start()
    }

    /**
     * Download and run the executor described in our ApplicationDescription
     *
     * Runs on the runner thread. Reports FAILED to the worker when the process exits or when
     * launching it fails. On interruption (see `kill()`) it only logs, because `kill()` sends the
     * KILLED notification itself.
     */
    def fetchAndRunExecutor(): Unit = {
      try {
        // Create the executor's working directory
        val executorDir = new File(workDir, appId + "/" + execId)
        if (!executorDir.mkdirs()) {
          throw new IOException("Failed to create directory " + executorDir)
        }

        // Launch the process
        val command = buildCommandSeq()
        val builder = new ProcessBuilder(command: _*).directory(executorDir)
        val env = builder.environment()
        for ((key, value) <- appDesc.command.environment) {
          env.put(key, value)
        }
        // In case we are running this from within the Spark Shell, avoid creating a "scala"
        // parent process for the executor command
        env.put("SPARK_LAUNCH_WITH_SCALA", "0")
        process = builder.start()

        // Redirect its stdout and stderr to files
        redirectStream(process.getInputStream, new File(executorDir, "stdout"))
        redirectStream(process.getErrorStream, new File(executorDir, "stderr"))

        // Wait for it to exit; this is actually a bad thing if it happens, because we expect to run
        // long-lived processes only. However, in the future, we might restart the executor a few
        // times on the same machine.
        val exitCode = process.waitFor()
        val message = "Command exited with code " + exitCode
        worker ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED, Some(message),
          Some(exitCode))
      } catch {
        case interrupted: InterruptedException =>
          logInfo("Runner thread for executor " + fullId + " interrupted")
          // kill() may have run before the process was launched, in which case it saw
          // process == null and could not destroy it. Destroy it here so the child is not
          // orphaned; destroying an already-destroyed process is harmless.
          if (process != null) {
            process.destroy()
          }

        case e: Exception => {
          logError("Error running executor", e)
          if (process != null) {
            process.destroy()
          }
          val message = e.getClass + ": " + e.getMessage
          worker ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED, Some(message), None)
        }
      } finally {
        // The child is gone (or was never started) once we get here, so the shutdown hook has
        // nothing left to do; unregister it so hooks do not accumulate in the worker JVM.
        removeShutdownHookQuietly()
      }
    }
  }