PageRenderTime 44ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/bin/start-impala-cluster.py

https://gitlab.com/s9perween/Impala
Python | 286 lines | 258 code | 4 blank | 24 comment | 12 complexity | 97f0e5449c733c02342dc6fa73a62108 MD5 | raw file
  1. #!/usr/bin/env python
  2. # Copyright 2012 Cloudera Inc.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # Starts up an Impala cluster (ImpalaD + State Store) with the specified number of
  16. # ImpalaD instances. Each ImpalaD runs on a different port allowing this to be run
  17. # on a single machine.
  18. import os
  19. import sys
  20. from time import sleep, time
  21. from optparse import OptionParser
  22. # Options
  23. parser = OptionParser()
  24. parser.add_option("-s", "--cluster_size", type="int", dest="cluster_size", default=3,
  25. help="Size of the cluster (number of impalad instances to start).")
  26. parser.add_option("--build_type", dest="build_type", default= 'debug',
  27. help="Build type to use - debug / release")
  28. parser.add_option("--impalad_args", dest="impalad_args", default="",
  29. help="Additional arguments to pass to each Impalad during startup")
  30. parser.add_option("--state_store_args", dest="state_store_args", default="",
  31. help="Additional arguments to pass to State Store during startup")
  32. parser.add_option("--catalogd_args", dest="catalogd_args", default="",
  33. help="Additional arguments to pass to the Catalog Service at startup")
  34. parser.add_option("--kill", "--kill_only", dest="kill_only", action="store_true",
  35. default=False, help="Instead of starting the cluster, just kill all"\
  36. " the running impalads and the statestored.")
  37. parser.add_option("--force_kill", dest="force_kill", action="store_true", default=False,
  38. help="Force kill impalad and statestore processes.")
  39. parser.add_option("-r", "--restart_impalad_only", dest="restart_impalad_only",
  40. action="store_true", default=False,
  41. help="Restarts only the impalad processes")
  42. parser.add_option("--in-process", dest="inprocess", action="store_true", default=False,
  43. help="Start all Impala backends and state store in a single process.")
  44. parser.add_option("--log_dir", dest="log_dir", default="/tmp",
  45. help="Directory to store output logs to.")
  46. parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False,
  47. help="Prints all output to stderr/stdout.")
  48. parser.add_option("--wait_for_cluster", dest="wait_for_cluster", action="store_true",
  49. default=False, help="Wait until the cluster is ready to accept "\
  50. "queries before returning.")
  51. parser.add_option("--log_level", type="int", dest="log_level", default=1,
  52. help="Set the impalad backend logging level")
  53. parser.add_option("--jvm_args", dest="jvm_args", default="",
  54. help="Additional arguments to pass to the JVM(s) during startup.")
  55. options, args = parser.parse_args()
  56. IMPALA_HOME = os.environ['IMPALA_HOME']
  57. KNOWN_BUILD_TYPES = ['debug', 'release']
  58. IMPALAD_PATH = os.path.join(IMPALA_HOME,
  59. 'bin/start-impalad.sh -build_type=%s' % options.build_type)
  60. STATE_STORE_PATH = os.path.join(IMPALA_HOME, 'be/build',
  61. options.build_type, 'statestore/statestored')
  62. CATALOGD_PATH = os.path.join(IMPALA_HOME,
  63. 'bin/start-catalogd.sh -build_type=%s' % options.build_type)
  64. MINI_IMPALA_CLUSTER_PATH = IMPALAD_PATH + " -in-process"
  65. IMPALA_SHELL = os.path.join(IMPALA_HOME, 'bin/impala-shell.sh')
  66. IMPALAD_PORTS = ("-beeswax_port=%d -hs2_port=%d -be_port=%d "
  67. "-state_store_subscriber_port=%d -webserver_port=%d "
  68. "-llama_callback_port=%d")
  69. JVM_ARGS = "-jvm_debug_port=%s -jvm_args=%s"
  70. BE_LOGGING_ARGS = "-log_filename=%s -log_dir=%s -v=%s -logbufsecs=5"
  71. CLUSTER_WAIT_TIMEOUT_IN_SECONDS = 240
  72. def exec_impala_process(cmd, args, stderr_log_file_path):
  73. redirect_output = str()
  74. if options.verbose:
  75. args += ' -logtostderr=1'
  76. else:
  77. redirect_output = "1>%s" % stderr_log_file_path
  78. cmd = '%s %s %s 2>&1 &' % (cmd, args, redirect_output)
  79. os.system(cmd)
  80. def kill_cluster_processes(force=False):
  81. kill_matching_processes('catalogd')
  82. kill_matching_processes('impalad')
  83. kill_matching_processes('statestored')
  84. kill_matching_processes('mini-impala-cluster')
  85. def kill_matching_processes(binary_name, force=False):
  86. """Kills all processes with the given binary name"""
  87. # -w = Wait for processes to die.
  88. kill_cmd = "killall -w"
  89. if force: kill_cmd += " -9"
  90. os.system("%s %s" % (kill_cmd, binary_name))
  91. def start_statestore():
  92. print "Starting State Store logging to %s/statestored.INFO" % options.log_dir
  93. stderr_log_file_path = os.path.join(options.log_dir, "statestore-error.log")
  94. args = "%s %s" % (build_impalad_logging_args(0, "statestored"),
  95. options.state_store_args)
  96. exec_impala_process(STATE_STORE_PATH, args, stderr_log_file_path)
  97. def start_catalogd():
  98. print "Starting Catalog Service logging to %s/catalogd.INFO" % options.log_dir
  99. stderr_log_file_path = os.path.join(options.log_dir, "catalogd-error.log")
  100. args = "%s %s %s" % (build_impalad_logging_args(0, "catalogd"),
  101. options.catalogd_args, build_jvm_args(options.cluster_size))
  102. exec_impala_process(CATALOGD_PATH, args, stderr_log_file_path)
  103. def start_mini_impala_cluster(cluster_size):
  104. print ("Starting in-process Impala Cluster logging "
  105. "to %s/mini-impala-cluster.INFO" % options.log_dir)
  106. args = "-num_backends=%s %s" %\
  107. (cluster_size, build_impalad_logging_args(0, 'mini-impala-cluster'))
  108. stderr_log_file_path = os.path.join(options.log_dir, 'mini-impala-cluster-error.log')
  109. exec_impala_process(MINI_IMPALA_CLUSTER_PATH, args, stderr_log_file_path)
  110. def build_impalad_port_args(instance_num):
  111. BASE_BEESWAX_PORT = 21000
  112. BASE_HS2_PORT = 21050
  113. BASE_BE_PORT = 22000
  114. BASE_STATE_STORE_SUBSCRIBER_PORT = 23000
  115. BASE_WEBSERVER_PORT = 25000
  116. BASE_LLAMA_CALLBACK_PORT = 28000
  117. return IMPALAD_PORTS % (BASE_BEESWAX_PORT + instance_num, BASE_HS2_PORT + instance_num,
  118. BASE_BE_PORT + instance_num,
  119. BASE_STATE_STORE_SUBSCRIBER_PORT + instance_num,
  120. BASE_WEBSERVER_PORT + instance_num,
  121. BASE_LLAMA_CALLBACK_PORT + instance_num)
  122. def build_impalad_logging_args(instance_num, service_name):
  123. log_file_path = os.path.join(options.log_dir, "%s.INFO" % service_name)
  124. return BE_LOGGING_ARGS % (service_name, options.log_dir, options.log_level)
  125. def build_jvm_args(instance_num):
  126. BASE_JVM_DEBUG_PORT = 30000
  127. return JVM_ARGS % (BASE_JVM_DEBUG_PORT + instance_num, options.jvm_args)
  128. def start_impalad_instances(cluster_size):
  129. # Start each impalad instance and optionally redirect the output to a log file.
  130. for i in range(options.cluster_size):
  131. if i == 0:
  132. # The first impalad always logs to impalad.INFO
  133. service_name = "impalad"
  134. else:
  135. service_name = "impalad_node%s" % i
  136. args = "%s %s %s %s" %\
  137. (build_impalad_logging_args(i, service_name), build_jvm_args(i),
  138. build_impalad_port_args(i), options.impalad_args.replace("#ID", str(i)))
  139. stderr_log_file_path = os.path.join(options.log_dir, '%s-error.log' % service_name)
  140. exec_impala_process(IMPALAD_PATH, args, stderr_log_file_path)
  141. def wait_for_impala_process_count(impala_cluster, retries=3):
  142. """Checks that the desired number of impalad/statestored processes are running.
  143. Refresh until the number running impalad/statestored processes reaches the expected
  144. number based on CLUSTER_SIZE, or the retry limit is hit. Failing this, raise a
  145. RuntimeError.
  146. """
  147. for i in range(retries):
  148. if len(impala_cluster.impalads) < options.cluster_size or \
  149. not impala_cluster.statestored or not impala_cluster.catalogd:
  150. sleep(2)
  151. impala_cluster.refresh()
  152. msg = str()
  153. if len(impala_cluster.impalads) < options.cluster_size:
  154. impalads_found = len(impala_cluster.impalads)
  155. msg += "Expected %d impalad(s), only %d found\n" %\
  156. (options.cluster_size, impalads_found)
  157. if not impala_cluster.statestored:
  158. msg += "statestored failed to start.\n"
  159. if not impala_cluster.catalogd:
  160. msg += "catalogd failed to start.\n"
  161. if msg:
  162. raise RuntimeError, msg
  163. def wait_for_cluster_web(timeout_in_seconds=CLUSTER_WAIT_TIMEOUT_IN_SECONDS):
  164. """Checks if the cluster is "ready"
  165. A cluster is deemed "ready" if:
  166. - All backends are registered with the statestore.
  167. - Each impalad knows about all other impalads.
  168. This information is retrieved by querying the statestore debug webpage
  169. and each individual impalad's metrics webpage.
  170. """
  171. impala_cluster = ImpalaCluster()
  172. # impalad processes may take a while to come up.
  173. wait_for_impala_process_count(impala_cluster)
  174. for impalad in impala_cluster.impalads:
  175. impalad.service.wait_for_num_known_live_backends(options.cluster_size,
  176. timeout=CLUSTER_WAIT_TIMEOUT_IN_SECONDS, interval=2)
  177. wait_for_catalog(impalad, timeout_in_seconds=CLUSTER_WAIT_TIMEOUT_IN_SECONDS)
  178. def wait_for_catalog(impalad, timeout_in_seconds):
  179. """Waits for the impalad catalog to become ready"""
  180. start_time = time()
  181. catalog_ready = False
  182. while (time() - start_time < timeout_in_seconds and not catalog_ready):
  183. try:
  184. num_dbs = impalad.service.get_metric_value('catalog.num-databases')
  185. num_tbls = impalad.service.get_metric_value('catalog.num-tables')
  186. catalog_ready = impalad.service.get_metric_value('catalog.ready')
  187. print 'Waiting for Catalog... Status: %s DBs / %s tables (ready=%s)' %\
  188. (num_dbs, num_tbls, catalog_ready)
  189. except Exception, e:
  190. print e
  191. sleep(1)
  192. if not catalog_ready:
  193. raise RuntimeError, 'Catalog was not initialized in expected time period.'
  194. def wait_for_cluster_cmdline(timeout_in_seconds=CLUSTER_WAIT_TIMEOUT_IN_SECONDS):
  195. """Checks if the cluster is "ready" by executing a simple query in a loop"""
  196. start_time = time()
  197. while os.system('%s -i localhost:21000 -q "%s"' % (IMPALA_SHELL, 'select 1')) != 0:
  198. if time() - timeout_in_seconds > start_time:
  199. raise RuntimeError, 'Cluster did not start within %d seconds' % timeout_in_seconds
  200. print 'Cluster not yet available. Sleeping...'
  201. sleep(2)
  202. if __name__ == "__main__":
  203. if options.kill_only:
  204. kill_cluster_processes(force=options.force_kill)
  205. sys.exit(0)
  206. if options.build_type not in KNOWN_BUILD_TYPES:
  207. print 'Invalid build type %s' % options.build_type
  208. print 'Valid values: %s' % ', '.join(KNOWN_BUILD_TYPES)
  209. sys.exit(1)
  210. if options.cluster_size <= 0:
  211. print 'Please specify a cluster size > 0'
  212. sys.exit(1)
  213. # Kill existing cluster processes based on the current configuration.
  214. if options.restart_impalad_only:
  215. if options.inprocess:
  216. print 'Cannot perform individual component restarts using an in-process cluster'
  217. sys.exit(1)
  218. kill_matching_processes('impalad', force=options.force_kill)
  219. else:
  220. kill_cluster_processes(force=options.force_kill)
  221. try:
  222. import json
  223. wait_for_cluster = wait_for_cluster_web
  224. except ImportError:
  225. print "json module not found, checking for cluster startup through the command-line"
  226. wait_for_cluster = wait_for_cluster_cmdline
  227. # If ImpalaCluster cannot be imported, fall back to the command-line to check
  228. # whether impalads/statestore are up.
  229. try:
  230. from tests.common.impala_cluster import ImpalaCluster
  231. if options.restart_impalad_only:
  232. impala_cluster = ImpalaCluster()
  233. if not impala_cluster.statestored or not impala_cluster.catalogd:
  234. print 'No running statestored or catalogd detected. Restarting entire cluster.'
  235. options.restart_impalad_only = False
  236. except ImportError:
  237. print 'ImpalaCluster module not found.'
  238. # TODO: Update this code path to work similar to the ImpalaCluster code path when
  239. # restarting only impalad processes. Specifically, we should do a full cluster
  240. # restart if either the statestored or catalogd processes are down, even if
  241. # restart_only_impalad=True.
  242. wait_for_cluster = wait_for_cluster_cmdline
  243. if options.inprocess:
  244. # The statestore and the impalads start in the same process.
  245. start_mini_impala_cluster(options.cluster_size)
  246. wait_for_cluster_cmdline()
  247. else:
  248. try:
  249. if not options.restart_impalad_only:
  250. start_statestore()
  251. start_catalogd()
  252. start_impalad_instances(options.cluster_size)
  253. wait_for_cluster()
  254. except Exception, e:
  255. print 'Error starting cluster: %s' % e
  256. sys.exit(1)
  257. print 'Impala Cluster Running with %d nodes.' % options.cluster_size