
/cm/services/apps/hadoop.py

https://bitbucket.org/imageincloud/cloudman
import os
import re
import glob
import shutil
import tarfile
import urllib2
import urlparse
import threading
from distutils.version import StrictVersion

from cm.util import misc
from cm.util import paths
from cm.services import ServiceRole
from cm.services import service_states
from cm.services import ServiceDependency
from cm.services.apps import ApplicationService

import logging
log = logging.getLogger('cloudman')
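
# NOTE: urllib2 and urlparse mark this as Python 2 code; under Python 3
# the equivalent functionality lives in urllib.request and urllib.parse.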


class HadoopService(ApplicationService):

    def __init__(self, app):
        super(HadoopService, self).__init__(app)
        self.svc_roles = [ServiceRole.HADOOP]
        self.name = ServiceRole.to_string(ServiceRole.HADOOP)
        # Hadoop runs on demand on top of the SGE cluster, so the SGE
        # service must be available before this service can start.
        self.reqs = [ServiceDependency(self, ServiceRole.SGE)]
        self.id_rsa_path = os.path.join(paths.P_HADOOP_HOME, "id_rsa")
        self.id_rsa_pub_key_path = os.path.join(
            paths.P_HADOOP_HOME, "id_rsa.pub")

    def start(self):
        """
        Start Hadoop. This entails downloading and extracting the Hadoop
        binaries into the Hadoop home folder and setting up the environment
        so Hadoop jobs can be submitted.
        """
        log.debug("Configuring Hadoop")
        self.state = service_states.STARTING
        threading.Thread(target=self.__start).start()
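
    # NOTE: start() returns immediately. The download/unpack/configure work
    # happens in __start() on a background thread, so the CloudMan service
    # manager is not blocked while the tar balls are being fetched.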

    def __start(self):
        """
        Do the actual unpacking and configuring of Hadoop. This method is
        intended to be called in a separate thread because the file
        download may take a while.
        """
        if self.unpack_hadoop():
            self.configure_hadoop()
            self.state = service_states.RUNNING
            log.info("Done adding Hadoop service; service running.")
        else:
            log.error("Error adding service '%s'" % self.svc_type)
            self.state = service_states.ERROR

    def remove(self):
        """
        Remove Hadoop related files from the system.
        """
        log.info("Removing Hadoop service")
        self.state = service_states.SHUTTING_DOWN
        self._clean()
        self.state = service_states.SHUT_DOWN

    def _clean(self):
        """
        Clean up the system as if Hadoop was never installed.
        """
        # Only wipe the Hadoop home folder once the service has actually
        # shut down, so a running service never loses its files.
        if self.state == service_states.SHUT_DOWN:
            misc.run('rm -rf %s/*' % paths.P_HADOOP_HOME)

    def unpack_hadoop(self):
        """
        Download and extract Hadoop into the ``paths.P_HADOOP_HOME`` folder.
        This method first looks for a Hadoop tar ball in that folder and, if
        the ``cloudman`` bucket holds a newer version (or no local copy
        exists), downloads the tar ball from the bucket.
        """
        all_done = False
        log.debug("Unpacking Hadoop")
        hadoop_path = os.path.join(
            paths.P_HADOOP_TARS_PATH, paths.P_HADOOP_TAR)
        log.debug("Hadoop path is " + hadoop_path)
        hadoop_intg_path = os.path.join(
            paths.P_HADOOP_TARS_PATH, paths.P_HADOOP_INTEGRATION_TAR)
        log.debug("Hadoop SGE integration path is " + hadoop_intg_path)
        try:
            if not os.path.exists(paths.P_HADOOP_HOME):
                os.makedirs(paths.P_HADOOP_HOME)
            if not os.path.exists(self.id_rsa_path):
                shutil.copy("/mnt/cm/id_rsa", self.id_rsa_path)
            if not os.path.exists(self.id_rsa_pub_key_path):
                shutil.copy("/mnt/cm/id_rsa.pub", self.id_rsa_pub_key_path)
            # Version of the tar ball already present on the image, if any
            hdp = glob.glob(paths.P_HADOOP_TARS_PATH + "/hadoop.*")
            img_hdp_ver = "0.0"
            img_intg_ver = "0.0"
            if len(hdp) > 0:
                hdp_file = os.path.basename(hdp[0])
                img_hdp_ver, img_intg_ver = self.get_file_version(hdp_file)
            # Version of the tar ball available in the bucket
            u = urllib2.urlopen(paths.P_HADOOP_TAR_URL)
            s = u.read()
            m = re.search(paths.P_HADOOP_TAR, s)
            srv_hdp = paths.P_HADOOP_TAR
            srv_hdp_intg = paths.P_HADOOP_INTEGRATION_TAR
            serv_hdp_ver = "0.0"
            serv_intg_ver = "0.0"
            if m is not None:
                srv_hdp = m.group(0)
                serv_hdp_ver, serv_intg_ver = self.get_file_version(srv_hdp)
                m = re.search(paths.P_HADOOP_INTEGRATION_TAR, s)
                if m is not None:
                    srv_hdp_intg = m.group(0)
            # log.debug(srv_hdp)
            # log.debug(img_hdp_ver)
            # log.debug(serv_hdp_ver)
            # log.debug(img_intg_ver)
            # log.debug(serv_intg_ver)
            # Download only if the bucket's copy is newer than the image's
            if (StrictVersion(serv_hdp_ver) > StrictVersion(img_hdp_ver) or
                    StrictVersion(serv_intg_ver) > StrictVersion(img_intg_ver)):
                url = urlparse.urljoin(paths.P_HADOOP_TAR_URL, srv_hdp)
                log.debug("Downloading Hadoop from {0}".format(url))
                u = urllib2.urlopen(url)
                local_file = open(
                    paths.P_HADOOP_TARS_PATH + "/" + srv_hdp, 'w')
                local_file.write(u.read())
                local_file.close()
                log.debug("Downloaded Hadoop")
            if not os.path.exists(paths.P_HADOOP_TARS_PATH + "/" + srv_hdp_intg):
                url = urlparse.urljoin(paths.P_HADOOP_TAR_URL, srv_hdp_intg)
                log.debug("Downloading Hadoop SGE integration from {0}".format(url))
                u = urllib2.urlopen(url)
                local_file = open(
                    paths.P_HADOOP_TARS_PATH + "/" + srv_hdp_intg, 'w')
                local_file.write(u.read())
                local_file.close()
                log.debug("Hadoop SGE integration downloaded")
            tar = tarfile.open(paths.P_HADOOP_TARS_PATH + "/" + srv_hdp)
            tar.extractall(paths.P_HADOOP_HOME)
            tar.close()
            log.debug("Hadoop extracted to {0}".format(paths.P_HADOOP_HOME))
            tar = tarfile.open(paths.P_HADOOP_TARS_PATH + "/" + srv_hdp_intg)
            tar.extractall(paths.P_HADOOP_HOME)
            tar.close()
            log.debug("Hadoop SGE integration extracted to {0}".format(
                paths.P_HADOOP_HOME))
            misc.run("chown -R -c ubuntu " +
                     paths.P_HADOOP_TARS_PATH + "/" + srv_hdp_intg)
            misc.run("chown -R -c ubuntu " +
                     paths.P_HADOOP_TARS_PATH + "/" + srv_hdp)
            all_done = True
        except Exception as e:
            log.error("Error downloading Hadoop: {0}".format(e))
            all_done = False
        return all_done
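
    # Illustration of the version gate in unpack_hadoop (hypothetical
    # values): with "hadoop.1.0.4__1.0.tar.gz" in the bucket and no local
    # copy (so the image versions default to "0.0"),
    # StrictVersion("1.0.4") > StrictVersion("0.0") is True and a fresh
    # download is triggered; otherwise the copy on the image is reused.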

    def get_file_version(self, file_name):
        """
        Extract and return the version information from the file name
        passed into the method, namely the Hadoop version and the build
        version. Our standard for versioning the Hadoop and SGE
        integration tar balls is as follows:
        ``hadoop.<hadoop version>__<release version>.<build version>.tar.gz``
        If no version can be extracted from the file name, version ``0.0``
        is returned. The returned values are compatible with
        ``StrictVersion`` from the ``distutils.version`` module.
        """
        # Strip the "hadoop." prefix and ".tar.gz" suffix as substrings;
        # str.lstrip/str.rstrip would strip *characters*, not substrings,
        # and can eat into the version itself.
        hdp_file_ver = file_name.replace('hadoop.', '', 1)
        if hdp_file_ver.endswith('.tar.gz'):
            hdp_file_ver = hdp_file_ver[:-len('.tar.gz')]
        versions = hdp_file_ver.split('__')
        hadoop_version = "0.0"
        build_version = "0.0"
        try:
            hadoop_version = versions[0]
            build_version = versions[1]
        except Exception as e:
            log.debug("Error extracting Hadoop's file version: {0}".format(e))
            hadoop_version = "0.0"
            build_version = "0.0"
        log.debug("Extracted Hadoop version: {0}".format(hadoop_version))
        log.debug("Extracted Hadoop build version: {0}".format(build_version))
        return hadoop_version, build_version
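
    # For a hypothetical file name following the convention above:
    #   get_file_version("hadoop.1.0.4__1.0.tar.gz")  ->  ("1.0.4", "1.0")
    # A name without the "__" separator raises an IndexError internally,
    # and the method falls back to ("0.0", "0.0").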

    def configure_hadoop(self):
        """
        Configure the environment for running Hadoop on demand.
        """
        all_done = False
        try:
            log.debug("Setting up Hadoop environment")
            etc_file = open("/etc/environment", "a")
            etc_file.write('JAVA_HOME="/usr"\n')
            etc_file.flush()
            etc_file.close()
            log.debug("Hadoop id_rsa set from " + self.id_rsa_path)
            hadoop_id_rsa = "/home/ubuntu/.ssh/id_rsa"
            shutil.copy(self.id_rsa_path, hadoop_id_rsa)
            misc.run("chown -c ubuntu {0}".format(hadoop_id_rsa))
            log.debug("Hadoop auth file saved to {0}".format(hadoop_id_rsa))
            auth_file = open("/home/ubuntu/.ssh/authorized_keys", "a")
            pub_key_file = open(self.id_rsa_pub_key_path)
            auth_file.write(pub_key_file.read())
            auth_file.flush()
            auth_file.close()
            pub_key_file.close()
            misc.run("chown -c ubuntu /home/ubuntu/.ssh/authorized_keys")
            all_done = True
        except Exception as e:
            log.error("Error while configuring Hadoop: {0}".format(e))
            all_done = False
        return all_done
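
    # The key setup above (private key installed as ubuntu's id_rsa, plus
    # the matching public key appended to authorized_keys) gives the
    # "ubuntu" account passwordless SSH between cluster nodes, which
    # Hadoop's start-up scripts rely on to launch daemons across the
    # cluster.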

    def status(self):
        """
        Check and update the status of the Hadoop service. If the service
        state is ``SHUTTING_DOWN``, ``SHUT_DOWN``, ``UNSTARTED``, or
        ``WAITING_FOR_USER_ACTION``, the method does not do anything.
        Otherwise, it updates the service status by setting ``self.state``,
        whose value is also the method's return value.
        """
        # TODO: Add actual logic to make sure Hadoop jobs can run
        if self.state == service_states.RUNNING:
            return service_states.RUNNING
        else:
            pass
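
# A minimal usage sketch (illustrative only; assumes a CloudMan `app`
# object carrying the configuration that ApplicationService expects):
#
#   hadoop_svc = HadoopService(app)
#   hadoop_svc.start()  # returns immediately; work happens in a thread
#   ...
#   if hadoop_svc.status() == service_states.RUNNING:
#       log.info("Hadoop is ready for job submission")
#   ...
#   hadoop_svc.remove()  # tear down and delete the unpacked files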