PageRenderTime 38ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/create_batch_for_workflow/pymodules/python2.7/lib/python/gemini/scripts/gemini_install.py

https://gitlab.com/pooja043/Globus_Docker_2
Python | 326 lines | 303 code | 5 blank | 18 comment | 15 complexity | b274a8293e3fd07bb72f189aa0697274 MD5 | raw file
  1. #!/usr/bin/env python
  2. """Installer for gemini: a lightweight db framework for disease and population genetics.
  3. https://github.com/arq5x/gemini
  4. Handles installation of:
  5. - Required third party software
  6. - Required Python libraries
  7. - Gemini application
  8. - Associated data files
  9. Requires: Python 2.7, git, and compilers (gcc, g++)
  10. Run gemini_install.py -h for usage.
  11. """
  12. import argparse
  13. import platform
  14. import os
  15. import shutil
  16. import subprocess
  17. import sys
  18. import urllib2
  19. remotes = {"requirements_pip":
  20. "https://raw.github.com/arq5x/gemini/master/requirements.txt",
  21. "requirements_conda":
  22. "",
  23. "versioned_installations":
  24. "https://raw.githubusercontent.com/arq5x/gemini/master/versioning/",
  25. "cloudbiolinux":
  26. "https://github.com/chapmanb/cloudbiolinux.git",
  27. "gemini":
  28. "https://github.com/arq5x/gemini.git",
  29. "anaconda":
  30. "http://repo.continuum.io/miniconda/Miniconda-3.5.5-%s-x86_64.sh"}
  31. def main(args):
  32. check_dependencies()
  33. work_dir = os.path.join(os.getcwd(), "tmpgemini_install")
  34. if not os.path.exists(work_dir):
  35. os.makedirs(work_dir)
  36. os.chdir(work_dir)
  37. if args.gemini_version != 'latest':
  38. requirements_pip = os.path.join( remotes['versioned_installations'], args.gemini_version, 'requirements_pip.txt' )
  39. requirements_conda = os.path.join( remotes['versioned_installations'], args.gemini_version, 'requirements_conda.txt' )
  40. try:
  41. urllib2.urlopen( requirements_pip )
  42. except:
  43. sys.exit('Gemini version %s could not be found. Try the latest version.' % args.gemini_version)
  44. remotes.update( {'requirements_pip': requirements_pip, 'requirements_conda': requirements_conda} )
  45. print "Installing isolated base python installation"
  46. make_dirs(args)
  47. anaconda = install_anaconda_python(args, remotes)
  48. print "Installing gemini..."
  49. install_conda_pkgs(anaconda, remotes, args)
  50. gemini = install_gemini(anaconda, remotes, args.datadir, args.tooldir, args.sudo)
  51. if args.install_tools:
  52. cbl = get_cloudbiolinux(remotes["cloudbiolinux"])
  53. fabricrc = write_fabricrc(cbl["fabricrc"], args.tooldir, args.datadir,
  54. "ubuntu", args.sudo)
  55. print "Installing associated tools..."
  56. install_tools(gemini["fab"], cbl["tool_fabfile"], fabricrc)
  57. os.chdir(work_dir)
  58. install_data(gemini["python"], gemini["data_script"], args)
  59. os.chdir(work_dir)
  60. test_script = install_testbase(args.datadir, remotes["gemini"], gemini)
  61. print "Finished: gemini, tools and data installed"
  62. print " Tools installed in:\n %s" % args.tooldir
  63. print " Data installed in:\n %s" % args.datadir
  64. print " Run tests with:\n cd %s && bash %s" % (os.path.dirname(test_script),
  65. os.path.basename(test_script))
  66. print " NOTE: be sure to add %s/bin to your PATH." % args.tooldir
  67. print " NOTE: Install data files for GERP_bp & CADD_scores (not installed by default).\n "
  68. shutil.rmtree(work_dir)
  69. def install_gemini(anaconda, remotes, datadir, tooldir, use_sudo):
  70. """Install gemini plus python dependencies inside isolated Anaconda environment.
  71. """
  72. # Work around issue with distribute where asks for 'distribute==0.0'
  73. # try:
  74. # subprocess.check_call([anaconda["easy_install"], "--upgrade", "distribute"])
  75. # except subprocess.CalledProcessError:
  76. # try:
  77. # subprocess.check_call([anaconda["pip"], "install", "--upgrade", "distribute"])
  78. # except subprocess.CalledProcessError:
  79. # pass
  80. # Ensure latest version of fabric for running CloudBioLinux
  81. subprocess.check_call([anaconda["pip"], "install", "fabric>=1.7.0"])
  82. # allow downloads excluded in recent pip (1.5 or greater) versions
  83. try:
  84. p = subprocess.Popen([anaconda["pip"], "--version"], stdout=subprocess.PIPE)
  85. pip_version = p.communicate()[0].split()[1]
  86. except:
  87. pip_version = ""
  88. pip_compat = []
  89. if pip_version >= "1.5":
  90. for req in ["python-graph-core", "python-graph-dot"]:
  91. pip_compat += ["--allow-external", req, "--allow-unverified", req]
  92. subprocess.check_call([anaconda["pip"], "install"] + pip_compat + ["-r", remotes["requirements_pip"]])
  93. python_bin = os.path.join(anaconda["dir"], "bin", "python")
  94. _cleanup_problem_files(anaconda["dir"])
  95. _add_missing_inits(python_bin)
  96. for final_name, ve_name in [("gemini", "gemini"), ("gemini_python", "python"),
  97. ("gemini_pip", "pip")]:
  98. final_script = os.path.join(tooldir, "bin", final_name)
  99. ve_script = os.path.join(anaconda["dir"], "bin", ve_name)
  100. sudo_cmd = ["sudo"] if use_sudo else []
  101. if os.path.lexists(final_script):
  102. subprocess.check_call(sudo_cmd + ["rm", "-f", final_script])
  103. else:
  104. subprocess.check_call(sudo_cmd + ["mkdir", "-p", os.path.dirname(final_script)])
  105. cmd = ["ln", "-s", ve_script, final_script]
  106. subprocess.check_call(sudo_cmd + cmd)
  107. library_loc = subprocess.check_output("%s -c 'import gemini; print gemini.__file__'" % python_bin,
  108. shell=True)
  109. return {"fab": os.path.join(anaconda["dir"], "bin", "fab"),
  110. "data_script": os.path.join(os.path.dirname(library_loc.strip()), "install-data.py"),
  111. "python": python_bin,
  112. "cmd": os.path.join(anaconda["dir"], "bin", "gemini")}
  113. def install_conda_pkgs(anaconda, remotes, args):
  114. if args.gemini_version != 'latest':
  115. pkgs = ["--file", remotes['requirements_conda']]
  116. else:
  117. pkgs = ["bx-python", "conda", "cython", "ipython", "jinja2", "nose", "numpy",
  118. "pip", "pycrypto", "pyparsing", "pysam", "pyyaml",
  119. "pyzmq", "pandas", "scipy"]
  120. channels = ["-c", "https://conda.binstar.org/bcbio"]
  121. subprocess.check_call([anaconda["conda"], "install", "--yes"] + channels + pkgs)
  122. def install_anaconda_python(args, remotes):
  123. """Provide isolated installation of Anaconda python.
  124. http://docs.continuum.io/anaconda/index.html
  125. """
  126. anaconda_dir = os.path.join(args.datadir, "anaconda")
  127. bindir = os.path.join(anaconda_dir, "bin")
  128. conda = os.path.join(bindir, "conda")
  129. if platform.mac_ver()[0]:
  130. distribution = "macosx"
  131. else:
  132. distribution = "linux"
  133. if not os.path.exists(anaconda_dir) or not os.path.exists(conda):
  134. if os.path.exists(anaconda_dir):
  135. shutil.rmtree(anaconda_dir)
  136. url = remotes["anaconda"] % ("MacOSX" if distribution == "macosx" else "Linux")
  137. if not os.path.exists(os.path.basename(url)):
  138. subprocess.check_call(["wget", url])
  139. subprocess.check_call("bash %s -b -p %s" %
  140. (os.path.basename(url), anaconda_dir), shell=True)
  141. return {"conda": conda,
  142. "pip": os.path.join(bindir, "pip"),
  143. "easy_install": os.path.join(bindir, "easy_install"),
  144. "dir": anaconda_dir}
  145. def _add_missing_inits(python_bin):
  146. """pip/setuptools strips __init__.py files with namespace declarations.
  147. I have no idea why, but this adds them back.
  148. """
  149. library_loc = subprocess.check_output("%s -c 'import pygraph.classes.graph; "
  150. "print pygraph.classes.graph.__file__'" % python_bin,
  151. shell=True)
  152. pygraph_init = os.path.normpath(os.path.join(os.path.dirname(library_loc.strip()), os.pardir,
  153. "__init__.py"))
  154. if not os.path.exists(pygraph_init):
  155. with open(pygraph_init, "w") as out_handle:
  156. out_handle.write("__import__('pkg_resources').declare_namespace(__name__)\n")
  157. def _cleanup_problem_files(venv_dir):
  158. """Remove problem bottle items in PATH which conflict with site-packages
  159. """
  160. for cmd in ["bottle.py", "bottle.pyc"]:
  161. bin_cmd = os.path.join(venv_dir, "bin", cmd)
  162. if os.path.exists(bin_cmd):
  163. os.remove(bin_cmd)
  164. def install_tools(fab_cmd, fabfile, fabricrc):
  165. """Install 3rd party tools used by Gemini using a custom CloudBioLinux flavor.
  166. """
  167. tools = ["tabix", "grabix", "samtools", "bedtools"]
  168. flavor_dir = os.path.join(os.getcwd(), "gemini-flavor")
  169. if not os.path.exists(flavor_dir):
  170. os.makedirs(flavor_dir)
  171. with open(os.path.join(flavor_dir, "main.yaml"), "w") as out_handle:
  172. out_handle.write("packages:\n")
  173. out_handle.write(" - bio_nextgen\n")
  174. out_handle.write("libraries:\n")
  175. with open(os.path.join(flavor_dir, "custom.yaml"), "w") as out_handle:
  176. out_handle.write("bio_nextgen:\n")
  177. for tool in tools:
  178. out_handle.write(" - %s\n" % tool)
  179. cmd = [fab_cmd, "-f", fabfile, "-H", "localhost", "-c", fabricrc,
  180. "install_biolinux:target=custom,flavor=%s" % flavor_dir]
  181. subprocess.check_call(cmd)
  182. def install_data(python_cmd, data_script, args):
  183. """Install biological data used by gemini.
  184. """
  185. data_dir = os.path.join(args.datadir, "gemini_data") if args.sharedpy else args.datadir
  186. cmd = [python_cmd, data_script, data_dir]
  187. if args.install_data:
  188. print "Installing gemini data..."
  189. else:
  190. cmd.append("--nodata")
  191. subprocess.check_call(cmd)
  192. def install_testbase(datadir, repo, gemini):
  193. """Clone or update gemini code so we have the latest test suite.
  194. """
  195. gemini_dir = os.path.join(datadir, "gemini")
  196. cur_dir = os.getcwd()
  197. needs_git = True
  198. if os.path.exists(gemini_dir):
  199. os.chdir(gemini_dir)
  200. try:
  201. subprocess.check_call(["git", "pull", "origin", "master", "--tags"])
  202. needs_git = False
  203. except:
  204. os.chdir(cur_dir)
  205. shutil.rmtree(gemini_dir)
  206. if needs_git:
  207. os.chdir(os.path.split(gemini_dir)[0])
  208. subprocess.check_call(["git", "clone", repo])
  209. os.chdir(gemini_dir)
  210. _update_testdir_revision(gemini["cmd"])
  211. os.chdir(cur_dir)
  212. return os.path.join(gemini_dir, "master-test.sh")
  213. def _update_testdir_revision(gemini_cmd):
  214. """Update test directory to be in sync with a tagged installed version or development.
  215. """
  216. try:
  217. p = subprocess.Popen([gemini_cmd, "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  218. gversion = p.communicate()[0].split()[1]
  219. except:
  220. gversion = ""
  221. tag = ""
  222. if gversion:
  223. try:
  224. p = subprocess.Popen("git tag -l | grep %s" % gversion, stdout=subprocess.PIPE, shell=True)
  225. tag = p.communicate()[0].strip()
  226. except:
  227. tag = ""
  228. if tag:
  229. subprocess.check_call(["git", "checkout", "tags/%s" % tag])
  230. pass
  231. else:
  232. subprocess.check_call(["git", "reset", "--hard", "HEAD"])
  233. def write_fabricrc(base_file, tooldir, datadir, distribution, use_sudo):
  234. out_file = os.path.join(os.getcwd(), os.path.basename(base_file))
  235. with open(base_file) as in_handle:
  236. with open(out_file, "w") as out_handle:
  237. for line in in_handle:
  238. if line.startswith("system_install"):
  239. line = "system_install = %s\n" % tooldir
  240. elif line.startswith("local_install"):
  241. line = "local_install = %s/install\n" % tooldir
  242. elif line.startswith("data_files"):
  243. line = "data_files = %s\n" % datadir
  244. elif line.startswith("distribution"):
  245. line = "distribution = %s\n" % distribution
  246. elif line.startswith("use_sudo"):
  247. line = "use_sudo = %s\n" % use_sudo
  248. elif line.startswith("edition"):
  249. line = "edition = minimal\n"
  250. elif line.startswith("#galaxy_home"):
  251. line = "galaxy_home = %s\n" % os.path.join(datadir, "galaxy")
  252. out_handle.write(line)
  253. return out_file
  254. def make_dirs(args):
  255. sudo_cmd = ["sudo"] if args.sudo else []
  256. for dname in [args.datadir, args.tooldir]:
  257. if not os.path.exists(dname):
  258. subprocess.check_call(sudo_cmd + ["mkdir", "-p", dname])
  259. username = subprocess.check_output("echo $USER", shell=True).strip()
  260. subprocess.check_call(sudo_cmd + ["chown", username, dname])
  261. def get_cloudbiolinux(repo):
  262. base_dir = os.path.join(os.getcwd(), "cloudbiolinux")
  263. if not os.path.exists(base_dir):
  264. subprocess.check_call(["git", "clone", repo])
  265. return {"fabricrc": os.path.join(base_dir, "config", "fabricrc.txt"),
  266. "tool_fabfile": os.path.join(base_dir, "fabfile.py")}
  267. def check_dependencies():
  268. """Ensure required tools for installation are present.
  269. """
  270. print "Checking required dependencies..."
  271. for cmd, url in [("git", "http://git-scm.com/"),
  272. ("wget", "http://www.gnu.org/software/wget/"),
  273. ("curl", "http://curl.haxx.se/")]:
  274. try:
  275. retcode = subprocess.call([cmd, "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  276. except OSError:
  277. retcode = 127
  278. if retcode == 127:
  279. raise OSError("gemini requires %s (%s)" % (cmd, url))
  280. else:
  281. print " %s found" % cmd
  282. if __name__ == "__main__":
  283. parser = argparse.ArgumentParser(description="Automated installer for gemini framework.")
  284. parser.add_argument("tooldir", help="Directory to install 3rd party software tools",
  285. type=os.path.abspath)
  286. parser.add_argument("datadir", help="Directory to install gemini data files",
  287. type=os.path.abspath)
  288. parser.add_argument("--gemini-version", dest="gemini_version", default="latest",
  289. help="Install one specific gemini version with a fixed dependency chain.")
  290. parser.add_argument("--nosudo", help="Specify we cannot use sudo for commands",
  291. dest="sudo", action="store_false", default=True)
  292. parser.add_argument("--notools", help="Do not install tool dependencies",
  293. dest="install_tools", action="store_false", default=True)
  294. parser.add_argument("--nodata", help="Do not install data dependencies",
  295. dest="install_data", action="store_false", default=True)
  296. parser.add_argument("--sharedpy", help=("Indicate we share an Anaconda Python directory with "
  297. "another project. Creates unique gemini data directory."),
  298. action="store_true", default=False)
  299. if len(sys.argv) == 1:
  300. parser.print_help()
  301. else:
  302. main(parser.parse_args())