/tools/data_source/import.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 61 lines · 42 code · 13 blank · 6 comment · 5 complexity · 5dfa55d2630b7ca909ce0decdffc86e7 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Script that imports locally stored data as a new dataset for the user
  4. Usage: import id outputfile
  5. """
  6. import sys, os
  7. assert sys.version_info[:2] >= ( 2, 4 )
  8. BUFFER = 1048576
  9. dataid = sys.argv[1]
  10. out_name = sys.argv[2]
  11. id2name = {
  12. 'eryth' : 'ErythPreCRMmm3_cusTrk.txt',
  13. 'cishg16' : 'ReglRegHBBhg16CusTrk.txt',
  14. 'cishg17' : 'ReglRegHBBhg17CusTrk.txt',
  15. 'exons' : 'ExonsKnownGenes_mm3.txt',
  16. 'krhg16' : 'known_regulatory_hg16.bed',
  17. 'krhg17' : 'known_regulatory_hg17.bed',
  18. 'tARhg16mmc' : 'hg16.mouse.t_AR.cold.bed',
  19. 'tARhg16mmm' : 'hg16.mouse.t_AR.medium.bed',
  20. 'tARhg16mmh' : 'hg16.mouse.t_AR.hot.bed',
  21. 'tARhg16rnc' : 'hg16.rat.t_AR.cold.bed',
  22. 'tARhg16rnm' : 'hg16.rat.t_AR.medium.bed',
  23. 'tARhg16rnh' : 'hg16.rat.t_AR.hot.bed',
  24. 'phastConsHg16' : 'phastConsMost_hg16.bed',
  25. 'omimhg16' : 'omimDisorders_hg16.tab',
  26. 'omimhg17' : 'omimDisorders_hg17.tab',
  27. }
  28. fname = id2name.get(dataid, '')
  29. if not fname:
  30. print 'Importing invalid data %s' % dataid
  31. sys.exit()
  32. else:
  33. print 'Imported %s' % fname
  34. # this path is hardcoded
  35. inp_name = os.path.join('database', 'import', fname)
  36. try:
  37. inp = open(inp_name, 'rt')
  38. except:
  39. print 'Could not find file %s' % inp_name
  40. sys.exit()
  41. out = open(out_name, 'wt')
  42. while 1:
  43. data = inp.read(BUFFER)
  44. if not data:
  45. break
  46. out.write(data)
  47. inp.close()
  48. out.close()