/tools/data_source/microbial_import.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 86 lines · 61 code · 14 blank · 11 comment · 15 complexity · 38c6c7e221b6325283a97117a1867690 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Script that imports locally stored data as a new dataset for the user
  4. Usage: import id outputfile
  5. """
  6. import sys, os
  7. from shutil import copyfile
  8. assert sys.version_info[:2] >= ( 2, 4 )
  9. BUFFER = 1048576
  10. uids = sys.argv[1].split(",")
  11. out_file1 = sys.argv[2]
  12. #remove NONE from uids
  13. have_none = True
  14. while have_none:
  15. try:
  16. uids.remove('None')
  17. except:
  18. have_none = False
  19. #create dictionary keyed by uid of tuples of (displayName,filePath,build) for all files
  20. available_files = {}
  21. try:
  22. filename = sys.argv[-1]
  23. for i, line in enumerate( file( filename ) ):
  24. if not line or line[0:1] == "#" : continue
  25. fields = line.split('\t')
  26. try:
  27. info_type = fields.pop(0)
  28. if info_type.upper()=="DATA":
  29. uid = fields.pop(0)
  30. org_num = fields.pop(0)
  31. chr_acc = fields.pop(0)
  32. feature = fields.pop(0)
  33. filetype = fields.pop(0)
  34. path = fields.pop(0).replace("\r","").replace("\n","")
  35. file_type = filetype
  36. build = org_num
  37. description = uid
  38. else:
  39. continue
  40. except:
  41. continue
  42. available_files[uid]=(description,path,build,file_type,chr_acc)
  43. except:
  44. print >>sys.stderr, "It appears that the configuration file for this tool is missing."
  45. #create list of tuples of (displayName,FileName,build) for desired files
  46. desired_files = []
  47. for uid in uids:
  48. try:
  49. desired_files.append(available_files[uid])
  50. except:
  51. continue
  52. #copy first file to contents of given output file
  53. file1_copied = False
  54. while not file1_copied:
  55. try:
  56. first_file = desired_files.pop(0)
  57. except:
  58. print >>sys.stderr, "There were no valid files requested."
  59. sys.exit()
  60. file1_desc, file1_path, file1_build, file1_type,file1_chr_acc = first_file
  61. try:
  62. copyfile(file1_path,out_file1)
  63. print "#File1\t"+file1_desc+"\t"+file1_chr_acc+"\t"+file1_build+"\t"+file1_type
  64. file1_copied = True
  65. except:
  66. print >>sys.stderr, "The file specified is missing."
  67. continue
  68. #print >>sys.stderr, "The file specified is missing."
  69. #Tell post-process filter where remaining files reside
  70. for extra_output in desired_files:
  71. file_desc, file_path, file_build, file_type,file_chr_acc = extra_output
  72. print "#NewFile\t"+file_desc+"\t"+file_chr_acc+"\t"+file_build+"\t"+file_path+"\t"+file_type