/tools/data_source/ucsc_proxy.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 65 lines · 47 code · 13 blank · 5 comment · 13 complexity · 8c767f24b258ca8c02c77c5d2cb10d18 MD5 · raw file

  1. #!/usr/bin/env python
  2. import urllib
  3. import sys, os
  4. assert sys.version_info[:2] >= ( 2, 4 )
  5. CHUNK = 2**20 # 1Mb
  6. MAXSIZE = CHUNK * 100
  7. if __name__ == '__main__':
  8. if len(sys.argv) != 3:
  9. print 'Usage ucsc.py input_params output_file'
  10. sys.exit()
  11. inp_file = sys.argv[1]
  12. out_file = sys.argv[2]
  13. DEFAULT_URL = "http://genome.ucsc.edu/hgTables?"
  14. # this must stay a list to allow multiple selections for the same widget name (checkboxes)
  15. params = []
  16. for line in file(inp_file):
  17. line = line.strip()
  18. if line:
  19. parts = line.split('=')
  20. if len(parts) == 0:
  21. key = ""
  22. value = ""
  23. elif len(parts) == 1:
  24. key = parts[0]
  25. value = ""
  26. else:
  27. key = parts[0]
  28. value = parts[1]
  29. if key == 'display':
  30. print value
  31. # get url from params, refered from proxy.py, initialized by the tool xml
  32. elif key == 'proxy_url':
  33. DEFAULT_URL = value
  34. else:
  35. params.append( (key, value) )
  36. #print params
  37. encoded_params = urllib.urlencode(params)
  38. url = DEFAULT_URL + encoded_params
  39. #print url
  40. page = urllib.urlopen(url)
  41. fp = open(out_file, 'wt')
  42. size = 0
  43. while 1:
  44. data = page.read(CHUNK)
  45. if not data:
  46. break
  47. if size > MAXSIZE:
  48. fp.write('----- maximum datasize exceeded ---\n')
  49. break
  50. size += len(data)
  51. fp.write(data)
  52. fp.close()