
/scripts/api/example_watch_folder.py

https://bitbucket.org/cistrome/cistrome-harvard/
#!/usr/bin/env python
"""
Simple example script that watches a folder for new files, imports that data to a data library, and then
executes a workflow on it, creating a new history for each workflow invocation.
This assumes a workflow with only one input, though it could be adapted to many.
Sample call:
python example_watch_folder.py <api_key> <api_url> /tmp/g_inbox/ /tmp/g_inbox/done/ "API Imports" f2db41e1fa331b3e
NOTE: The upload method used requires the data library filesystem upload option allow_library_path_paste to be enabled.
"""
import os
import shutil
import sys
import time

sys.path.insert( 0, os.path.dirname( __file__ ) )
from common import submit, display
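# 'common' is the helper module shipped alongside this script in Galaxy's
# scripts/api directory. As used here (an assumption based on that module, not
# defined in this file): display() issues an authenticated GET, submit() an
# authenticated POST, and both return the decoded JSON response when called
# with return_formatted=False.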

def main(api_key, api_url, in_folder, out_folder, data_library, workflow):
    # Find/create the data library with the above name. Assume we're putting
    # datasets in the root folder '/'.
    libs = display(api_key, api_url + 'libraries', return_formatted=False)
    library_id = None
    for library in libs:
        if library['name'] == data_library:
            library_id = library['id']
    if not library_id:
        lib_create_data = {'name': data_library}
        library = submit(api_key, api_url + 'libraries', lib_create_data, return_formatted=False)
        library_id = library[0]['id']
    folders = display(api_key, api_url + "libraries/%s/contents" % library_id, return_formatted=False)
    library_folder_id = None
    for f in folders:
        if f['name'] == "/":
            library_folder_id = f['id']
    workflow_id = workflow
    workflow = display(api_key, api_url + 'workflows/%s' % workflow_id, return_formatted=False)
    if not workflow:
        print "Workflow %s not found, terminating." % workflow_id
        sys.exit(1)
    if not library_id or not library_folder_id:
        print "Failure to configure library destination."
        sys.exit(1)
    while True:
        # Watch in_folder, upload anything that shows up there to the data
        # library and get the ldda, invoke the workflow, move the file to
        # out_folder.
        for fname in os.listdir(in_folder):
            fullpath = os.path.join(in_folder, fname)
            if os.path.isfile(fullpath):
                data = {}
                data['folder_id'] = library_folder_id
                data['file_type'] = 'auto'
                data['dbkey'] = ''
                data['upload_option'] = 'upload_paths'
                data['filesystem_paths'] = fullpath
                data['create_type'] = 'file'
                libset = submit(api_key, api_url + "libraries/%s/contents" % library_id, data, return_formatted=False)
                # TODO: Handle this better, but the datatype isn't always set
                # for the follow-up workflow execution without this pause
                # (see the polling sketch below main()).
                time.sleep(5)
                for ds in libset:
                    if 'id' in ds:
                        # Successful upload of the dataset; we have the ldda now. Run the workflow.
                        wf_data = {}
                        wf_data['workflow_id'] = workflow['id']
                        wf_data['history'] = "%s - %s" % (fname, workflow['name'])
                        wf_data['ds_map'] = {}
                        for step_id, ds_in in workflow['inputs'].iteritems():
                            wf_data['ds_map'][step_id] = {'src': 'ld', 'id': ds['id']}
                        res = submit(api_key, api_url + 'workflows', wf_data, return_formatted=False)
                        if res:
                            print res
                            # Successful workflow execution, safe to move the dataset.
                            shutil.move(fullpath, os.path.join(out_folder, fname))
        time.sleep(10)
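
# A hedged alternative to the fixed time.sleep(5) above (a sketch only; the
# exact fields returned by the library contents endpoint vary by Galaxy
# version): poll the newly created library dataset until it reaches a
# terminal state instead of sleeping for a fixed interval, e.g.:
#
#     ds_url = api_url + "libraries/%s/contents/%s" % (library_id, ds['id'])
#     while display(api_key, ds_url, return_formatted=False).get('state') not in ('ok', 'error'):
#         time.sleep(1)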

if __name__ == '__main__':
    try:
        api_key = sys.argv[1]
        api_url = sys.argv[2]
        in_folder = sys.argv[3]
        out_folder = sys.argv[4]
        data_library = sys.argv[5]
        workflow = sys.argv[6]
    except IndexError:
        print 'usage: %s key url in_folder out_folder data_library workflow' % os.path.basename( sys.argv[0] )
        sys.exit( 1 )
    main(api_key, api_url, in_folder, out_folder, data_library, workflow)