PageRenderTime 25ms CodeModel.GetById 9ms app.highlight 12ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/api/example_watch_folder.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 88 lines | 80 code | 0 blank | 8 comment | 1 complexity | 0919096f333add1356f3448f734ff2ba MD5 | raw file
 1#!/usr/bin/env python
 2"""
 3Simple example script that watches a folder for new files, imports them into a data library, and then
 4executes a workflow on each one, creating a new history for each workflow invocation.
 5
 6This assumes a workflow with only one input, though it could be adapted to many.
 7
 8Sample call:
 9python example_watch_folder.py <api_key> <api_url> /tmp/g_inbox/ /tmp/g_inbox/done/ "API Imports" f2db41e1fa331b3e
10
11NOTE:  The upload method used requires the data library filesystem upload allow_library_path_paste
12"""
13import os
14import shutil
15import sys
16import time
17sys.path.insert( 0, os.path.dirname( __file__ ) )
18from common import submit, display
19
def main(api_key, api_url, in_folder, out_folder, data_library, workflow,
         poll_interval=10, upload_settle_time=5):
    """Watch ``in_folder`` forever and run a workflow on every file that appears.

    Setup (done once):
      * look up the data library named ``data_library`` through the
        ``libraries`` endpoint, creating it when no library has that name;
      * find the id of the library entry named "/" (the root folder);
      * fetch the workflow description for the given workflow id.

    Then, in an endless loop, every regular file found directly in
    ``in_folder`` is submitted to the library by filesystem path, the workflow
    is submitted once per returned dataset entry that carries an ``id``, and
    the file is moved to ``out_folder`` if at least one workflow submission
    returned a truthy result.

    Parameters:
        api_key: key passed through to the ``display``/``submit`` helpers.
        api_url: base API URL; endpoint names are appended directly, so it is
            expected to end with a slash.
        in_folder: directory that is polled for new files.
        out_folder: directory that processed files are moved into.
        data_library: name of the data library to import into.
        workflow: id of the workflow to run.
        poll_interval: seconds to sleep between scans of ``in_folder``.
        upload_settle_time: seconds to wait after an upload before invoking
            the workflow (see the TODO in the loop body).

    This function does not return. It calls ``sys.exit(1)`` when the workflow
    lookup yields nothing or the library / root folder cannot be resolved.
    """
    # Find/create the data library with the requested name.  Datasets are
    # placed in the root folder '/'.
    libs = display(api_key, api_url + 'libraries', return_formatted=False)
    library_id = None
    for library in libs:
        if library['name'] == data_library:
            library_id = library['id']
    if not library_id:
        lib_create_data = {'name': data_library}
        library = submit(api_key, api_url + 'libraries', lib_create_data, return_formatted=False)
        library_id = library[0]['id']

    folders = display(api_key, api_url + "libraries/%s/contents" % library_id, return_formatted=False)
    # Initialise explicitly so a library without a "/" entry reaches the
    # "Failure to configure" exit below instead of raising NameError.
    library_folder_id = None
    for f in folders:
        if f['name'] == "/":
            library_folder_id = f['id']

    # Keep the requested id around: ``workflow`` is rebound to the lookup
    # result, and the id is needed for the error message.
    workflow_id = workflow
    workflow = display(api_key, api_url + 'workflows/%s' % workflow_id, return_formatted=False)
    if not workflow:
        print("Workflow %s not found, terminating." % workflow_id)
        sys.exit(1)
    if not library_id or not library_folder_id:
        print("Failure to configure library destination.")
        sys.exit(1)

    while True:
        # Watch in_folder, upload anything that shows up there to the data
        # library and get the ldda, invoke the workflow, move the file to
        # out_folder.
        for fname in os.listdir(in_folder):
            fullpath = os.path.join(in_folder, fname)
            if not os.path.isfile(fullpath):
                continue
            data = {
                'folder_id': library_folder_id,
                'file_type': 'auto',
                'dbkey': '',
                'upload_option': 'upload_paths',
                'filesystem_paths': fullpath,
                'create_type': 'file',
            }
            libset = submit(api_key, api_url + "libraries/%s/contents" % library_id, data, return_formatted=False)
            # TODO Handle this better, but the datatype isn't always set for
            # the followup workflow execution without this pause.
            time.sleep(upload_settle_time)
            workflow_ran = False
            for ds in libset:
                if 'id' not in ds:
                    continue
                # Successful upload of dataset, we have the ldda now.  Map it
                # onto every workflow input and run the workflow.
                wf_data = {
                    'workflow_id': workflow['id'],
                    'history': "%s - %s" % (fname, workflow['name']),
                    'ds_map': dict(
                        (step_id, {'src': 'ld', 'id': ds['id']})
                        for step_id in workflow['inputs']
                    ),
                }
                res = submit(api_key, api_url + 'workflows', wf_data, return_formatted=False)
                if res:
                    print(res)
                    workflow_ran = True
            # Move the file once, after all dataset entries were handled, so a
            # second successful entry cannot try to move an already-moved file.
            if workflow_ran:
                shutil.move(fullpath, os.path.join(out_folder, fname))
        time.sleep(poll_interval)
75
if __name__ == '__main__':
    # Six positional arguments are required after the script name; anything
    # beyond the sixth is ignored.
    if len(sys.argv) < 7:
        print('usage: %s key url in_folder out_folder data_library workflow' % os.path.basename(sys.argv[0]))
        sys.exit(1)
    api_key, api_url, in_folder, out_folder, data_library, workflow = sys.argv[1:7]
    main(api_key, api_url, in_folder, out_folder, data_library, workflow)
88