/lib/galaxy/datatypes/metadata.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 755 lines · 516 code · 128 blank · 111 comment · 144 complexity · 4b1df126956c2139fc1090d6c64b19c2 MD5 · raw file

  1. """
  2. Galaxy Metadata
  3. """
  4. import copy
  5. import cPickle
  6. import json
  7. import logging
  8. import os
  9. import shutil
  10. import sys
  11. import tempfile
  12. import weakref
  13. from os.path import abspath
  14. import galaxy.model
  15. from galaxy.util import listify, stringify_dictionary_keys, string_as_bool
  16. from galaxy.util.odict import odict
  17. from galaxy.util import in_directory
  18. from galaxy.web import form_builder
  19. from sqlalchemy.orm import object_session
  20. log = logging.getLogger(__name__)
  21. STATEMENTS = "__galaxy_statements__" #this is the name of the property in a Datatype class where new metadata spec element Statements are stored
  22. class Statement( object ):
  23. """
  24. This class inserts its target into a list in the surrounding
  25. class. the data.Data class has a metaclass which executes these
  26. statements. This is how we shove the metadata element spec into
  27. the class.
  28. """
  29. def __init__( self, target ):
  30. self.target = target
  31. def __call__( self, *args, **kwargs ):
  32. class_locals = sys._getframe( 1 ).f_locals #get the locals dictionary of the frame object one down in the call stack (i.e. the Datatype class calling MetadataElement)
  33. statements = class_locals.setdefault( STATEMENTS, [] ) #get and set '__galaxy_statments__' to an empty list if not in locals dict
  34. statements.append( ( self, args, kwargs ) ) #add Statement containing info to populate a MetadataElementSpec
  35. @classmethod
  36. def process( cls, element ):
  37. for statement, args, kwargs in getattr( element, STATEMENTS, [] ):
  38. statement.target( element, *args, **kwargs ) #statement.target is MetadataElementSpec, element is a Datatype class
  39. class MetadataCollection( object ):
  40. """
  41. MetadataCollection is not a collection at all, but rather a proxy
  42. to the real metadata which is stored as a Dictionary. This class
  43. handles processing the metadata elements when they are set and
  44. retrieved, returning default values in cases when metadata is not set.
  45. """
  46. def __init__(self, parent ):
  47. self.parent = parent
  48. #initialize dict if needed
  49. if self.parent._metadata is None:
  50. self.parent._metadata = {}
  51. def get_parent( self ):
  52. if "_parent" in self.__dict__:
  53. return self.__dict__["_parent"]()
  54. return None
  55. def set_parent( self, parent ):
  56. self.__dict__["_parent"] = weakref.ref( parent ) # use weakref to prevent a circular reference interfering with garbage collection: hda/lda (parent) <--> MetadataCollection (self) ; needs to be hashable, so cannot use proxy.
  57. parent = property( get_parent, set_parent )
  58. @property
  59. def spec( self ):
  60. return self.parent.datatype.metadata_spec
  61. def __iter__( self ):
  62. return self.parent._metadata.__iter__()
  63. def get( self, key, default=None ):
  64. try:
  65. return self.__getattr__( key ) or default
  66. except:
  67. return default
  68. def items(self):
  69. return iter( [ ( k, self.get( k ) ) for k in self.spec.iterkeys() ] )
  70. def __str__(self):
  71. return dict( self.items() ).__str__()
  72. def __nonzero__( self ):
  73. return bool( self.parent._metadata )
  74. def __getattr__( self, name ):
  75. if name in self.spec:
  76. if name in self.parent._metadata:
  77. return self.spec[name].wrap( self.parent._metadata[name], object_session( self.parent ) )
  78. return self.spec[name].wrap( self.spec[name].default, object_session( self.parent ) )
  79. if name in self.parent._metadata:
  80. return self.parent._metadata[name]
  81. def __setattr__( self, name, value ):
  82. if name == "parent":
  83. return self.set_parent( value )
  84. else:
  85. if name in self.spec:
  86. self.parent._metadata[name] = self.spec[name].unwrap( value )
  87. else:
  88. self.parent._metadata[name] = value
  89. def element_is_set( self, name ):
  90. return bool( self.parent._metadata.get( name, False ) )
  91. def get_html_by_name( self, name, **kwd ):
  92. if name in self.spec:
  93. rval = self.spec[name].param.get_html( value=getattr( self, name ), context=self, **kwd )
  94. if rval is None:
  95. return self.spec[name].no_value
  96. return rval
  97. def make_dict_copy( self, to_copy ):
  98. """Makes a deep copy of input iterable to_copy according to self.spec"""
  99. rval = {}
  100. for key, value in to_copy.items():
  101. if key in self.spec:
  102. rval[key] = self.spec[key].param.make_copy( value, target_context=self, source_context=to_copy )
  103. return rval
  104. def from_JSON_dict( self, filename ):
  105. dataset = self.parent
  106. log.debug( 'loading metadata from file for: %s %s' % ( dataset.__class__.__name__, dataset.id ) )
  107. JSONified_dict = json.load( open( filename ) )
  108. for name, spec in self.spec.items():
  109. if name in JSONified_dict:
  110. dataset._metadata[ name ] = spec.param.from_external_value( JSONified_dict[ name ], dataset )
  111. elif name in dataset._metadata:
  112. #if the metadata value is not found in our externally set metadata but it has a value in the 'old'
  113. #metadata associated with our dataset, we'll delete it from our dataset's metadata dict
  114. del dataset._metadata[ name ]
  115. def to_JSON_dict( self, filename ):
  116. #galaxy.model.customtypes.json_encoder.encode()
  117. meta_dict = {}
  118. dataset_meta_dict = self.parent._metadata
  119. for name, spec in self.spec.items():
  120. if name in dataset_meta_dict:
  121. meta_dict[ name ] = spec.param.to_external_value( dataset_meta_dict[ name ] )
  122. json.dump( meta_dict, open( filename, 'wb+' ) )
  123. def __getstate__( self ):
  124. return None #cannot pickle a weakref item (self._parent), when data._metadata_collection is None, it will be recreated on demand
  125. class MetadataSpecCollection( odict ):
  126. """
  127. A simple extension of dict which allows cleaner access to items
  128. and allows the values to be iterated over directly as if it were a
  129. list. append() is also implemented for simplicity and does not
  130. "append".
  131. """
  132. def __init__( self, dict = None ):
  133. odict.__init__( self, dict = None )
  134. def append( self, item ):
  135. self[item.name] = item
  136. def iter( self ):
  137. return self.itervalues()
  138. def __getattr__( self, name ):
  139. return self.get( name )
  140. def __repr__( self ):
  141. # force elements to draw with __str__ for sphinx-apidoc
  142. return ', '.join([ item.__str__() for item in self.iter() ])
  143. class MetadataParameter( object ):
  144. def __init__( self, spec ):
  145. self.spec = spec
  146. def get_html_field( self, value=None, context=None, other_values=None, **kwd ):
  147. context = context or {}
  148. other_values = other_values or {}
  149. return form_builder.TextField( self.spec.name, value=value )
  150. def get_html( self, value, context=None, other_values=None, **kwd ):
  151. """
  152. The "context" is simply the metadata collection/bunch holding
  153. this piece of metadata. This is passed in to allow for
  154. metadata to validate against each other (note: this could turn
  155. into a huge, recursive mess if not done with care). For
  156. example, a column assignment should validate against the
  157. number of columns in the dataset.
  158. """
  159. context = context or {}
  160. other_values = other_values or {}
  161. if self.spec.get("readonly"):
  162. return value
  163. if self.spec.get("optional"):
  164. checked = False
  165. if value: checked = "true"
  166. checkbox = form_builder.CheckboxField( "is_" + self.spec.name, checked=checked )
  167. return checkbox.get_html() + self.get_html_field( value=value, context=context, other_values=other_values, **kwd ).get_html()
  168. else:
  169. return self.get_html_field( value=value, context=context, other_values=other_values, **kwd ).get_html()
  170. def to_string( self, value ):
  171. return str( value )
  172. def make_copy( self, value, target_context = None, source_context = None ):
  173. return copy.deepcopy( value )
  174. @classmethod
  175. def marshal ( cls, value ):
  176. """
  177. This method should/can be overridden to convert the incoming
  178. value to whatever type it is supposed to be.
  179. """
  180. return value
  181. def validate( self, value ):
  182. """
  183. Throw an exception if the value is invalid.
  184. """
  185. pass
  186. def unwrap( self, form_value ):
  187. """
  188. Turns a value into its storable form.
  189. """
  190. value = self.marshal( form_value )
  191. self.validate( value )
  192. return value
  193. def wrap( self, value, session ):
  194. """
  195. Turns a value into its usable form.
  196. """
  197. return value
  198. def from_external_value( self, value, parent ):
  199. """
  200. Turns a value read from an external dict into its value to be pushed directly into the metadata dict.
  201. """
  202. return value
  203. def to_external_value( self, value ):
  204. """
  205. Turns a value read from a metadata into its value to be pushed directly into the external dict.
  206. """
  207. return value
  208. class MetadataElementSpec( object ):
  209. """
  210. Defines a metadata element and adds it to the metadata_spec (which
  211. is a MetadataSpecCollection) of datatype.
  212. """
  213. def __init__( self, datatype,
  214. name=None, desc=None, param=MetadataParameter, default=None, no_value = None,
  215. visible=True, set_in_upload = False, **kwargs ):
  216. self.name = name
  217. self.desc = desc or name
  218. self.default = default
  219. self.no_value = no_value
  220. self.visible = visible
  221. self.set_in_upload = set_in_upload
  222. # Catch-all, allows for extra attributes to be set
  223. self.__dict__.update(kwargs)
  224. # set up param last, as it uses values set above
  225. self.param = param( self )
  226. # add spec element to the spec
  227. datatype.metadata_spec.append( self )
  228. def get( self, name, default=None ):
  229. return self.__dict__.get(name, default)
  230. def wrap( self, value, session ):
  231. """
  232. Turns a stored value into its usable form.
  233. """
  234. return self.param.wrap( value, session )
  235. def unwrap( self, value ):
  236. """
  237. Turns an incoming value into its storable form.
  238. """
  239. return self.param.unwrap( value )
  240. def __str__( self ):
  241. #TODO??: assuming param is the class of this MetadataElementSpec - add the plain class name for that
  242. spec_dict = dict( param_class=self.param.__class__.__name__ )
  243. spec_dict.update( self.__dict__ )
  244. return ( "{name} ({param_class}): {desc}, defaults to '{default}'".format( **spec_dict ) )
  245. # create a statement class that, when called,
  246. # will add a new MetadataElementSpec to a class's metadata_spec
  247. MetadataElement = Statement( MetadataElementSpec )
  248. """
  249. MetadataParameter sub-classes.
  250. """
  251. class SelectParameter( MetadataParameter ):
  252. def __init__( self, spec ):
  253. MetadataParameter.__init__( self, spec )
  254. self.values = self.spec.get( "values" )
  255. self.multiple = string_as_bool( self.spec.get( "multiple" ) )
  256. def to_string( self, value ):
  257. if value in [ None, [] ]:
  258. return str( self.spec.no_value )
  259. if not isinstance( value, list ):
  260. value = [value]
  261. return ",".join( map( str, value ) )
  262. def get_html_field( self, value=None, context=None, other_values=None, values=None, **kwd ):
  263. context = context or {}
  264. other_values = other_values or {}
  265. field = form_builder.SelectField( self.spec.name, multiple=self.multiple, display=self.spec.get("display") )
  266. if self.values:
  267. value_list = self.values
  268. elif values:
  269. value_list = values
  270. elif value:
  271. value_list = [ ( v, v ) for v in listify( value )]
  272. else:
  273. value_list = []
  274. for val, label in value_list:
  275. try:
  276. if ( self.multiple and val in value ) or ( not self.multiple and val == value ):
  277. field.add_option( label, val, selected=True )
  278. else:
  279. field.add_option( label, val, selected=False )
  280. except TypeError:
  281. field.add_option( val, label, selected=False )
  282. return field
  283. def get_html( self, value, context=None, other_values=None, values=None, **kwd ):
  284. context = context or {}
  285. other_values = other_values or {}
  286. if self.spec.get("readonly"):
  287. if value in [ None, [] ]:
  288. return str( self.spec.no_value )
  289. return ", ".join( map( str, value ) )
  290. return MetadataParameter.get_html( self, value, context=context, other_values=other_values, values=values, **kwd )
  291. def wrap( self, value, session ):
  292. value = self.marshal( value ) #do we really need this (wasteful)? - yes because we are not sure that all existing selects have been stored previously as lists. Also this will handle the case where defaults/no_values are specified and are single non-list values.
  293. if self.multiple:
  294. return value
  295. elif value:
  296. return value[0] #single select, only return the first value
  297. return None
  298. @classmethod
  299. def marshal( cls, value ):
  300. # Store select as list, even if single item
  301. if value is None: return []
  302. if not isinstance( value, list ): return [value]
  303. return value
  304. class DBKeyParameter( SelectParameter ):
  305. def get_html_field( self, value=None, context=None, other_values=None, values=None, **kwd):
  306. context = context or {}
  307. other_values = other_values or {}
  308. try:
  309. values = kwd['trans'].db_builds
  310. except KeyError:
  311. pass
  312. return super(DBKeyParameter, self).get_html_field( value, context, other_values, values, **kwd)
  313. def get_html( self, value=None, context=None, other_values=None, values=None, **kwd):
  314. context = context or {}
  315. other_values = other_values or {}
  316. try:
  317. values = kwd['trans'].db_builds
  318. except KeyError:
  319. pass
  320. return super(DBKeyParameter, self).get_html( value, context, other_values, values, **kwd)
  321. class RangeParameter( SelectParameter ):
  322. def __init__( self, spec ):
  323. SelectParameter.__init__( self, spec )
  324. # The spec must be set with min and max values
  325. self.min = spec.get( "min" ) or 1
  326. self.max = spec.get( "max" ) or 1
  327. self.step = self.spec.get( "step" ) or 1
  328. def get_html_field( self, value=None, context=None, other_values=None, values=None, **kwd ):
  329. context = context or {}
  330. other_values = other_values or {}
  331. if values is None:
  332. values = zip( range( self.min, self.max, self.step ), range( self.min, self.max, self.step ))
  333. return SelectParameter.get_html_field( self, value=value, context=context, other_values=other_values, values=values, **kwd )
  334. def get_html( self, value, context=None, other_values=None, values=None, **kwd ):
  335. context = context or {}
  336. other_values = other_values or {}
  337. if values is None:
  338. values = zip( range( self.min, self.max, self.step ), range( self.min, self.max, self.step ))
  339. return SelectParameter.get_html( self, value, context=context, other_values=other_values, values=values, **kwd )
  340. @classmethod
  341. def marshal( cls, value ):
  342. value = SelectParameter.marshal( value )
  343. values = [ int(x) for x in value ]
  344. return values
  345. class ColumnParameter( RangeParameter ):
  346. def get_html_field( self, value=None, context=None, other_values=None, values=None, **kwd ):
  347. context = context or {}
  348. other_values = other_values or {}
  349. if values is None and context:
  350. column_range = range( 1, ( context.columns or 0 ) + 1, 1 )
  351. values = zip( column_range, column_range )
  352. return RangeParameter.get_html_field( self, value=value, context=context, other_values=other_values, values=values, **kwd )
  353. def get_html( self, value, context=None, other_values=None, values=None, **kwd ):
  354. context = context or {}
  355. other_values = other_values or {}
  356. if values is None and context:
  357. column_range = range( 1, ( context.columns or 0 ) + 1, 1 )
  358. values = zip( column_range, column_range )
  359. return RangeParameter.get_html( self, value, context=context, other_values=other_values, values=values, **kwd )
  360. class ColumnTypesParameter( MetadataParameter ):
  361. def to_string( self, value ):
  362. return ",".join( map( str, value ) )
  363. class ListParameter( MetadataParameter ):
  364. def to_string( self, value ):
  365. return ",".join( [str(x) for x in value] )
  366. class DictParameter( MetadataParameter ):
  367. def to_string( self, value ):
  368. return json.dumps( value )
  369. class PythonObjectParameter( MetadataParameter ):
  370. def to_string( self, value ):
  371. if not value:
  372. return self.spec._to_string( self.spec.no_value )
  373. return self.spec._to_string( value )
  374. def get_html_field( self, value=None, context=None, other_values=None, **kwd ):
  375. context = context or {}
  376. other_values = other_values or {}
  377. return form_builder.TextField( self.spec.name, value=self._to_string( value ) )
  378. def get_html( self, value=None, context=None, other_values=None, **kwd ):
  379. context = context or {}
  380. other_values = other_values or {}
  381. return str( self )
  382. @classmethod
  383. def marshal( cls, value ):
  384. return value
  385. class FileParameter( MetadataParameter ):
  386. def to_string( self, value ):
  387. if not value:
  388. return str( self.spec.no_value )
  389. return value.file_name
  390. def get_html_field( self, value=None, context=None, other_values=None, **kwd ):
  391. context = context or {}
  392. other_values = other_values or {}
  393. return form_builder.TextField( self.spec.name, value=str( value.id ) )
  394. def get_html( self, value=None, context=None, other_values=None, **kwd ):
  395. context = context or {}
  396. other_values = other_values or {}
  397. return "<div>No display available for Metadata Files</div>"
  398. def wrap( self, value, session ):
  399. if value is None:
  400. return None
  401. if isinstance( value, galaxy.model.MetadataFile ) or isinstance( value, MetadataTempFile ):
  402. return value
  403. mf = session.query( galaxy.model.MetadataFile ).get( value )
  404. return mf
  405. def make_copy( self, value, target_context, source_context ):
  406. value = self.wrap( value, object_session( target_context.parent ) )
  407. if value:
  408. new_value = galaxy.model.MetadataFile( dataset = target_context.parent, name = self.spec.name )
  409. object_session( target_context.parent ).add( new_value )
  410. object_session( target_context.parent ).flush()
  411. shutil.copy( value.file_name, new_value.file_name )
  412. return self.unwrap( new_value )
  413. return None
  414. @classmethod
  415. def marshal( cls, value ):
  416. if isinstance( value, galaxy.model.MetadataFile ):
  417. value = value.id
  418. return value
  419. def from_external_value( self, value, parent ):
  420. """
  421. Turns a value read from a external dict into its value to be pushed directly into the metadata dict.
  422. """
  423. if MetadataTempFile.is_JSONified_value( value ):
  424. value = MetadataTempFile.from_JSON( value )
  425. if isinstance( value, MetadataTempFile ):
  426. mf = parent.metadata.get( self.spec.name, None)
  427. if mf is None:
  428. mf = self.new_file( dataset = parent, **value.kwds )
  429. # Ensure the metadata file gets updated with content
  430. parent.dataset.object_store.update_from_file( mf, file_name=value.file_name, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name=os.path.basename(mf.file_name) )
  431. os.unlink( value.file_name )
  432. value = mf.id
  433. return value
  434. def to_external_value( self, value ):
  435. """
  436. Turns a value read from a metadata into its value to be pushed directly into the external dict.
  437. """
  438. if isinstance( value, galaxy.model.MetadataFile ):
  439. value = value.id
  440. elif isinstance( value, MetadataTempFile ):
  441. value = MetadataTempFile.to_JSON( value )
  442. return value
  443. def new_file( self, dataset = None, **kwds ):
  444. if object_session( dataset ):
  445. mf = galaxy.model.MetadataFile( name = self.spec.name, dataset = dataset, **kwds )
  446. object_session( dataset ).add( mf )
  447. object_session( dataset ).flush() #flush to assign id
  448. return mf
  449. else:
  450. #we need to make a tmp file that is accessable to the head node,
  451. #we will be copying its contents into the MetadataFile objects filename after restoring from JSON
  452. #we do not include 'dataset' in the kwds passed, as from_JSON_value() will handle this for us
  453. return MetadataTempFile( **kwds )
  454. #This class is used when a database file connection is not available
  455. class MetadataTempFile( object ):
  456. tmp_dir = 'database/tmp' #this should be overwritten as necessary in calling scripts
  457. def __init__( self, **kwds ):
  458. self.kwds = kwds
  459. self._filename = None
  460. @property
  461. def file_name( self ):
  462. if self._filename is None:
  463. #we need to create a tmp file, accessable across all nodes/heads, save the name, and return it
  464. self._filename = abspath( tempfile.NamedTemporaryFile( dir = self.tmp_dir, prefix = "metadata_temp_file_" ).name )
  465. open( self._filename, 'wb+' ) #create an empty file, so it can't be reused using tempfile
  466. return self._filename
  467. def to_JSON( self ):
  468. return { '__class__':self.__class__.__name__, 'filename':self.file_name, 'kwds':self.kwds }
  469. @classmethod
  470. def from_JSON( cls, json_dict ):
  471. #need to ensure our keywords are not unicode
  472. rval = cls( **stringify_dictionary_keys( json_dict['kwds'] ) )
  473. rval._filename = json_dict['filename']
  474. return rval
  475. @classmethod
  476. def is_JSONified_value( cls, value ):
  477. return ( isinstance( value, dict ) and value.get( '__class__', None ) == cls.__name__ )
  478. @classmethod
  479. def cleanup_from_JSON_dict_filename( cls, filename ):
  480. try:
  481. for key, value in json.load( open( filename ) ).items():
  482. if cls.is_JSONified_value( value ):
  483. value = cls.from_JSON( value )
  484. if isinstance( value, cls ) and os.path.exists( value.file_name ):
  485. log.debug( 'Cleaning up abandoned MetadataTempFile file: %s' % value.file_name )
  486. os.unlink( value.file_name )
  487. except Exception, e:
  488. log.debug( 'Failed to cleanup MetadataTempFile temp files from %s: %s' % ( filename, e ) )
  489. #Class with methods allowing set_meta() to be called externally to the Galaxy head
  490. class JobExternalOutputMetadataWrapper( object ):
  491. #this class allows access to external metadata filenames for all outputs associated with a job
  492. #We will use JSON as the medium of exchange of information, except for the DatasetInstance object which will use pickle (in the future this could be JSONified as well)
  493. def __init__( self, job ):
  494. self.job_id = job.id
  495. def get_output_filenames_by_dataset( self, dataset, sa_session ):
  496. if isinstance( dataset, galaxy.model.HistoryDatasetAssociation ):
  497. return sa_session.query( galaxy.model.JobExternalOutputMetadata ) \
  498. .filter_by( job_id = self.job_id, history_dataset_association_id = dataset.id ) \
  499. .first() #there should only be one or None
  500. elif isinstance( dataset, galaxy.model.LibraryDatasetDatasetAssociation ):
  501. return sa_session.query( galaxy.model.JobExternalOutputMetadata ) \
  502. .filter_by( job_id = self.job_id, library_dataset_dataset_association_id = dataset.id ) \
  503. .first() #there should only be one or None
  504. return None
  505. def get_dataset_metadata_key( self, dataset ):
  506. # Set meta can be called on library items and history items,
  507. # need to make different keys for them, since ids can overlap
  508. return "%s_%d" % ( dataset.__class__.__name__, dataset.id )
  509. def setup_external_metadata( self, datasets, sa_session, exec_dir=None, tmp_dir=None, dataset_files_path=None,
  510. output_fnames=None, config_root=None, config_file=None, datatypes_config=None, job_metadata=None, compute_tmp_dir=None, kwds=None ):
  511. kwds = kwds or {}
  512. if tmp_dir is None:
  513. tmp_dir = MetadataTempFile.tmp_dir
  514. # path is calculated for Galaxy, may be different on compute - rewrite
  515. # for the compute server.
  516. def metadata_path_on_compute(path):
  517. compute_path = path
  518. log.info(compute_tmp_dir)
  519. if compute_tmp_dir and tmp_dir and in_directory(path, tmp_dir):
  520. path_relative = os.path.relpath(path, tmp_dir)
  521. compute_path = os.path.join(compute_tmp_dir, path_relative)
  522. return compute_path
  523. #fill in metadata_files_dict and return the command with args required to set metadata
  524. def __metadata_files_list_to_cmd_line( metadata_files ):
  525. def __get_filename_override():
  526. if output_fnames:
  527. for dataset_path in output_fnames:
  528. if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name:
  529. return dataset_path.false_path
  530. return ""
  531. line = "%s,%s,%s,%s,%s,%s" % (
  532. metadata_path_on_compute(metadata_files.filename_in),
  533. metadata_path_on_compute(metadata_files.filename_kwds),
  534. metadata_path_on_compute(metadata_files.filename_out),
  535. metadata_path_on_compute(metadata_files.filename_results_code),
  536. __get_filename_override(),
  537. metadata_path_on_compute(metadata_files.filename_override_metadata),
  538. )
  539. log.info(line)
  540. return line
  541. if not isinstance( datasets, list ):
  542. datasets = [ datasets ]
  543. if exec_dir is None:
  544. exec_dir = os.path.abspath( os.getcwd() )
  545. if dataset_files_path is None:
  546. dataset_files_path = galaxy.model.Dataset.file_path
  547. if config_root is None:
  548. config_root = os.path.abspath( os.getcwd() )
  549. if datatypes_config is None:
  550. raise Exception( 'In setup_external_metadata, the received datatypes_config is None.' )
  551. datatypes_config = 'datatypes_conf.xml'
  552. metadata_files_list = []
  553. for dataset in datasets:
  554. key = self.get_dataset_metadata_key( dataset )
  555. #future note:
  556. #wonkiness in job execution causes build command line to be called more than once
  557. #when setting metadata externally, via 'auto-detect' button in edit attributes, etc.,
  558. #we don't want to overwrite (losing the ability to cleanup) our existing dataset keys and files,
  559. #so we will only populate the dictionary once
  560. metadata_files = self.get_output_filenames_by_dataset( dataset, sa_session )
  561. if not metadata_files:
  562. metadata_files = galaxy.model.JobExternalOutputMetadata( dataset = dataset)
  563. metadata_files.job_id = self.job_id
  564. #we are using tempfile to create unique filenames, tempfile always returns an absolute path
  565. #we will use pathnames relative to the galaxy root, to accommodate instances where the galaxy root
  566. #is located differently, i.e. on a cluster node with a different filesystem structure
  567. #file to store existing dataset
  568. metadata_files.filename_in = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_in_%s_" % key ).name )
  569. #FIXME: HACK
  570. #sqlalchemy introduced 'expire_on_commit' flag for sessionmaker at version 0.5x
  571. #This may be causing the dataset attribute of the dataset_association object to no-longer be loaded into memory when needed for pickling.
  572. #For now, we'll simply 'touch' dataset_association.dataset to force it back into memory.
  573. dataset.dataset #force dataset_association.dataset to be loaded before pickling
  574. #A better fix could be setting 'expire_on_commit=False' on the session, or modifying where commits occur, or ?
  575. cPickle.dump( dataset, open( metadata_files.filename_in, 'wb+' ) )
  576. #file to store metadata results of set_meta()
  577. metadata_files.filename_out = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_out_%s_" % key ).name )
  578. open( metadata_files.filename_out, 'wb+' ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
  579. #file to store a 'return code' indicating the results of the set_meta() call
  580. #results code is like (True/False - if setting metadata was successful/failed , exception or string of reason of success/failure )
  581. metadata_files.filename_results_code = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_results_%s_" % key ).name )
  582. json.dump( ( False, 'External set_meta() not called' ), open( metadata_files.filename_results_code, 'wb+' ) ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
  583. #file to store kwds passed to set_meta()
  584. metadata_files.filename_kwds = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_kwds_%s_" % key ).name )
  585. json.dump( kwds, open( metadata_files.filename_kwds, 'wb+' ), ensure_ascii=True )
  586. #existing metadata file parameters need to be overridden with cluster-writable file locations
  587. metadata_files.filename_override_metadata = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_override_%s_" % key ).name )
  588. open( metadata_files.filename_override_metadata, 'wb+' ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
  589. override_metadata = []
  590. for meta_key, spec_value in dataset.metadata.spec.iteritems():
  591. if isinstance( spec_value.param, FileParameter ) and dataset.metadata.get( meta_key, None ) is not None:
  592. metadata_temp = MetadataTempFile()
  593. shutil.copy( dataset.metadata.get( meta_key, None ).file_name, metadata_temp.file_name )
  594. override_metadata.append( ( meta_key, metadata_temp.to_JSON() ) )
  595. json.dump( override_metadata, open( metadata_files.filename_override_metadata, 'wb+' ) )
  596. #add to session and flush
  597. sa_session.add( metadata_files )
  598. sa_session.flush()
  599. metadata_files_list.append( metadata_files )
  600. #return command required to build
  601. return "%s %s %s %s %s %s %s %s" % ( os.path.join( exec_dir, 'set_metadata.sh' ), dataset_files_path, tmp_dir, config_root, config_file, datatypes_config, job_metadata, " ".join( map( __metadata_files_list_to_cmd_line, metadata_files_list ) ) )
  602. def external_metadata_set_successfully( self, dataset, sa_session ):
  603. metadata_files = self.get_output_filenames_by_dataset( dataset, sa_session )
  604. if not metadata_files:
  605. return False # this file doesn't exist
  606. rval, rstring = json.load( open( metadata_files.filename_results_code ) )
  607. if not rval:
  608. log.debug( 'setting metadata externally failed for %s %s: %s' % ( dataset.__class__.__name__, dataset.id, rstring ) )
  609. return rval
  610. def cleanup_external_metadata( self, sa_session ):
  611. log.debug( 'Cleaning up external metadata files' )
  612. for metadata_files in sa_session.query( galaxy.model.Job ).get( self.job_id ).external_output_metadata:
  613. #we need to confirm that any MetadataTempFile files were removed, if not we need to remove them
  614. #can occur if the job was stopped before completion, but a MetadataTempFile is used in the set_meta
  615. MetadataTempFile.cleanup_from_JSON_dict_filename( metadata_files.filename_out )
  616. dataset_key = self.get_dataset_metadata_key( metadata_files.dataset )
  617. for key, fname in [ ( 'filename_in', metadata_files.filename_in ), ( 'filename_out', metadata_files.filename_out ), ( 'filename_results_code', metadata_files.filename_results_code ), ( 'filename_kwds', metadata_files.filename_kwds ), ( 'filename_override_metadata', metadata_files.filename_override_metadata ) ]:
  618. try:
  619. os.remove( fname )
  620. except Exception, e:
  621. log.debug( 'Failed to cleanup external metadata file (%s) for %s: %s' % ( key, dataset_key, e ) )
  622. def set_job_runner_external_pid( self, pid, sa_session ):
  623. for metadata_files in sa_session.query( galaxy.model.Job ).get( self.job_id ).external_output_metadata:
  624. metadata_files.job_runner_external_pid = pid
  625. sa_session.add( metadata_files )
  626. sa_session.flush()