
/lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py

https://bitbucket.org/cistrome/cistrome-harvard/
import sys, logging, os, time, datetime, errno

log = logging.getLogger( __name__ )
log.setLevel(logging.DEBUG)
handler = logging.StreamHandler( sys.stdout )
format = "%(name)s %(levelname)s %(asctime)s %(message)s"
formatter = logging.Formatter( format )
handler.setFormatter( formatter )
log.addHandler( handler )

from sqlalchemy import and_
from sqlalchemy import *

now = datetime.datetime.utcnow

from sqlalchemy.orm import *
from migrate import *
from migrate.changeset import *

from galaxy.model.custom_types import *
from galaxy.util.bunch import Bunch

metadata = MetaData()
context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) )
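# Editor's note (not in the original file): 'context' is the scoped session this script uses for
# all add()/flush() calls below; with autocommit=True each flush is written out as it happens
# rather than being held inside an explicit long-running transaction.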
## classes
def get_permitted_actions( **kwds ):
    return Bunch()

def directory_hash_id( id ):
    s = str( id )
    l = len( s )
    # Shortcut -- ids 0-999 go under ../000/
    if l < 4:
        return [ "000" ]
    # Pad with zeros until a multiple of three
    padded = ( ( 3 - len( s ) % 3 ) * "0" ) + s
    # Drop the last three digits -- 1000 files per directory
    padded = padded[:-3]
    # Break into chunks of three
    return [ padded[i*3:(i+1)*3] for i in range( len( padded ) // 3 ) ]
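# Illustrative examples of the hashing scheme above:
#   directory_hash_id( 123 )     -> [ "000" ]          # ids 0-999 share one directory
#   directory_hash_id( 12345 )   -> [ "012" ]          # <file_path>/012/dataset_12345.dat
#   directory_hash_id( 1234567 ) -> [ "001", "234" ]   # <file_path>/001/234/dataset_1234567.dat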
class Dataset( object ):
    states = Bunch( NEW = 'new',
                    UPLOAD = 'upload',
                    QUEUED = 'queued',
                    RUNNING = 'running',
                    OK = 'ok',
                    EMPTY = 'empty',
                    ERROR = 'error',
                    DISCARDED = 'discarded' )
    permitted_actions = get_permitted_actions( filter='DATASET' )
    file_path = "/tmp/"
    engine = None
    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
        self.id = id
        self.state = state
        self.deleted = False
        self.purged = False
        self.purgable = purgable
        self.external_filename = external_filename
        self._extra_files_path = extra_files_path
        self.file_size = file_size
    def get_file_name( self ):
        if not self.external_filename:
            assert self.id is not None, "ID must be set before filename used (commit the object)"
            # First try filename directly under file_path
            filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
            # Only use that filename if it already exists (backward compatibility),
            # otherwise construct hashed path
            if not os.path.exists( filename ):
                dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
                # Create directory if it does not exist
                try:
                    os.makedirs( dir )
                except OSError, e:
                    # File Exists is okay, otherwise reraise
                    if e.errno != errno.EEXIST:
                        raise
                # Return filename inside hashed directory
                return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
        else:
            filename = self.external_filename
        # Make filename absolute
        return os.path.abspath( filename )
    def set_file_name ( self, filename ):
        if not filename:
            self.external_filename = None
        else:
            self.external_filename = filename
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        if self._extra_files_path:
            path = self._extra_files_path
        else:
            path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
            # Only use path directly under self.file_path if it exists
            if not os.path.exists( path ):
                path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
        # Make path absolute
        return os.path.abspath( path )
    def get_size( self ):
        """Returns the size of the data on disk"""
        if self.file_size:
            return self.file_size
        else:
            try:
                return os.path.getsize( self.file_name )
            except OSError:
                return 0
    def set_size( self ):
        """Sets file_size from the size of the data on disk"""
        try:
            if not self.file_size:
                self.file_size = os.path.getsize( self.file_name )
        except OSError:
            self.file_size = 0
    def has_data( self ):
        """Detects whether there is any data"""
        return self.get_size() > 0
    def mark_deleted( self, include_children=True ):
        self.deleted = True
    # FIXME: sqlalchemy will replace this
    def _delete(self):
        """Remove the file that corresponds to this data"""
        try:
            os.remove(self.file_name)
        except OSError, e:
            log.critical('%s delete error %s' % (self.__class__.__name__, e))
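# Example (illustrative): a Dataset with id=12345 and no external_filename resolves to
# /tmp/dataset_12345.dat if that file already exists, and otherwise to the hashed path
# /tmp/012/dataset_12345.dat, with the intermediate directory created on demand by get_file_name().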
class DatasetInstance( object ):
    """A base class for all 'dataset instances', HDAs, LDAs, etc"""
    states = Dataset.states
    permitted_actions = Dataset.permitted_actions
    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
                  dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
                  parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
        self.name = name or "Unnamed dataset"
        self.id = id
        self.info = info
        self.blurb = blurb
        self.peek = peek
        self.extension = extension
        self.designation = designation
        self.metadata = metadata or dict()
        if dbkey: # dbkey is stored in metadata; only set if non-zero, or else we could clobber one supplied by input 'metadata'
            self.dbkey = dbkey
        self.deleted = deleted
        self.visible = visible
        # Relationships
        if not dataset and create_dataset:
            dataset = Dataset( state=Dataset.states.NEW )
            context.add( dataset )
            context.flush()
        self.dataset = dataset
        self.parent_id = parent_id
        self.validation_errors = validation_errors
    @property
    def ext( self ):
        return self.extension
    def get_dataset_state( self ):
        return self.dataset.state
    def set_dataset_state ( self, state ):
        self.dataset.state = state
        context.add( self.dataset )
        context.flush() # flush here, because hda.flush() won't flush the Dataset object
    state = property( get_dataset_state, set_dataset_state )
    def get_file_name( self ):
        return self.dataset.get_file_name()
    def set_file_name (self, filename):
        return self.dataset.set_file_name( filename )
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        return self.dataset.extra_files_path
    @property
    def datatype( self ):
        return datatypes_registry.get_datatype_by_extension( self.extension )
    def get_metadata( self ):
        # Using weakref to store parent (to prevent circular reference); does a context.clear() cause
        # parent to be invalidated, while still copying over this non-database attribute?
        if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self:
            self._metadata_collection = MetadataCollection( self )
        return self._metadata_collection
    def set_metadata( self, bunch ):
        # Needs to accept a MetadataCollection, a bunch, or a dict
        self._metadata = self.metadata.make_dict_copy( bunch )
    metadata = property( get_metadata, set_metadata )
    # This provides backwards compatibility with using the old dbkey
    # field in the database. That field now maps to "old_dbkey" (see mapping.py).
    def get_dbkey( self ):
        dbkey = self.metadata.dbkey
        if not isinstance(dbkey, list): dbkey = [dbkey]
        if dbkey in [[None], []]: return "?"
        return dbkey[0]
    def set_dbkey( self, value ):
        if "dbkey" in self.datatype.metadata_spec:
            if not isinstance(value, list):
                self.metadata.dbkey = [value]
            else:
                self.metadata.dbkey = value
    dbkey = property( get_dbkey, set_dbkey )
    def change_datatype( self, new_ext ):
        self.clear_associated_files()
        datatypes_registry.change_datatype( self, new_ext )
    def get_size( self ):
        """Returns the size of the data on disk"""
        return self.dataset.get_size()
    def set_size( self ):
        """Sets the size of the data on disk"""
        return self.dataset.set_size()
    def has_data( self ):
        """Detects whether there is any data"""
        return self.dataset.has_data()
    def get_raw_data( self ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        return self.datatype.get_raw_data( self )
    def write_from_stream( self, stream ):
        """Writes data from a stream"""
        self.datatype.write_from_stream(self, stream)
    def set_raw_data( self, data ):
        """Saves the data on the disk"""
        self.datatype.set_raw_data(self, data)
    def get_mime( self ):
        """Returns the mime type of the data"""
        return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
    def set_peek( self, is_multi_byte=False ):
        return self.datatype.set_peek( self, is_multi_byte=is_multi_byte )
    def init_meta( self, copy_from=None ):
        return self.datatype.init_meta( self, copy_from=copy_from )
    def set_meta( self, **kwd ):
        self.clear_associated_files( metadata_safe = True )
        return self.datatype.set_meta( self, **kwd )
    def missing_meta( self, **kwd ):
        return self.datatype.missing_meta( self, **kwd )
    def as_display_type( self, type, **kwd ):
        return self.datatype.as_display_type( self, type, **kwd )
    def display_peek( self ):
        return self.datatype.display_peek( self )
    def display_name( self ):
        return self.datatype.display_name( self )
    def display_info( self ):
        return self.datatype.display_info( self )
    def get_converted_files_by_type( self, file_type ):
        valid = []
        for assoc in self.implicitly_converted_datasets:
            if not assoc.deleted and assoc.type == file_type:
                valid.append( assoc.dataset )
        return valid
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        raise Exception( 'Unimplemented' )
    def get_child_by_designation(self, designation):
        for child in self.children:
            if child.designation == designation:
                return child
        return None
    def get_converter_types(self):
        return self.datatype.get_converter_types( self, datatypes_registry)
    def find_conversion_destination( self, accepted_formats, **kwd ):
        """Returns ( target_ext, existing converted dataset )"""
        return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
    def add_validation_error( self, validation_error ):
        self.validation_errors.append( validation_error )
    def extend_validation_errors( self, validation_errors ):
        self.validation_errors.extend(validation_errors)
    def mark_deleted( self, include_children=True ):
        self.deleted = True
        if include_children:
            for child in self.children:
                child.mark_deleted()
    def mark_undeleted( self, include_children=True ):
        self.deleted = False
        if include_children:
            for child in self.children:
                child.mark_undeleted()
    def undeletable( self ):
        if self.purged:
            return False
        return True
    @property
    def source_library_dataset( self ):
        def get_source( dataset ):
            if isinstance( dataset, LibraryDatasetDatasetAssociation ):
                if dataset.library_dataset:
                    return ( dataset, dataset.library_dataset )
            if dataset.copied_from_library_dataset_dataset_association:
                source = get_source( dataset.copied_from_library_dataset_dataset_association )
                if source:
                    return source
            if dataset.copied_from_history_dataset_association:
                source = get_source( dataset.copied_from_history_dataset_association )
                if source:
                    return source
            return ( None, None )
        return get_source( self )
class HistoryDatasetAssociation( DatasetInstance ):
    def __init__( self,
                  hid = None,
                  history = None,
                  copied_from_history_dataset_association = None,
                  copied_from_library_dataset_dataset_association = None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.hid = hid
        # Relationships
        self.history = history
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
    def copy( self, copy_children = False, parent_id = None, target_history = None ):
        hda = HistoryDatasetAssociation( hid=self.hid,
                                         name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset = self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_history_dataset_association=self,
                                         history = target_history )
        context.add( hda )
        context.flush()
        hda.set_size()
        # Need to set after flushed, as MetadataFiles require dataset.id
        hda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children = copy_children, parent_id = hda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            hda.set_peek()
        context.flush()
        return hda
    def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
        if replace_dataset:
            # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
            library_dataset = replace_dataset
        else:
            # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
            # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
            library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
            context.add( library_dataset )
            context.flush()
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 library_dataset=library_dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_history_dataset_association=self,
                                                 user=self.history.user )
        context.add( ldda )
        context.flush()
        # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
        # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
        ldda.metadata = self.metadata
        if not replace_dataset:
            target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
            context.add( target_folder )
            context.flush()
        library_dataset.library_dataset_dataset_association_id = ldda.id
        context.add( library_dataset )
        context.flush()
        for child in self.children:
            child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        # metadata_safe = True means to only clear when assoc.metadata_safe == False
        for assoc in self.implicitly_converted_datasets:
            if not metadata_safe or not assoc.metadata_safe:
                assoc.clear( purge = purge )
class LibraryDatasetDatasetAssociation( DatasetInstance ):
    def __init__( self,
                  copied_from_history_dataset_association=None,
                  copied_from_library_dataset_dataset_association=None,
                  library_dataset=None,
                  user=None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
        self.library_dataset = library_dataset
        self.user = user
    def to_history_dataset_association( self, target_history, parent_id=None ):
        hid = target_history._next_hid()
        hda = HistoryDatasetAssociation( name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset=self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_library_dataset_dataset_association=self,
                                         history=target_history,
                                         hid=hid )
        context.flush()
        hda.metadata = self.metadata # need to set after flushed, as MetadataFiles require dataset.id
        for child in self.children:
            child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
        if not self.datatype.copy_safe_peek:
            hda.set_peek() # in some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
        context.add( hda )
        context.flush()
        return hda
    def copy( self, copy_children = False, parent_id = None, target_folder = None ):
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_library_dataset_dataset_association=self,
                                                 folder=target_folder )
        context.add( ldda )
        context.flush()
        # Need to set after flushed, as MetadataFiles require dataset.id
        ldda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children = copy_children, parent_id = ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        return
    def get_library_item_info_templates( self, template_list=[], restrict=False ):
        # If restrict is True, we'll return only those templates directly associated with this LibraryDatasetDatasetAssociation
        if self.library_dataset_dataset_info_template_associations:
            template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
        self.library_dataset.get_library_item_info_templates( template_list, restrict )
        return template_list
class LibraryDataset( object ):
    # This class acts as a proxy to the currently selected LDDA
    def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
        self.folder = folder
        self.order_id = order_id
        self.name = name
        self.info = info
        self.library_dataset_dataset_association = library_dataset_dataset_association
    def set_library_dataset_dataset_association( self, ldda ):
        self.library_dataset_dataset_association = ldda
        ldda.library_dataset = self
        context.add_all( ( self, ldda ) )
        context.flush()
    def get_info( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.info
        elif self._info:
            return self._info
        else:
            return 'no info'
    def set_info( self, info ):
        self._info = info
    info = property( get_info, set_info )
    def get_name( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.name
        elif self._name:
            return self._name
        else:
            return 'Unnamed dataset'
    def set_name( self, name ):
        self._name = name
    name = property( get_name, set_name )
    def display_name( self ):
        self.library_dataset_dataset_association.display_name()
    def get_purged( self ):
        return self.library_dataset_dataset_association.dataset.purged
    def set_purged( self, purged ):
        if purged:
            raise Exception( "Not implemented" )
        if not purged and self.purged:
            raise Exception( "Cannot unpurge once purged" )
    purged = property( get_purged, set_purged )
    def get_library_item_info_templates( self, template_list=[], restrict=False ):
        # If restrict is True, we'll return only those templates directly associated with this LibraryDataset
        if self.library_dataset_info_template_associations:
            template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
        if restrict not in [ 'True', True ]:
            self.folder.get_library_item_info_templates( template_list, restrict )
        return template_list
## tables
Dataset.table = Table( "dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
    Column( "state", TrimmedString( 64 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "purged", Boolean, index=True, default=False ),
    Column( "purgable", Boolean, default=True ),
    Column( "external_filename", TEXT ),
    Column( "_extra_files_path", TEXT ),
    Column( "file_size", Numeric( 15, 0 ) ) )

HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "hid", Integer ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek", TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ) )

LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek", TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ),
    Column( "message", TrimmedString( 255 ) ) )

LibraryDataset.table = Table( "library_dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    # Current version of the dataset; if null, there is not a current version selected
    Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ),
    Column( "order_id", Integer ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "name", TrimmedString( 255 ), key="_name" ), # when not None/null this will supersede display in library (but not when imported into user's history?)
    Column( "info", TrimmedString( 255 ), key="_info" ), # when not None/null this will supersede display in library (but not when imported into user's history?)
    Column( "deleted", Boolean, index=True, default=False ) )
## mappers
mapper( Dataset, Dataset.table,
    properties=dict(
        history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
        active_history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
        library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
        active_library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
    ) )

mapper( HistoryDatasetAssociation, HistoryDatasetAssociation.table,
    properties=dict(
        dataset=relation(
            Dataset,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
        # .history defined in History mapper
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        visible_children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
    ) )

mapper( LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
    properties=dict(
        dataset=relation( Dataset ),
        library_dataset = relation( LibraryDataset,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        visible_children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
    ) )

mapper( LibraryDataset, LibraryDataset.table,
    properties=dict(
        library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
        expired_datasets = relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id, LibraryDataset.table.c.library_dataset_dataset_association_id ], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
    ) )
def __guess_dataset_by_filename( filename ):
    """Return a guessed dataset by filename"""
    try:
        fields = os.path.split( filename )
        if fields:
            if fields[-1].startswith( 'dataset_' ) and fields[-1].endswith( '.dat' ): # dataset_%d.dat
                return Dataset.get( int( fields[-1][ len( 'dataset_' ): -len( '.dat' ) ] ) )
    except:
        pass # some parsing error, we can't guess Dataset
    return None
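# Example (illustrative): for '/tmp/000/dataset_42.dat' the basename parses to dataset id 42,
# which is then fetched via Dataset.get(); any failure is swallowed by the bare except and
# None is returned instead of a guess.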
def upgrade(migrate_engine):
    metadata.bind = migrate_engine
    log.debug( "Fixing a discrepancy concerning deleted shared history items." )
    affected_items = 0
    start_time = time.time()
    for dataset in context.query( Dataset ).filter( and_( Dataset.deleted == True, Dataset.purged == False ) ):
        for dataset_instance in dataset.history_associations + dataset.library_associations:
            if not dataset_instance.deleted:
                dataset.deleted = False
                if dataset.file_size in [ None, 0 ]:
                    dataset.set_size() # Restore filesize
                affected_items += 1
                break
    context.flush()
    log.debug( "%i items affected, and restored." % ( affected_items ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )

    # Fix shares that happened before HDAs
    log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
    dataset_by_filename = {}
    changed_associations = 0
    start_time = time.time()
    for dataset in context.query( Dataset ).filter( Dataset.external_filename.like( '%dataset_%.dat' ) ):
        if dataset.file_name in dataset_by_filename:
            guessed_dataset = dataset_by_filename[ dataset.file_name ]
        else:
            guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
            if guessed_dataset and dataset.file_name != guessed_dataset.file_name: # not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
                guessed_dataset = None
            dataset_by_filename[ dataset.file_name ] = guessed_dataset
        if guessed_dataset is not None and guessed_dataset.id != dataset.id: # could we have a self referential dataset?
            for dataset_instance in dataset.history_associations + dataset.library_associations:
                dataset_instance.dataset = guessed_dataset
                changed_associations += 1
            # Mark the original Dataset as deleted and purged; it is no longer in use, but do not delete file_name contents
            dataset.deleted = True
            dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
            dataset.purged = True # we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
    context.flush()
    log.debug( "%i items affected, and restored." % ( changed_associations ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
def downgrade(migrate_engine):
    metadata.bind = migrate_engine
    log.debug( "Downgrade is not possible." )