/lib/galaxy/webapps/community/controllers/upload.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 316 lines · 265 code · 5 blank · 46 comment · 93 complexity · 15be5b19a1d84810a2670144f5347b3a MD5 · raw file

  1. import sys, os, shutil, logging, tarfile, tempfile
  2. from galaxy.web.base.controller import *
  3. from galaxy.model.orm import *
  4. from galaxy.datatypes.checkers import *
  5. from common import *
  6. from mercurial import hg, ui, commands
  7. log = logging.getLogger( __name__ )
  8. # States for passing messages
  9. SUCCESS, INFO, WARNING, ERROR = "done", "info", "warning", "error"
  10. CHUNK_SIZE = 2**20 # 1Mb
  11. class UploadError( Exception ):
  12. pass
  13. class UploadController( BaseUIController ):
  14. @web.expose
  15. @web.require_login( 'upload', use_panels=True, webapp='community' )
  16. def upload( self, trans, **kwd ):
  17. params = util.Params( kwd )
  18. message = util.restore_text( params.get( 'message', '' ) )
  19. status = params.get( 'status', 'done' )
  20. commit_message = util.restore_text( params.get( 'commit_message', 'Uploaded' ) )
  21. category_ids = util.listify( params.get( 'category_id', '' ) )
  22. categories = get_categories( trans )
  23. repository_id = params.get( 'repository_id', '' )
  24. repository = get_repository( trans, repository_id )
  25. repo_dir = repository.repo_path
  26. repo = hg.repository( get_configured_ui(), repo_dir )
  27. uncompress_file = util.string_as_bool( params.get( 'uncompress_file', 'true' ) )
  28. remove_repo_files_not_in_tar = util.string_as_bool( params.get( 'remove_repo_files_not_in_tar', 'true' ) )
  29. uploaded_file = None
  30. upload_point = self.__get_upload_point( repository, **kwd )
  31. # Get the current repository tip.
  32. tip = repository.tip
  33. if params.get( 'upload_button', False ):
  34. current_working_dir = os.getcwd()
  35. file_data = params.get( 'file_data', '' )
  36. if file_data == '':
  37. message = 'No files were entered on the upload form.'
  38. status = 'error'
  39. uploaded_file = None
  40. elif file_data not in ( '', None ):
  41. uploaded_file = file_data.file
  42. uploaded_file_name = uploaded_file.name
  43. uploaded_file_filename = file_data.filename
  44. isempty = os.path.getsize( os.path.abspath( uploaded_file_name ) ) == 0
  45. if uploaded_file:
  46. isgzip = False
  47. isbz2 = False
  48. if uncompress_file:
  49. isgzip = is_gzip( uploaded_file_name )
  50. if not isgzip:
  51. isbz2 = is_bz2( uploaded_file_name )
  52. ok = True
  53. if isempty:
  54. tar = None
  55. istar = False
  56. else:
  57. # Determine what we have - a single file or an archive
  58. try:
  59. if ( isgzip or isbz2 ) and uncompress_file:
  60. # Open for reading with transparent compression.
  61. tar = tarfile.open( uploaded_file_name, 'r:*' )
  62. else:
  63. tar = tarfile.open( uploaded_file_name )
  64. istar = True
  65. except tarfile.ReadError, e:
  66. tar = None
  67. istar = False
  68. if istar:
  69. ok, message, files_to_remove = self.upload_tar( trans,
  70. repository,
  71. tar,
  72. uploaded_file,
  73. upload_point,
  74. remove_repo_files_not_in_tar,
  75. commit_message )
  76. else:
  77. if ( isgzip or isbz2 ) and uncompress_file:
  78. uploaded_file_filename = self.uncompress( repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 )
  79. if upload_point is not None:
  80. full_path = os.path.abspath( os.path.join( repo_dir, upload_point, uploaded_file_filename ) )
  81. else:
  82. full_path = os.path.abspath( os.path.join( repo_dir, uploaded_file_filename ) )
  83. # Move the uploaded file to the load_point within the repository hierarchy.
  84. shutil.move( uploaded_file_name, full_path )
  85. commands.add( repo.ui, repo, full_path )
  86. try:
  87. commands.commit( repo.ui, repo, full_path, user=trans.user.username, message=commit_message )
  88. except Exception, e:
  89. # I never have a problem with commands.commit on a Mac, but in the test/production
  90. # tool shed environment, it occasionally throws a "TypeError: array item must be char"
  91. # exception. If this happens, we'll try the following.
  92. repo.dirstate.write()
  93. repo.commit( user=trans.user.username, text=commit_message )
  94. if full_path.endswith( 'tool_data_table_conf.xml.sample' ):
  95. # Handle the special case where a tool_data_table_conf.xml.sample
  96. # file is being uploaded by parsing the file and adding new entries
  97. # to the in-memory trans.app.tool_data_tables dictionary as well as
  98. # appending them to the shed's tool_data_table_conf.xml file on disk.
  99. error, error_message = handle_sample_tool_data_table_conf_file( trans, full_path )
  100. if error:
  101. message = '%s<br/>%s' % ( message, error_message )
  102. if full_path.endswith( '.loc.sample' ):
  103. # Handle the special case where a xxx.loc.sample file is
  104. # being uploaded by copying it to ~/tool-data/xxx.loc.
  105. copy_sample_loc_file( trans, full_path )
  106. handle_email_alerts( trans, repository )
  107. if ok:
  108. # Update the repository files for browsing.
  109. update_for_browsing( trans, repository, current_working_dir, commit_message=commit_message )
  110. # Get the new repository tip.
  111. if tip != repository.tip:
  112. if ( isgzip or isbz2 ) and uncompress_file:
  113. uncompress_str = ' uncompressed and '
  114. else:
  115. uncompress_str = ' '
  116. message = "The file '%s' has been successfully%suploaded to the repository." % ( uploaded_file_filename, uncompress_str )
  117. if istar and remove_repo_files_not_in_tar and files_to_remove:
  118. if upload_point is not None:
  119. message += " %d files were removed from the repository relative to the selected upload point '%s'." % ( len( files_to_remove ), upload_point )
  120. else:
  121. message += " %d files were removed from the repository root." % len( files_to_remove )
  122. else:
  123. message = 'No changes to repository.'
  124. # Set metadata on the repository tip
  125. error_message, status = set_repository_metadata( trans, repository_id, repository.tip, **kwd )
  126. if error_message:
  127. message = '%s<br/>%s' % ( message, error_message )
  128. return trans.response.send_redirect( web.url_for( controller='repository',
  129. action='manage_repository',
  130. id=repository_id,
  131. message=message,
  132. status=status ) )
  133. trans.response.send_redirect( web.url_for( controller='repository',
  134. action='browse_repository',
  135. id=repository_id,
  136. commit_message='Deleted selected files',
  137. message=message,
  138. status=status ) )
  139. else:
  140. status = 'error'
  141. selected_categories = [ trans.security.decode_id( id ) for id in category_ids ]
  142. return trans.fill_template( '/webapps/community/repository/upload.mako',
  143. repository=repository,
  144. commit_message=commit_message,
  145. uncompress_file=uncompress_file,
  146. remove_repo_files_not_in_tar=remove_repo_files_not_in_tar,
  147. message=message,
  148. status=status )
  149. def upload_tar( self, trans, repository, tar, uploaded_file, upload_point, remove_repo_files_not_in_tar, commit_message ):
  150. # Upload a tar archive of files.
  151. repo_dir = repository.repo_path
  152. repo = hg.repository( get_configured_ui(), repo_dir )
  153. files_to_remove = []
  154. ok, message = self.__check_archive( tar )
  155. if not ok:
  156. tar.close()
  157. uploaded_file.close()
  158. return ok, message, files_to_remove
  159. else:
  160. if upload_point is not None:
  161. full_path = os.path.abspath( os.path.join( repo_dir, upload_point ) )
  162. else:
  163. full_path = os.path.abspath( repo_dir )
  164. filenames_in_archive = [ tarinfo_obj.name for tarinfo_obj in tar.getmembers() ]
  165. filenames_in_archive = [ os.path.join( full_path, name ) for name in filenames_in_archive ]
  166. # Extract the uploaded tar to the load_point within the repository hierarchy.
  167. tar.extractall( path=full_path )
  168. tar.close()
  169. uploaded_file.close()
  170. if remove_repo_files_not_in_tar and not repository.is_new:
  171. # We have a repository that is not new (it contains files), so discover
  172. # those files that are in the repository, but not in the uploaded archive.
  173. for root, dirs, files in os.walk( full_path ):
  174. if not root.find( '.hg' ) >= 0 and not root.find( 'hgrc' ) >= 0:
  175. if '.hg' in dirs:
  176. # Don't visit .hg directories - should be impossible since we don't
  177. # allow uploaded archives that contain .hg dirs, but just in case...
  178. dirs.remove( '.hg' )
  179. if 'hgrc' in files:
  180. # Don't include hgrc files in commit.
  181. files.remove( 'hgrc' )
  182. for name in files:
  183. full_name = os.path.join( root, name )
  184. if full_name not in filenames_in_archive:
  185. files_to_remove.append( full_name )
  186. for repo_file in files_to_remove:
  187. # Remove files in the repository (relative to the upload point)
  188. # that are not in the uploaded archive.
  189. try:
  190. commands.remove( repo.ui, repo, repo_file, force=True )
  191. except Exception, e:
  192. # I never have a problem with commands.remove on a Mac, but in the test/production
  193. # tool shed environment, it throws an exception whenever I delete all files from a
  194. # repository. If this happens, we'll try the following.
  195. relative_selected_file = selected_file.split( 'repo_%d' % repository.id )[1].lstrip( '/' )
  196. repo.dirstate.remove( relative_selected_file )
  197. repo.dirstate.write()
  198. absolute_selected_file = os.path.abspath( selected_file )
  199. if os.path.isdir( absolute_selected_file ):
  200. try:
  201. os.rmdir( absolute_selected_file )
  202. except OSError, e:
  203. # The directory is not empty
  204. pass
  205. elif os.path.isfile( absolute_selected_file ):
  206. os.remove( absolute_selected_file )
  207. dir = os.path.split( absolute_selected_file )[0]
  208. try:
  209. os.rmdir( dir )
  210. except OSError, e:
  211. # The directory is not empty
  212. pass
  213. for filename_in_archive in filenames_in_archive:
  214. commands.add( repo.ui, repo, filename_in_archive )
  215. if filename_in_archive.endswith( 'tool_data_table_conf.xml.sample' ):
  216. # Handle the special case where a tool_data_table_conf.xml.sample
  217. # file is being uploaded by parsing the file and adding new entries
  218. # to the in-memory trans.app.tool_data_tables dictionary as well as
  219. # appending them to the shed's tool_data_table_conf.xml file on disk.
  220. error, message = handle_sample_tool_data_table_conf_file( trans, filename_in_archive )
  221. if error:
  222. return False, message, files_to_remove
  223. if filename_in_archive.endswith( '.loc.sample' ):
  224. # Handle the special case where a xxx.loc.sample file is
  225. # being uploaded by copying it to ~/tool-data/xxx.loc.
  226. copy_sample_loc_file( trans, filename_in_archive )
  227. try:
  228. commands.commit( repo.ui, repo, full_path, user=trans.user.username, message=commit_message )
  229. except Exception, e:
  230. # I never have a problem with commands.commit on a Mac, but in the test/production
  231. # tool shed environment, it occasionally throws a "TypeError: array item must be char"
  232. # exception. If this happens, we'll try the following.
  233. repo.dirstate.write()
  234. repo.commit( user=trans.user.username, text=commit_message )
  235. handle_email_alerts( trans, repository )
  236. return True, '', files_to_remove
  237. def uncompress( self, repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 ):
  238. if isgzip:
  239. self.__handle_gzip( repository, uploaded_file_name )
  240. return uploaded_file_filename.rstrip( '.gz' )
  241. if isbz2:
  242. self.__handle_bz2( repository, uploaded_file_name )
  243. return uploaded_file_filename.rstrip( '.bz2' )
  244. def __handle_gzip( self, repository, uploaded_file_name ):
  245. fd, uncompressed = tempfile.mkstemp( prefix='repo_%d_upload_gunzip_' % repository.id, dir=os.path.dirname( uploaded_file_name ), text=False )
  246. gzipped_file = gzip.GzipFile( uploaded_file_name, 'rb' )
  247. while 1:
  248. try:
  249. chunk = gzipped_file.read( CHUNK_SIZE )
  250. except IOError, e:
  251. os.close( fd )
  252. os.remove( uncompressed )
  253. log.exception( 'Problem uncompressing gz data "%s": %s' % ( uploaded_file_name, str( e ) ) )
  254. return
  255. if not chunk:
  256. break
  257. os.write( fd, chunk )
  258. os.close( fd )
  259. gzipped_file.close()
  260. shutil.move( uncompressed, uploaded_file_name )
  261. def __handle_bz2( self, repository, uploaded_file_name ):
  262. fd, uncompressed = tempfile.mkstemp( prefix='repo_%d_upload_bunzip2_' % repository.id, dir=os.path.dirname( uploaded_file_name ), text=False )
  263. bzipped_file = bz2.BZ2File( uploaded_file_name, 'rb' )
  264. while 1:
  265. try:
  266. chunk = bzipped_file.read( CHUNK_SIZE )
  267. except IOError:
  268. os.close( fd )
  269. os.remove( uncompressed )
  270. log.exception( 'Problem uncompressing bz2 data "%s": %s' % ( uploaded_file_name, str( e ) ) )
  271. return
  272. if not chunk:
  273. break
  274. os.write( fd, chunk )
  275. os.close( fd )
  276. bzipped_file.close()
  277. shutil.move( uncompressed, uploaded_file_name )
  278. def __get_upload_point( self, repository, **kwd ):
  279. upload_point = kwd.get( 'upload_point', None )
  280. if upload_point is not None:
  281. # The value of upload_point will be something like: database/community_files/000/repo_12/1.bed
  282. if os.path.exists( upload_point ):
  283. if os.path.isfile( upload_point ):
  284. # Get the parent directory
  285. upload_point, not_needed = os.path.split( upload_point )
  286. # Now the value of uplaod_point will be something like: database/community_files/000/repo_12/
  287. upload_point = upload_point.split( 'repo_%d' % repository.id )[ 1 ]
  288. if upload_point:
  289. upload_point = upload_point.lstrip( '/' )
  290. upload_point = upload_point.rstrip( '/' )
  291. # Now the value of uplaod_point will be something like: /
  292. if upload_point == '/':
  293. upload_point = None
  294. else:
  295. # Must have been an error selecting something that didn't exist, so default to repository root
  296. upload_point = None
  297. return upload_point
  298. def __check_archive( self, archive ):
  299. for member in archive.getmembers():
  300. # Allow regular files and directories only
  301. if not ( member.isdir() or member.isfile() ):
  302. message = "Uploaded archives can only include regular directories and files (no symbolic links, devices, etc)."
  303. return False, message
  304. for item in [ '.hg', '..', '/' ]:
  305. if member.name.startswith( item ):
  306. message = "Uploaded archives cannot contain .hg directories, absolute filenames starting with '/', or filenames with two dots '..'."
  307. return False, message
  308. if member.name in [ 'hgrc' ]:
  309. message = "Uploaded archives cannot contain hgrc files."
  310. return False, message
  311. return True, ''