PageRenderTime 36ms CodeModel.GetById 10ms app.highlight 22ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/webapps/community/controllers/upload.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 316 lines | 278 code | 4 blank | 34 comment | 60 complexity | 15be5b19a1d84810a2670144f5347b3a MD5 | raw file
  1import sys, os, shutil, logging, tarfile, tempfile
  2from galaxy.web.base.controller import *
  3from galaxy.model.orm import *
  4from galaxy.datatypes.checkers import *
  5from common import *
  6from mercurial import hg, ui, commands
  7
  8log = logging.getLogger( __name__ )
  9
 10# States for passing messages
 11SUCCESS, INFO, WARNING, ERROR = "done", "info", "warning", "error"
 12CHUNK_SIZE = 2**20 # 1Mb
 13
 14class UploadError( Exception ):
 15    pass
 16
 17class UploadController( BaseUIController ):
 18    @web.expose
 19    @web.require_login( 'upload', use_panels=True, webapp='community' )
 20    def upload( self, trans, **kwd ):
 21        params = util.Params( kwd )
 22        message = util.restore_text( params.get( 'message', ''  ) )
 23        status = params.get( 'status', 'done' )
 24        commit_message = util.restore_text( params.get( 'commit_message', 'Uploaded'  ) )
 25        category_ids = util.listify( params.get( 'category_id', '' ) )
 26        categories = get_categories( trans )
 27        repository_id = params.get( 'repository_id', '' )
 28        repository = get_repository( trans, repository_id )
 29        repo_dir = repository.repo_path
 30        repo = hg.repository( get_configured_ui(), repo_dir )
 31        uncompress_file = util.string_as_bool( params.get( 'uncompress_file', 'true' ) )
 32        remove_repo_files_not_in_tar = util.string_as_bool( params.get( 'remove_repo_files_not_in_tar', 'true' ) )
 33        uploaded_file = None
 34        upload_point = self.__get_upload_point( repository, **kwd )
 35        # Get the current repository tip.
 36        tip = repository.tip
 37        if params.get( 'upload_button', False ):
 38            current_working_dir = os.getcwd()
 39            file_data = params.get( 'file_data', '' )
 40            if file_data == '':
 41                message = 'No files were entered on the upload form.'
 42                status = 'error'
 43                uploaded_file = None
 44            elif file_data not in ( '', None ):
 45                uploaded_file = file_data.file
 46                uploaded_file_name = uploaded_file.name
 47                uploaded_file_filename = file_data.filename
 48            isempty = os.path.getsize( os.path.abspath( uploaded_file_name ) ) == 0
 49            if uploaded_file:
 50                isgzip = False
 51                isbz2 = False
 52                if uncompress_file:
 53                    isgzip = is_gzip( uploaded_file_name )
 54                    if not isgzip:
 55                        isbz2 = is_bz2( uploaded_file_name )
 56                ok = True
 57                if isempty:
 58                    tar = None
 59                    istar = False
 60                else:                
 61                    # Determine what we have - a single file or an archive
 62                    try:
 63                        if ( isgzip or isbz2 ) and uncompress_file:
 64                            # Open for reading with transparent compression.
 65                            tar = tarfile.open( uploaded_file_name, 'r:*' )
 66                        else:
 67                            tar = tarfile.open( uploaded_file_name )
 68                        istar = True
 69                    except tarfile.ReadError, e:
 70                        tar = None
 71                        istar = False
 72                if istar:
 73                    ok, message, files_to_remove = self.upload_tar( trans,
 74                                                                    repository,
 75                                                                    tar,
 76                                                                    uploaded_file,
 77                                                                    upload_point,
 78                                                                    remove_repo_files_not_in_tar,
 79                                                                    commit_message )
 80                else:
 81                    if ( isgzip or isbz2 ) and uncompress_file:
 82                        uploaded_file_filename = self.uncompress( repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 )
 83                    if upload_point is not None:
 84                        full_path = os.path.abspath( os.path.join( repo_dir, upload_point, uploaded_file_filename ) )
 85                    else:
 86                        full_path = os.path.abspath( os.path.join( repo_dir, uploaded_file_filename ) )
 87                    # Move the uploaded file to the load_point within the repository hierarchy.
 88                    shutil.move( uploaded_file_name, full_path )
 89                    commands.add( repo.ui, repo, full_path )
 90                    try:
 91                        commands.commit( repo.ui, repo, full_path, user=trans.user.username, message=commit_message )
 92                    except Exception, e:
 93                        # I never have a problem with commands.commit on a Mac, but in the test/production
 94                        # tool shed environment, it occasionally throws a "TypeError: array item must be char"
 95                        # exception.  If this happens, we'll try the following.
 96                        repo.dirstate.write()
 97                        repo.commit( user=trans.user.username, text=commit_message )
 98                    if full_path.endswith( 'tool_data_table_conf.xml.sample' ):
 99                        # Handle the special case where a tool_data_table_conf.xml.sample
100                        # file is being uploaded by parsing the file and adding new entries
101                        # to the in-memory trans.app.tool_data_tables dictionary as well as
102                        # appending them to the shed's tool_data_table_conf.xml file on disk.
103                        error, error_message = handle_sample_tool_data_table_conf_file( trans, full_path )
104                        if error:
105                            message = '%s<br/>%s' % ( message, error_message )
106                    if full_path.endswith( '.loc.sample' ):
107                        # Handle the special case where a xxx.loc.sample file is
108                        # being uploaded by copying it to ~/tool-data/xxx.loc.
109                        copy_sample_loc_file( trans, full_path )
110                    handle_email_alerts( trans, repository )
111                if ok:
112                    # Update the repository files for browsing.
113                    update_for_browsing( trans, repository, current_working_dir, commit_message=commit_message )
114                    # Get the new repository tip.
115                    if tip != repository.tip:
116                        if ( isgzip or isbz2 ) and uncompress_file:
117                            uncompress_str = ' uncompressed and '
118                        else:
119                            uncompress_str = ' '
120                        message = "The file '%s' has been successfully%suploaded to the repository." % ( uploaded_file_filename, uncompress_str )
121                        if istar and remove_repo_files_not_in_tar and files_to_remove:
122                            if upload_point is not None:
123                                message += "  %d files were removed from the repository relative to the selected upload point '%s'." % ( len( files_to_remove ), upload_point )
124                            else:
125                                message += "  %d files were removed from the repository root." % len( files_to_remove )
126                    else:
127                        message = 'No changes to repository.'      
128                    # Set metadata on the repository tip
129                    error_message, status = set_repository_metadata( trans, repository_id, repository.tip, **kwd )
130                    if error_message:
131                        message = '%s<br/>%s' % ( message, error_message )
132                        return trans.response.send_redirect( web.url_for( controller='repository',
133                                                                          action='manage_repository',
134                                                                          id=repository_id,
135                                                                          message=message,
136                                                                          status=status ) )
137                    trans.response.send_redirect( web.url_for( controller='repository',
138                                                               action='browse_repository',
139                                                               id=repository_id,
140                                                               commit_message='Deleted selected files',
141                                                               message=message,
142                                                               status=status ) )
143                else:
144                    status = 'error'
145        selected_categories = [ trans.security.decode_id( id ) for id in category_ids ]
146        return trans.fill_template( '/webapps/community/repository/upload.mako',
147                                    repository=repository,
148                                    commit_message=commit_message,
149                                    uncompress_file=uncompress_file,
150                                    remove_repo_files_not_in_tar=remove_repo_files_not_in_tar,
151                                    message=message,
152                                    status=status )
153    def upload_tar( self, trans, repository, tar, uploaded_file, upload_point, remove_repo_files_not_in_tar, commit_message ):
154        # Upload a tar archive of files.
155        repo_dir = repository.repo_path
156        repo = hg.repository( get_configured_ui(), repo_dir )
157        files_to_remove = []
158        ok, message = self.__check_archive( tar )
159        if not ok:
160            tar.close()
161            uploaded_file.close()
162            return ok, message, files_to_remove
163        else:
164            if upload_point is not None:
165                full_path = os.path.abspath( os.path.join( repo_dir, upload_point ) )
166            else:
167                full_path = os.path.abspath( repo_dir )
168            filenames_in_archive = [ tarinfo_obj.name for tarinfo_obj in tar.getmembers() ]
169            filenames_in_archive = [ os.path.join( full_path, name ) for name in filenames_in_archive ]
170            # Extract the uploaded tar to the load_point within the repository hierarchy.
171            tar.extractall( path=full_path )
172            tar.close()
173            uploaded_file.close()
174            if remove_repo_files_not_in_tar and not repository.is_new:
175                # We have a repository that is not new (it contains files), so discover
176                # those files that are in the repository, but not in the uploaded archive.
177                for root, dirs, files in os.walk( full_path ):
178                    if not root.find( '.hg' ) >= 0 and not root.find( 'hgrc' ) >= 0:
179                        if '.hg' in dirs:
180                            # Don't visit .hg directories - should be impossible since we don't
181                            # allow uploaded archives that contain .hg dirs, but just in case...
182                            dirs.remove( '.hg' )
183                        if 'hgrc' in files:
184                             # Don't include hgrc files in commit.
185                            files.remove( 'hgrc' )
186                        for name in files:
187                            full_name = os.path.join( root, name )
188                            if full_name not in filenames_in_archive:
189                                files_to_remove.append( full_name )
190                for repo_file in files_to_remove:
191                    # Remove files in the repository (relative to the upload point)
192                    # that are not in the uploaded archive.
193                    try:
194                        commands.remove( repo.ui, repo, repo_file, force=True )
195                    except Exception, e:
196                        # I never have a problem with commands.remove on a Mac, but in the test/production
197                        # tool shed environment, it throws an exception whenever I delete all files from a
198                        # repository.  If this happens, we'll try the following.
199                        relative_selected_file = selected_file.split( 'repo_%d' % repository.id )[1].lstrip( '/' )
200                        repo.dirstate.remove( relative_selected_file )
201                        repo.dirstate.write()
202                        absolute_selected_file = os.path.abspath( selected_file )
203                        if os.path.isdir( absolute_selected_file ):
204                            try:
205                                os.rmdir( absolute_selected_file )
206                            except OSError, e:
207                                # The directory is not empty
208                                pass
209                        elif os.path.isfile( absolute_selected_file ):
210                            os.remove( absolute_selected_file )
211                            dir = os.path.split( absolute_selected_file )[0]
212                            try:
213                                os.rmdir( dir )
214                            except OSError, e:
215                                # The directory is not empty
216                                pass
217            for filename_in_archive in filenames_in_archive:
218                commands.add( repo.ui, repo, filename_in_archive )
219                if filename_in_archive.endswith( 'tool_data_table_conf.xml.sample' ):
220                    # Handle the special case where a tool_data_table_conf.xml.sample
221                    # file is being uploaded by parsing the file and adding new entries
222                    # to the in-memory trans.app.tool_data_tables dictionary as well as
223                    # appending them to the shed's tool_data_table_conf.xml file on disk.
224                    error, message = handle_sample_tool_data_table_conf_file( trans, filename_in_archive )
225                    if error:
226                        return False, message, files_to_remove
227                if filename_in_archive.endswith( '.loc.sample' ):
228                    # Handle the special case where a xxx.loc.sample file is
229                    # being uploaded by copying it to ~/tool-data/xxx.loc.
230                    copy_sample_loc_file( trans, filename_in_archive )
231            try:
232                commands.commit( repo.ui, repo, full_path, user=trans.user.username, message=commit_message )
233            except Exception, e:
234                # I never have a problem with commands.commit on a Mac, but in the test/production
235                # tool shed environment, it occasionally throws a "TypeError: array item must be char"
236                # exception.  If this happens, we'll try the following.
237                repo.dirstate.write()
238                repo.commit( user=trans.user.username, text=commit_message )
239            handle_email_alerts( trans, repository )
240            return True, '', files_to_remove
241    def uncompress( self, repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 ):
242        if isgzip:
243            self.__handle_gzip( repository, uploaded_file_name )
244            return uploaded_file_filename.rstrip( '.gz' )
245        if isbz2:
246            self.__handle_bz2( repository, uploaded_file_name )
247            return uploaded_file_filename.rstrip( '.bz2' )
248    def __handle_gzip( self, repository, uploaded_file_name ):
249        fd, uncompressed = tempfile.mkstemp( prefix='repo_%d_upload_gunzip_' % repository.id, dir=os.path.dirname( uploaded_file_name ), text=False )
250        gzipped_file = gzip.GzipFile( uploaded_file_name, 'rb' )
251        while 1:
252            try:
253                chunk = gzipped_file.read( CHUNK_SIZE )
254            except IOError, e:
255                os.close( fd )
256                os.remove( uncompressed )
257                log.exception( 'Problem uncompressing gz data "%s": %s' % ( uploaded_file_name, str( e ) ) )
258                return
259            if not chunk:
260                break
261            os.write( fd, chunk )
262        os.close( fd )
263        gzipped_file.close()
264        shutil.move( uncompressed, uploaded_file_name )
265    def __handle_bz2( self, repository, uploaded_file_name ):
266        fd, uncompressed = tempfile.mkstemp( prefix='repo_%d_upload_bunzip2_' % repository.id, dir=os.path.dirname( uploaded_file_name ), text=False )
267        bzipped_file = bz2.BZ2File( uploaded_file_name, 'rb' )
268        while 1:
269            try:
270                chunk = bzipped_file.read( CHUNK_SIZE )
271            except IOError:
272                os.close( fd )
273                os.remove( uncompressed )
274                log.exception( 'Problem uncompressing bz2 data "%s": %s' % ( uploaded_file_name, str( e ) ) )
275                return
276            if not chunk:
277                break
278            os.write( fd, chunk )
279        os.close( fd )
280        bzipped_file.close()
281        shutil.move( uncompressed, uploaded_file_name )
282    def __get_upload_point( self, repository, **kwd ):
283        upload_point = kwd.get( 'upload_point', None )
284        if upload_point is not None:
285            # The value of upload_point will be something like: database/community_files/000/repo_12/1.bed
286            if os.path.exists( upload_point ):
287                if os.path.isfile( upload_point ):
288                    # Get the parent directory
289                    upload_point, not_needed = os.path.split( upload_point )
290                    # Now the value of uplaod_point will be something like: database/community_files/000/repo_12/
291                upload_point = upload_point.split( 'repo_%d' % repository.id )[ 1 ]
292                if upload_point:
293                    upload_point = upload_point.lstrip( '/' )
294                    upload_point = upload_point.rstrip( '/' )
295                # Now the value of uplaod_point will be something like: /
296                if upload_point == '/':
297                    upload_point = None
298            else:
299                # Must have been an error selecting something that didn't exist, so default to repository root
300                upload_point = None
301        return upload_point
302    def __check_archive( self, archive ):
303        for member in archive.getmembers():
304            # Allow regular files and directories only
305            if not ( member.isdir() or member.isfile() ):
306                message = "Uploaded archives can only include regular directories and files (no symbolic links, devices, etc)."
307                return False, message
308            for item in [ '.hg', '..', '/' ]:
309                if member.name.startswith( item ):
310                    message = "Uploaded archives cannot contain .hg directories, absolute filenames starting with '/', or filenames with two dots '..'."
311                    return False, message
312            if member.name in [ 'hgrc' ]:
313                message = "Uploaded archives cannot contain hgrc files."
314                return False, message
315        return True, ''
316