/test/base/twilltestcase.py
https://bitbucket.org/cistrome/cistrome-harvard/
import difflib
import filecmp
import logging
import os
import re
import pprint
import shutil
import StringIO
import subprocess
import tarfile
import tempfile
import time
import unittest
import urllib
import zipfile

from galaxy.web import security
from galaxy.web.framework.helpers import iff
from galaxy.util.json import from_json_string
from base.asserts import verify_assertions

from galaxy import eggs
eggs.require( "elementtree" )
eggs.require( 'twill' )

from elementtree import ElementTree

import twill
import twill.commands as tc
from twill.other_packages._mechanize_dist import ClientForm

# Force twill to log to a buffer -- FIXME: should this go to stdout and be captured by nose?
buffer = StringIO.StringIO()
twill.set_output( buffer )
tc.config( 'use_tidy', 0 )

# Dial ClientCookie logging down (very noisy)
logging.getLogger( "ClientCookie.cookies" ).setLevel( logging.WARNING )

log = logging.getLogger( __name__ )
class TwillTestCase( unittest.TestCase ):

    def setUp( self ):
        # Security helper
        self.security = security.SecurityHelper( id_secret='changethisinproductiontoo' )
        self.history_id = os.environ.get( 'GALAXY_TEST_HISTORY_ID', None )
        self.host = os.environ.get( 'GALAXY_TEST_HOST' )
        self.port = os.environ.get( 'GALAXY_TEST_PORT' )
        self.url = "http://%s:%s" % ( self.host, self.port )
        self.file_dir = os.environ.get( 'GALAXY_TEST_FILE_DIR', None )
        self.tool_shed_test_file = os.environ.get( 'GALAXY_TOOL_SHED_TEST_FILE', None )
        if self.tool_shed_test_file:
            f = open( self.tool_shed_test_file, 'r' )
            text = f.read()
            f.close()
            self.shed_tools_dict = from_json_string( text )
        else:
            self.shed_tools_dict = {}
        self.keepOutdir = os.environ.get( 'GALAXY_TEST_SAVE', '' )
        if self.keepOutdir:
            try:
                os.makedirs( self.keepOutdir )
            except OSError:
                # The directory may already exist
                pass
        self.home()
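    # A minimal sketch of the environment this fixture assumes (the variable
    # names come from setUp() above; the values here are only illustrative):
    #
    #   GALAXY_TEST_HOST=localhost
    #   GALAXY_TEST_PORT=9999
    #   GALAXY_TEST_FILE_DIR=test-data
    #   GALAXY_TEST_SAVE=/tmp/galaxy_test_outputs   # optional: keep output files
    #   GALAXY_TOOL_SHED_TEST_FILE=shed_tools.json  # optional: migrated-tool tests
    #
    # The functional-test runner is expected to export these before the suite starts.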
    # Functions associated with files
    def files_diff( self, file1, file2, attributes=None ):
        """Checks the contents of 2 files for differences"""
        def get_lines_diff( diff ):
            count = 0
            for line in diff:
                if ( line.startswith( '+' ) and not line.startswith( '+++' ) ) or ( line.startswith( '-' ) and not line.startswith( '---' ) ):
                    count += 1
            return count
        if not filecmp.cmp( file1, file2 ):
            files_differ = False
            local_file = open( file1, 'U' ).readlines()
            history_data = open( file2, 'U' ).readlines()
            if attributes is None:
                attributes = {}
            if attributes.get( 'sort', False ):
                history_data.sort()
            # Why even bother with the check loop below, why not just use the diff output? This seems wasteful.
            if len( local_file ) == len( history_data ):
                for i in range( len( history_data ) ):
                    if local_file[i].rstrip( '\r\n' ) != history_data[i].rstrip( '\r\n' ):
                        files_differ = True
                        break
            else:
                files_differ = True
            if files_differ:
                allowed_diff_count = int( attributes.get( 'lines_diff', 0 ) )
                diff = list( difflib.unified_diff( local_file, history_data, "local_file", "history_data" ) )
                diff_lines = get_lines_diff( diff )
                if diff_lines > allowed_diff_count:
                    if len( diff ) < 60:
                        diff_slice = diff[0:40]
                    else:
                        diff_slice = diff[:25] + ["********\n", "*SNIP *\n", "********\n"] + diff[-25:]
                    # FIXME: This pdf stuff is rather special cased and has not been updated to consider lines_diff
                    # due to unknown desired behavior when used in conjunction with a non-zero lines_diff.
                    # PDF forgiveness can probably be handled better by not special casing by __extension__ here
                    # and instead using lines_diff or a regular expression matching,
                    # or by creating and using a specialized pdf comparison function.
                    if file1.endswith( '.pdf' ) or file2.endswith( '.pdf' ):
                        # PDF files contain creation dates, modification dates, ids and descriptions that change with each
                        # new file, so we need to handle these differences.  As long as the rest of the PDF file does
                        # not differ we're ok.
                        valid_diff_strs = [ 'description', 'createdate', 'creationdate', 'moddate', 'id', 'producer', 'creator' ]
                        valid_diff = False
                        invalid_diff_lines = 0
                        for line in diff_slice:
                            # Make sure to lower case strings before checking.
                            line = line.lower()
                            # Diff lines will always start with a + or - character, but handle special cases: '--- local_file \n', '+++ history_data \n'
                            if ( line.startswith( '+' ) or line.startswith( '-' ) ) and line.find( 'local_file' ) < 0 and line.find( 'history_data' ) < 0:
                                for vdf in valid_diff_strs:
                                    if line.find( vdf ) < 0:
                                        valid_diff = False
                                    else:
                                        valid_diff = True
                                        # Stop checking as soon as we know we have a valid difference
                                        break
                                if not valid_diff:
                                    invalid_diff_lines += 1
                        log.info( '## files diff on %s and %s lines_diff=%d, found diff = %d, found pdf invalid diff = %d' % ( file1, file2, allowed_diff_count, diff_lines, invalid_diff_lines ) )
                        if invalid_diff_lines > allowed_diff_count:
                            # Print out diff_slice so we can see what failed
                            print "###### diff_slice ######"
                            raise AssertionError( "".join( diff_slice ) )
                    else:
                        log.info( '## files diff on %s and %s lines_diff=%d, found diff = %d' % ( file1, file2, allowed_diff_count, diff_lines ) )
                        for line in diff_slice:
                            for char in line:
                                if ord( char ) > 128:
                                    raise AssertionError( "Binary data detected, not displaying diff" )
                        raise AssertionError( "".join( diff_slice ) )
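    # Usage sketch for files_diff() (file names are hypothetical): compare a
    # downloaded history output against an expected file, tolerating up to 2
    # differing lines and sorting the history data first:
    #
    #   self.files_diff( 'expected_output.tabular', downloaded_file,
    #                    attributes=dict( lines_diff=2, sort=True ) )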
    def files_re_match( self, file1, file2, attributes=None ):
        """Checks the contents of 2 files for differences using re.match"""
        local_file = open( file1, 'U' ).readlines()  # regex file
        history_data = open( file2, 'U' ).readlines()
        assert len( local_file ) == len( history_data ), 'Data File and Regular Expression File contain a different number of lines (%s != %s)\nHistory Data (first 40 lines):\n%s' % ( len( local_file ), len( history_data ), ''.join( history_data[:40] ) )
        if attributes is None:
            attributes = {}
        if attributes.get( 'sort', False ):
            history_data.sort()
        lines_diff = int( attributes.get( 'lines_diff', 0 ) )
        line_diff_count = 0
        diffs = []
        for i in range( len( history_data ) ):
            if not re.match( local_file[i].rstrip( '\r\n' ), history_data[i].rstrip( '\r\n' ) ):
                line_diff_count += 1
                diffs.append( 'Regular Expression: %s\nData file : %s' % ( local_file[i].rstrip( '\r\n' ), history_data[i].rstrip( '\r\n' ) ) )
            if line_diff_count > lines_diff:
                raise AssertionError( "Regular expression did not match data file (allowed variants=%i):\n%s" % ( lines_diff, "".join( diffs ) ) )
    def files_re_match_multiline( self, file1, file2, attributes=None ):
        """Checks the contents of 2 files for differences using re.match in multiline mode"""
        local_file = open( file1, 'U' ).read()  # regex file
        if attributes is None:
            attributes = {}
        if attributes.get( 'sort', False ):
            history_data = open( file2, 'U' ).readlines()
            history_data.sort()
            history_data = ''.join( history_data )
        else:
            history_data = open( file2, 'U' ).read()
        # lines_diff not applicable to multiline matching
        assert re.match( local_file, history_data, re.MULTILINE ), "Multiline Regular expression did not match data file"
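    # Usage sketch for the re_match variants (pattern files are hypothetical):
    # files_re_match() matches line-by-line, so the pattern file must have the
    # same number of lines as the data file; files_re_match_multiline() applies
    # a single pattern to the entire file contents:
    #
    #   self.files_re_match( 'expected_patterns.txt', downloaded_file,
    #                        attributes=dict( lines_diff=1 ) )
    #   self.files_re_match_multiline( 'expected_multiline_pattern.txt', downloaded_file )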
    def files_contains( self, file1, file2, attributes=None ):
        """Checks the contents of file2 for substrings found in file1, on a per-line basis"""
        local_file = open( file1, 'U' ).readlines()  # file of required substrings, one per line
        # TODO: allow forcing ordering of contains
        history_data = open( file2, 'U' ).read()
        if attributes is None:
            attributes = {}
        lines_diff = int( attributes.get( 'lines_diff', 0 ) )
        line_diff_count = 0
        while local_file:
            contains = local_file.pop( 0 ).rstrip( '\n\r' )
            if contains not in history_data:
                line_diff_count += 1
            if line_diff_count > lines_diff:
                raise AssertionError( "Failed to find '%s' in history data. (lines_diff=%i):\n" % ( contains, lines_diff ) )
    def get_filename( self, filename, shed_tool_id=None ):
        if shed_tool_id and self.shed_tools_dict:
            file_dir = self.shed_tools_dict[ shed_tool_id ]
            if not file_dir:
                file_dir = self.file_dir
        else:
            file_dir = self.file_dir
        return os.path.abspath( os.path.join( file_dir, filename ) )
    def save_log( self, *path ):
        """Saves the twill log buffer to a file"""
        filename = os.path.join( *path )
        open( filename, 'wt' ).write( buffer.getvalue() )
    def upload_file( self, filename, ftype='auto', dbkey='unspecified (?)', space_to_tab=False, metadata=None, composite_data=None, name=None, shed_tool_id=None, wait=True ):
        """
        Uploads a file.  If shed_tool_id has a value, we're testing tools migrated from the distribution to the tool shed,
        so the tool-data directory of test data files is contained in the installed tool shed repository.
        """
        self.visit_url( "%s/tool_runner?tool_id=upload1" % self.url )
        try:
            self.refresh_form( "file_type", ftype )  # Refresh, to support composite files
            tc.fv( "tool_form", "dbkey", dbkey )
            if metadata:
                for elem in metadata:
                    tc.fv( "tool_form", "files_metadata|%s" % elem.get( 'name' ), elem.get( 'value' ) )
            if composite_data:
                for i, composite_file in enumerate( composite_data ):
                    filename = self.get_filename( composite_file.get( 'value' ), shed_tool_id=shed_tool_id )
                    tc.formfile( "tool_form", "files_%i|file_data" % i, filename )
                    tc.fv( "tool_form", "files_%i|space_to_tab" % i, composite_file.get( 'space_to_tab', False ) )
            else:
                filename = self.get_filename( filename, shed_tool_id=shed_tool_id )
                tc.formfile( "tool_form", "file_data", filename )
                tc.fv( "tool_form", "space_to_tab", space_to_tab )
                if name:
                    # NAME is a hidden form element, so the following property
                    # must be set for twill to be able to write to it.
                    tc.config( "readonly_controls_writeable", 1 )
                    tc.fv( "tool_form", "NAME", name )
            tc.submit( "runtool_btn" )
            self.home()
        except AssertionError, err:
            errmsg = "Uploading file resulted in the following exception.  Make sure the file (%s) exists.  " % filename
            errmsg += str( err )
            raise AssertionError( errmsg )
        if not wait:
            return
        # Make sure every history item has a valid hid
        hids = self.get_hids_in_history()
        for hid in hids:
            try:
                int( hid )
            except:
                raise AssertionError( "Invalid hid (%s) created when uploading file %s" % ( hid, filename ) )
        # Wait for upload processing to finish (TODO: this should be done in each test case instead)
        self.wait()
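    # Usage sketch for upload_file() (file names and dbkey are illustrative):
    # upload a test-data file with an explicit datatype, or a text file with
    # spaces converted to tabs; by default the call waits for the upload job:
    #
    #   self.upload_file( '1.bed', ftype='bed', dbkey='hg17' )
    #   self.upload_file( 'matrix.txt', ftype='tabular', space_to_tab=True )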
    def upload_url_paste( self, url_paste, ftype='auto', dbkey='unspecified (?)' ):
        """Pastes data into the upload utility"""
        self.visit_page( "tool_runner/index?tool_id=upload1" )
        try:
            self.refresh_form( "file_type", ftype )  # Refresh, to support composite files
            tc.fv( "tool_form", "dbkey", dbkey )
            tc.fv( "tool_form", "url_paste", url_paste )
            tc.submit( "runtool_btn" )
            self.home()
        except Exception, e:
            errmsg = "Problem executing upload utility using url_paste: %s" % str( e )
            raise AssertionError( errmsg )
        # Make sure every history item has a valid hid
        hids = self.get_hids_in_history()
        for hid in hids:
            try:
                int( hid )
            except:
                raise AssertionError( "Invalid hid (%s) created when pasting %s" % ( hid, url_paste ) )
        # Wait for upload processing to finish (TODO: this should be done in each test case instead)
        self.wait()
    def json_from_url( self, url ):
        self.visit_url( url )
        return from_json_string( self.last_page() )

    # Functions associated with histories
    def get_history_from_api( self, encoded_history_id=None ):
        if encoded_history_id is None:
            history = self.get_latest_history()
            encoded_history_id = history[ 'id' ]
        return self.json_from_url( '/api/histories/%s/contents' % encoded_history_id )

    def get_latest_history( self ):
        return self.json_from_url( '/api/histories' )[ 0 ]
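    # Usage sketch for the history API helpers: grab the newest history's
    # encoded id, then list its contents as parsed JSON:
    #
    #   history = self.get_latest_history()
    #   contents = self.get_history_from_api( history[ 'id' ] )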
    def find_hda_by_dataset_name( self, name, history=None ):
        if history is None:
            history = self.get_history_from_api()
        for hda in history:
            if hda[ 'name' ] == name:
                return hda

    def check_history_for_errors( self ):
        """Raises an exception if there are errors in a history"""
        self.home()
        self.visit_page( "history" )
        page = self.last_page()
        if page.find( 'error' ) > -1:
            raise AssertionError( 'Errors in the history for user %s' % self.user )
    def check_history_for_string( self, patt, show_deleted=False ):
        """Breaks patt on whitespace and searches for each element separately in the history"""
        self.home()
        if show_deleted:
            self.visit_page( "history?show_deleted=True" )
        else:
            self.visit_page( "history" )
        for subpatt in patt.split():
            try:
                tc.find( subpatt )
            except:
                fname = self.write_temp_file( tc.browser.get_html() )
                errmsg = "no match to '%s'\npage content written to '%s'" % ( subpatt, fname )
                raise AssertionError( errmsg )
        self.home()

    def check_history_for_exact_string( self, string, show_deleted=False ):
        """Looks for an exact match to 'string' in the history page"""
        self.home()
        if show_deleted:
            self.visit_page( "history?show_deleted=True" )
        else:
            self.visit_page( "history" )
        try:
            tc.find( string )
        except:
            fname = self.write_temp_file( tc.browser.get_html() )
            errmsg = "no match to '%s'\npage content written to '%s'" % ( string, fname )
            raise AssertionError( errmsg )
        self.home()
    def check_history_json( self, pattern, check_fn, show_deleted=None, multiline=True ):
        """
        Tries to find a JSON string in the history page using the regex pattern,
        parse it, and assert check_fn returns True when called on that parsed
        data.
        """
        self.home()
        if show_deleted:
            self.visit_page( "history?show_deleted=True" )
        elif show_deleted is False:
            self.visit_page( "history?show_deleted=False" )
        else:
            self.visit_page( "history" )
        json_data = {}
        try:
            tc.find( pattern, flags=( 'm' if multiline else '' ) )
            # twill stores the regex match in a special stack variable
            match = twill.namespaces.get_twill_glocals()[1][ '__match__' ]
            json_data = from_json_string( match )
            assert check_fn( json_data ), 'failed check_fn: %s' % ( check_fn.func_name )
        except Exception, exc:
            log.error( exc, exc_info=True )
            log.debug( 'json_data: %s', ( '\n' + pprint.pformat( json_data ) if json_data else '(no match)' ) )
            fname = self.write_temp_file( tc.browser.get_html() )
            errmsg = ( "json '%s' could not be found or failed check_fn" % ( pattern ) +
                       "\npage content written to '%s'" % ( fname ) )
            raise AssertionError( errmsg )
        self.home()
    def is_history_empty( self ):
        """
        Uses history page JSON to determine whether this history is empty
        (i.e. has no undeleted datasets).
        """
        return len( self.get_history_from_api() ) == 0

    def check_hda_json_for_key_value( self, hda_id, key, value, use_string_contains=False ):
        """
        Uses the history API to determine whether the current history:
        (1) Has a history dataset with the required ID.
        (2) That dataset has the required key.
        (3) The contents of that key match the provided value.
        If use_string_contains=True, this will perform a substring match, otherwise an exact match.
        """
        # TODO: multi key, value
        hda = dict()
        for history_item in self.get_history_from_api():
            if history_item[ 'id' ] == hda_id:
                hda = self.json_from_url( history_item[ 'url' ] )
                break
        if hda:
            if key in hda:
                if use_string_contains:
                    return value in hda[ key ]
                else:
                    return value == hda[ key ]
        return False
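    # Usage sketch for check_hda_json_for_key_value() (the id, key and value
    # are hypothetical): verify via the API that some key of a dataset's JSON
    # representation contains a given substring:
    #
    #   assert self.check_hda_json_for_key_value( hda_id, 'misc_info', 'lines',
    #                                             use_string_contains=True )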
    def clear_history( self ):
        """Empties a history of all datasets"""
        self.visit_page( "clear_history" )
        self.check_history_for_string( 'Your history is empty' )
        self.home()

    def delete_history( self, id ):
        """Deletes one or more histories"""
        history_list = self.get_histories_as_data_list()
        self.assertTrue( history_list )
        num_deleted = len( id.split( ',' ) )
        self.home()
        self.visit_page( "history/list?operation=delete&id=%s" % ( id ) )
        check_str = 'Deleted %d %s' % ( num_deleted, iff( num_deleted != 1, "histories", "history" ) )
        self.check_page_for_string( check_str )
        self.home()

    def delete_current_history( self, strings_displayed=[] ):
        """Deletes the current history"""
        self.home()
        self.visit_page( "history/delete_current" )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        self.home()

    def get_histories_as_data_list( self ):
        """Returns the data elements of all histories"""
        tree = self.histories_as_xml_tree()
        data_list = [ elem for elem in tree.findall( "data" ) ]
        return data_list

    def get_history_as_data_list( self, show_deleted=False ):
        """Returns the data elements of a history"""
        tree = self.history_as_xml_tree( show_deleted=show_deleted )
        data_list = [ elem for elem in tree.findall( "data" ) ]
        return data_list

    def history_as_xml_tree( self, show_deleted=False ):
        """Returns a parsed xml object of a history"""
        self.home()
        self.visit_page( 'history?as_xml=True&show_deleted=%s' % show_deleted )
        xml = self.last_page()
        tree = ElementTree.fromstring( xml )
        return tree

    def histories_as_xml_tree( self ):
        """Returns a parsed xml object of all histories"""
        self.home()
        self.visit_page( 'history/list_as_xml' )
        xml = self.last_page()
        tree = ElementTree.fromstring( xml )
        return tree

    def history_options( self, user=False, active_datasets=False, activatable_datasets=False, histories_shared_by_others=False ):
        """Mimics user clicking on history options link"""
        self.home()
        self.visit_page( "root/history_options" )
        if user:
            self.check_page_for_string( 'Previously</a> stored histories' )
            if active_datasets:
                self.check_page_for_string( 'Create</a> a new empty history' )
                self.check_page_for_string( 'Construct workflow</a> from current history' )
                self.check_page_for_string( 'Copy</a> current history' )
                self.check_page_for_string( 'Share</a> current history' )
                self.check_page_for_string( 'Change default permissions</a> for current history' )
            if histories_shared_by_others:
                self.check_page_for_string( 'Histories</a> shared with you by others' )
            if activatable_datasets:
                self.check_page_for_string( 'Show deleted</a> datasets in current history' )
            self.check_page_for_string( 'Rename</a> current history' )
            self.check_page_for_string( 'Delete</a> current history' )
        self.home()
    def new_history( self, name=None ):
        """Creates a new, empty history"""
        self.home()
        if name:
            self.visit_url( "%s/history_new?name=%s" % ( self.url, name ) )
        else:
            self.visit_url( "%s/history_new" % self.url )
        self.check_history_for_string( 'Your history is empty' )
        self.home()

    def rename_history( self, id, old_name, new_name ):
        """Rename an existing history"""
        self.home()
        self.visit_page( "history/rename?id=%s&name=%s" % ( id, new_name ) )
        check_str = 'History: %s renamed to: %s' % ( old_name, urllib.unquote( new_name ) )
        self.check_page_for_string( check_str )
        self.home()

    def set_history( self ):
        """Sets the history (stores the cookies for this run)"""
        if self.history_id:
            self.home()
            self.visit_page( "history?id=%s" % self.history_id )
        else:
            self.new_history()
        self.home()

    def share_current_history( self, email, strings_displayed=[], strings_displayed_after_submit=[],
                               action='', action_strings_displayed=[], action_strings_displayed_after_submit=[] ):
        """Share the current history with different users"""
        self.visit_url( "%s/history/share" % self.url )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        tc.fv( 'share', 'email', email )
        tc.submit( 'share_button' )
        for check_str in strings_displayed_after_submit:
            self.check_page_for_string( check_str )
        if action:
            # If we have an action, then we are sharing datasets with users that do not have access permissions on them
            for check_str in action_strings_displayed:
                self.check_page_for_string( check_str )
            tc.fv( 'share_restricted', 'action', action )
            tc.submit( "share_restricted_button" )
            for check_str in action_strings_displayed_after_submit:
                self.check_page_for_string( check_str )
        self.home()

    def share_histories_with_users( self, ids, emails, strings_displayed=[], strings_displayed_after_submit=[],
                                    action=None, action_strings_displayed=[] ):
        """Share one or more histories with one or more different users"""
        self.visit_url( "%s/history/list?id=%s&operation=Share" % ( self.url, ids ) )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        tc.fv( 'share', 'email', emails )
        tc.submit( 'share_button' )
        for check_str in strings_displayed_after_submit:
            self.check_page_for_string( check_str )
        if action:
            # If we have an action, then we are sharing datasets with users that do not have access permissions on them
            tc.fv( 'share_restricted', 'action', action )
            tc.submit( "share_restricted_button" )
            for check_str in action_strings_displayed:
                self.check_page_for_string( check_str )
        self.home()

    def unshare_history( self, history_id, user_id, strings_displayed=[] ):
        """Unshare a history that has been shared with another user"""
        self.visit_url( "%s/history/list?id=%s&operation=share+or+publish" % ( self.url, history_id ) )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        self.visit_url( "%s/history/sharing?unshare_user=%s&id=%s" % ( self.url, user_id, history_id ) )
        self.home()
    def switch_history( self, id='', name='' ):
        """Switches to a history in the current list of histories"""
        self.visit_url( "%s/history/list?operation=switch&id=%s" % ( self.url, id ) )
        if name:
            self.check_history_for_exact_string( name )
        self.home()

    def view_stored_active_histories( self, strings_displayed=[] ):
        self.home()
        self.visit_page( "history/list" )
        self.check_page_for_string( 'Saved Histories' )
        self.check_page_for_string( 'operation=Rename' )
        self.check_page_for_string( 'operation=Switch' )
        self.check_page_for_string( 'operation=Delete' )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        self.home()

    def view_stored_deleted_histories( self, strings_displayed=[] ):
        self.home()
        self.visit_page( "history/list?f-deleted=True" )
        self.check_page_for_string( 'Saved Histories' )
        self.check_page_for_string( 'operation=Undelete' )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        self.home()

    def view_shared_histories( self, strings_displayed=[] ):
        self.home()
        self.visit_page( "history/list_shared" )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        self.home()

    def copy_history( self, history_id, copy_choice, strings_displayed=[], strings_displayed_after_submit=[] ):
        self.home()
        self.visit_page( "history/copy?id=%s" % history_id )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        tc.fv( '1', 'copy_choice', copy_choice )
        tc.submit( 'copy_choice_button' )
        for check_str in strings_displayed_after_submit:
            self.check_page_for_string( check_str )
        self.home()

    def make_accessible_via_link( self, history_id, strings_displayed=[], strings_displayed_after_submit=[] ):
        self.home()
        self.visit_page( "history/list?operation=share+or+publish&id=%s" % history_id )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        # twill barfs on this form, possibly because it contains no fields, but not sure.
        # In any case, we have to mimic the form submission
        self.home()
        self.visit_page( 'history/sharing?id=%s&make_accessible_via_link=True' % history_id )
        for check_str in strings_displayed_after_submit:
            self.check_page_for_string( check_str )
        self.home()

    def disable_access_via_link( self, history_id, strings_displayed=[], strings_displayed_after_submit=[] ):
        self.home()
        self.visit_page( "history/list?operation=share+or+publish&id=%s" % history_id )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        # twill barfs on this form, possibly because it contains no fields, but not sure.
        # In any case, we have to mimic the form submission
        self.home()
        self.visit_page( 'history/sharing?id=%s&disable_link_access=True' % history_id )
        for check_str in strings_displayed_after_submit:
            self.check_page_for_string( check_str )
        self.home()

    def import_history_via_url( self, history_id, email, strings_displayed_after_submit=[] ):
        self.home()
        self.visit_page( "history/imp?&id=%s" % history_id )
        for check_str in strings_displayed_after_submit:
            self.check_page_for_string( check_str )
        self.home()
    # Functions associated with datasets (history items) and meta data
    def _get_job_stream_output( self, hda_id, stream, format ):
        self.visit_page( "datasets/%s/%s" % ( self.security.encode_id( hda_id ), stream ) )
        output = self.last_page()
        return self._format_stream( output, stream, format )

    def _format_stream( self, output, stream, format ):
        if format:
            msg = "---------------------- >> begin tool %s << -----------------------\n" % stream
            msg += output + "\n"
            msg += "----------------------- >> end tool %s << ------------------------\n" % stream
        else:
            msg = output
        return msg

    def get_job_stdout( self, hda_id, format=False ):
        return self._get_job_stream_output( hda_id, 'stdout', format )

    def get_job_stderr( self, hda_id, format=False ):
        return self._get_job_stream_output( hda_id, 'stderr', format )

    def _assert_dataset_state( self, elem, state ):
        if elem.get( 'state' ) != state:
            errmsg = "Expecting dataset state '%s', but state is '%s'. Dataset blurb: %s\n\n" % ( state, elem.get( 'state' ), elem.text.strip() )
            errmsg += self.get_job_stderr( elem.get( 'id' ), format=True )
            raise AssertionError( errmsg )

    def check_metadata_for_string( self, patt, hid=None ):
        """Looks for 'patt' in the edit page when editing a dataset"""
        data_list = self.get_history_as_data_list()
        self.assertTrue( data_list )
        if hid is None:  # take last hid
            elem = data_list[-1]
            hid = int( elem.get( 'hid' ) )
        self.assertTrue( hid )
        self.visit_page( "dataset/edit?hid=%s" % hid )
        for subpatt in patt.split():
            tc.find( subpatt )
    def delete_history_item( self, hda_id, strings_displayed=[] ):
        """Deletes an item from a history"""
        try:
            hda_id = int( hda_id )
        except:
            raise AssertionError( "Invalid hda_id '%s' - must be int" % hda_id )
        self.visit_url( "%s/datasets/%s/delete?show_deleted_on_refresh=False" % ( self.url, self.security.encode_id( hda_id ) ) )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )

    def undelete_history_item( self, hda_id, strings_displayed=[] ):
        """Un-deletes a deleted item in a history"""
        try:
            hda_id = int( hda_id )
        except:
            raise AssertionError( "Invalid hda_id '%s' - must be int" % hda_id )
        self.visit_url( "%s/datasets/%s/undelete" % ( self.url, self.security.encode_id( hda_id ) ) )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )

    def display_history_item( self, hda_id, strings_displayed=[] ):
        """Displays a history item - simulates eye icon click"""
        self.visit_url( '%s/datasets/%s/display/' % ( self.url, self.security.encode_id( hda_id ) ) )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        self.home()

    def view_history( self, history_id, strings_displayed=[] ):
        """Displays a history for viewing"""
        self.visit_url( '%s/history/view?id=%s' % ( self.url, self.security.encode_id( history_id ) ) )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        self.home()
    def edit_hda_attribute_info( self, hda_id, new_name='', new_info='', new_dbkey='', new_startcol='',
                                 strings_displayed=[], strings_not_displayed=[] ):
        """Edit history_dataset_association attribute information"""
        self.home()
        self.visit_url( "%s/datasets/%s/edit" % ( self.url, self.security.encode_id( hda_id ) ) )
        submit_required = False
        self.check_page_for_string( 'Edit Attributes' )
        if new_name:
            tc.fv( 'edit_attributes', 'name', new_name )
            submit_required = True
        if new_info:
            tc.fv( 'edit_attributes', 'info', new_info )
            submit_required = True
        if new_dbkey:
            tc.fv( 'edit_attributes', 'dbkey', new_dbkey )
            submit_required = True
        if new_startcol:
            tc.fv( 'edit_attributes', 'startCol', new_startcol )
            submit_required = True
        if submit_required:
            tc.submit( 'save' )
            self.check_page_for_string( 'Attributes updated' )
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
        for check_str in strings_not_displayed:
            # check_page_for_string() raising is the expected outcome here.
            # The failure must be raised outside the try block; otherwise a
            # bare except would swallow it and this check could never fail.
            found = True
            try:
                self.check_page_for_string( check_str )
            except AssertionError:
                found = False
            if found:
                raise AssertionError( "String (%s) incorrectly displayed on Edit Attributes page." % check_str )
        self.home()

    def check_hda_attribute_info( self, hda_id, strings_displayed=[] ):
        """Check history_dataset_association attribute information"""
        for check_str in strings_displayed:
            self.check_page_for_string( check_str )
    def auto_detect_metadata( self, hda_id ):
        """Auto-detect history_dataset_association metadata"""
        self.home()
        self.visit_url( "%s/datasets/%s/edit" % ( self.url, self.security.encode_id( hda_id ) ) )
        self.check_page_for_string( 'This will inspect the dataset and attempt' )
        tc.fv( 'auto_detect', 'detect', 'Auto-detect' )
        tc.submit( 'detect' )
        try:
            self.check_page_for_string( 'Attributes have been queued to be updated' )
            self.wait()
        except AssertionError:
            self.check_page_for_string( 'Attributes updated' )
        self.home()

    def convert_format( self, hda_id, target_type ):
        """Convert format of history_dataset_association"""
        self.home()
        self.visit_url( "%s/datasets/%s/edit" % ( self.url, self.security.encode_id( hda_id ) ) )
        self.check_page_for_string( 'This will inspect the dataset and attempt' )
        tc.fv( 'convert_data', 'target_type', target_type )
        tc.submit( 'convert_data' )
        self.check_page_for_string( 'The file conversion of Convert BED to GFF on data' )
        self.wait()  # wait for the format convert tool to finish before returning
        self.home()

    def change_datatype( self, hda_id, datatype ):
        """Change format of history_dataset_association"""
        self.home()
        self.visit_url( "%s/datasets/%s/edit" % ( self.url, self.security.encode_id( hda_id ) ) )
        self.check_page_for_string( 'This will change the datatype of the existing dataset but' )
        tc.fv( 'change_datatype', 'datatype', datatype )
        tc.submit( 'change' )
        self.check_page_for_string( 'Changed the type of dataset' )
        self.home()
    def copy_history_item( self, source_dataset_id=None, target_history_id=None, all_target_history_ids=[],
                           deleted_history_ids=[] ):
        """
        Copy 1 history_dataset_association to 1 history (limited by twill since it doesn't support
        multiple fields with the same name, such as checkboxes).
        """
        self.home()
        self.visit_url( "%s/dataset/copy_datasets?source_dataset_ids=%s" % ( self.url, source_dataset_id ) )
        self.check_page_for_string( 'Source History:' )
        # Make sure all of the user's active histories are displayed
        for id in all_target_history_ids:
            self.check_page_for_string( id )
        # Make sure only active histories are displayed
        for id in deleted_history_ids:
            # As in edit_hda_attribute_info() above, raise outside the try
            # block so the failure is not swallowed by the except clause.
            found = True
            try:
                self.check_page_for_string( id )
            except AssertionError:
                found = False
            if found:
                raise AssertionError( "deleted history id %s displayed in list of target histories" % id )
        tc.fv( '1', 'target_history_id', target_history_id )
        tc.submit( 'do_copy' )
        check_str = '1 dataset copied to 1 history'
        self.check_page_for_string( check_str )
        self.home()
    def get_hids_in_history( self ):
        """Returns the list of hid values for items in a history"""
        data_list = self.get_history_as_data_list()
        hids = []
        for elem in data_list:
            hid = elem.get( 'hid' )
            hids.append( hid )
        return hids

    def get_hids_in_histories( self ):
        """Returns the list of hid values for items in all histories"""
        data_list = self.get_histories_as_data_list()
        hids = []
        for elem in data_list:
            hid = elem.get( 'hid' )
            hids.append( hid )
        return hids
    def makeTfname( self, fname=None ):
        """Safe temp name - preserves the file extension for tools that interpret it, by using the basename of fname as the temp file's suffix"""
        suffix = os.path.split( fname )[-1]  # ignore full path
        fd, temp_prefix = tempfile.mkstemp( prefix='tmp', suffix=suffix )
        os.close( fd )  # the caller reopens the file by name; don't leak the descriptor
        return temp_prefix
    def verify_dataset_correctness( self, filename, hid=None, wait=True, maxseconds=120, attributes=None, shed_tool_id=None ):
        """Verifies that the attributes and contents of a history item meet expectations"""
        if wait:
            self.wait( maxseconds=maxseconds )  # wait for job to finish
        data_list = self.get_history_as_data_list()
        self.assertTrue( data_list )
        if hid is None:  # take last hid
            elem = data_list[-1]
            hid = str( elem.get( 'hid' ) )
        else:
            hid = str( hid )
            elems = [ elem for elem in data_list if elem.get( 'hid' ) == hid ]
            self.assertTrue( len( elems ) == 1 )
            elem = elems[0]
        self.assertTrue( hid )
        self._assert_dataset_state( elem, 'ok' )
        if filename is not None and self.is_zipped( filename ):
            errmsg = 'History item %s is a zip archive which includes invalid files:\n' % hid
            zip_file = zipfile.ZipFile( filename, "r" )
            name = zip_file.namelist()[0]
            test_ext = name.split( "." )[1].strip().lower()
            if not ( test_ext == 'scf' or test_ext == 'ab1' or test_ext == 'txt' ):
                raise AssertionError( errmsg )
            for name in zip_file.namelist():
                ext = name.split( "." )[1].strip().lower()
                if ext != test_ext:
                    raise AssertionError( errmsg )
        else:
            # See note in controllers/root.py about encoded_id.
            hda_id = self.security.encode_id( elem.get( 'id' ) )
            self.verify_hid( filename, hid=hid, hda_id=hda_id, attributes=attributes, shed_tool_id=shed_tool_id )
    def verify_hid( self, filename, hda_id, attributes, shed_tool_id, hid="", dataset_fetcher=None ):
        dataset_fetcher = dataset_fetcher or self.__default_dataset_fetcher()
        data = dataset_fetcher( hda_id )
        if attributes is not None and attributes.get( "assert_list", None ) is not None:
            try:
                verify_assertions( data, attributes[ "assert_list" ] )
            except AssertionError, err:
                errmsg = 'History item %s different than expected\n' % ( hid )
                errmsg += str( err )
                raise AssertionError( errmsg )
        if filename is not None:
            local_name = self.get_filename( filename, shed_tool_id=shed_tool_id )
            temp_name = self.makeTfname( fname=filename )
            open( temp_name, 'wb' ).write( data )
            # if the server's env has GALAXY_TEST_SAVE, save the output file to that dir
            if self.keepOutdir:
                ofn = os.path.join( self.keepOutdir, os.path.basename( local_name ) )
                log.debug( 'keepoutdir: %s, ofn: %s', self.keepOutdir, ofn )
                try:
                    shutil.copy( temp_name, ofn )
                except Exception, exc:
                    error_log_msg = ( 'TwillTestCase could not save output file %s to %s: ' % ( temp_name, ofn ) )
                    error_log_msg += str( exc )
                    log.error( error_log_msg, exc_info=True )
                else:
                    log.debug( '## GALAXY_TEST_SAVE=%s. saved %s' % ( self.keepOutdir, ofn ) )
            try:
                if attributes is None:
                    attributes = {}
                compare = attributes.get( 'compare', 'diff' )
                if attributes.get( 'ftype', None ) == 'bam':
                    local_fh, temp_name = self._bam_to_sam( local_name, temp_name )
                    local_name = local_fh.name
                extra_files = attributes.get( 'extra_files', None )
                if compare == 'diff':
                    self.files_diff( local_name, temp_name, attributes=attributes )
                elif compare == 're_match':
                    self.files_re_match( local_name, temp_name, attributes=attributes )
                elif compare == 're_match_multiline':
                    self.files_re_match_multiline( local_name, temp_name, attributes=attributes )
                elif compare == 'sim_size':
                    delta = attributes.get( 'delta', '100' )
                    s1 = len( data )
                    s2 = os.path.getsize( local_name )
                    if abs( s1 - s2 ) > int( delta ):
                        raise Exception( 'Files %s=%db but %s=%db - compare (delta=%s) failed' % ( temp_name, s1, local_name, s2, delta ) )
                elif compare == "contains":
                    self.files_contains( local_name, temp_name, attributes=attributes )
                else:
                    raise Exception( 'Unimplemented Compare type: %s' % compare )
                if extra_files:
                    self.verify_extra_files_content( extra_files, hda_id, shed_tool_id=shed_tool_id, dataset_fetcher=dataset_fetcher )
            except AssertionError, err:
                errmsg = 'History item %s different than expected, difference (using %s):\n' % ( hid, compare )
                errmsg += "( %s v. %s )\n" % ( local_name, temp_name )
                errmsg += str( err )
                raise AssertionError( errmsg )
            finally:
                if 'GALAXY_TEST_NO_CLEANUP' not in os.environ:
                    os.remove( temp_name )
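    # Sketch of the 'attributes' dict that verify_hid() dispatches on (the
    # keys all appear in the code above; the values are only illustrative):
    #
    #   attributes = dict(
    #       compare='diff',       # or 're_match', 're_match_multiline', 'sim_size', 'contains'
    #       lines_diff=2,         # tolerated differing lines for diff/re_match/contains
    #       sort=True,            # sort history data before comparing
    #       ftype='bam',          # bam outputs are converted to sam before diffing
    #       delta='100',          # byte-size tolerance when compare='sim_size'
    #       assert_list=None,     # checked via base.asserts.verify_assertions when set
    #       extra_files=None,     # composite/extra file checks when set
    #   )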
    def __default_dataset_fetcher( self ):
        def fetcher( hda_id, filename=None ):
            if filename is None:
                page_url = "display?encoded_id=%s" % hda_id
                self.home()  # I assume this is not needed.
            else:
                page_url = "datasets/%s/display/%s" % ( hda_id, filename )
            self.visit_page( page_url )
            data = self.last_page()
            return data
        return fetcher
    def _bam_to_sam( self, local_name, temp_name ):
        temp_local = tempfile.NamedTemporaryFile( suffix='.sam', prefix='local_bam_converted_to_sam_' )
        fd, temp_temp = tempfile.mkstemp( suffix='.sam', prefix='history_bam_converted_to_sam_' )
        os.close( fd )
        p = subprocess.Popen( args='samtools view -h -o "%s" "%s"' % ( temp_local.name, local_name ), shell=True )
        assert not p.wait(), 'Converting local (test-data) bam to sam failed'
        p = subprocess.Popen( args='samtools view -h -o "%s" "%s"' % ( temp_temp, temp_name ), shell=True )
        assert not p.wait(), 'Converting history bam to sam failed'
        os.remove( temp_name )
        return temp_local, temp_temp
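    # Note: _bam_to_sam() shells out to samtools, so the test environment must
    # have samtools on PATH.  Each conversion runs, in effect:
    #
    #   samtools view -h -o <converted.sam> <input.bam>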
    def verify_extra_files_content( self, extra_files, hda_id, dataset_fetcher, shed_tool_id=None ):
        files_list = []
        for extra_type, extra_value, extra_name, extra_attributes in extra_files:
            if extra_type == 'file':
                files_list.append( ( extra_name, extra_value, extra_attributes ) )
            elif extra_type == 'directory':
                for filename in os.listdir( self.get_filename( extra_value, shed_tool_id=shed_tool_id ) ):
                    files_list.append( ( filename, os.path.join( extra_value, filename ), extra_attributes ) )
            else:
                raise ValueError( 'unknown extra_files type: %s' % extra_type )
        for filename, filepath, attributes in files_list:
            self.verify_composite_datatype_file_content( filepath, hda_id, base_name=filename, attributes=attributes, dataset_fetcher=dataset_fetcher, shed_tool_id=shed_tool_id )

    def verify_composite_datatype_file_content( self, file_name, hda_id, base_name=None, attributes=None, dataset_fetcher=None, shed_tool_id=None ):
        dataset_fetcher = dataset_fetcher or self.__default_dataset_fetcher()
        local_name = self.get_filename( file_name, shed_tool_id=shed_tool_id )
        if base_name is None:
            base_name = os.path.split( file_name )[-1]
        temp_name = self.makeTfname( fname=base_name )
        data = dataset_fetcher( hda_id, base_name )
        open( temp_name, 'wb' ).write( data )
        if self.keepOutdir:
            ofn = os.path.join( self.keepOutdir, base_name )
            shutil.copy( temp_name, ofn )
            log.debug( '## GALAXY_TEST_SAVE=%s. saved %s' % ( self.keepOutdir, ofn ) )
        try:
            if attributes is None:
                attributes = {}
            compare = attributes.get( 'compare', 'diff' )
            if compare == 'diff':
                self.files_diff( local_name, temp_name, attributes=attributes )
            elif compare == 're_match':
                self.files_re_match( local_name, temp_name, attributes=attributes )
            elif compare == 're_match_multiline':
                self.files_re_match_multiline( local_name, temp_name, attributes=attributes )
            elif compare == 'sim_size':
                delta = attributes.get( 'delta', '100' )
                s1 = len( data )
                s2 = os.path.getsize( local_name )
                if abs( s1 - s2 ) > int( delta ):
                    raise Exception( 'Files %s=%db but %s=%db - compare (delta=%s) failed' % ( temp_name, s1, local_name, s2, delta ) )
            else:
                raise Exception( 'Unimplemented Compare type: %s' % compare )
        except AssertionError, err:
            errmsg = 'Composite file (%s) of History item %s different than expected, difference (using %s):\n' % ( base_name, hda_id, compare )
            errmsg += str( err )
            raise AssertionError( errmsg )
        finally:
            if 'GALAXY_TEST_NO_CLEANUP' not in os.environ:
                os.remove( temp_name )
    def is_zipped( self, filename ):
        return zipfile.is_zipfile( filename )
    def is_binary( self, filename ):
        """Heuristic check: looks for a character with ordinal above 128 in the first 10 non-empty lines"""
        temp = open( filename, "U" )
        lineno = 0
        for line in temp:
            lineno += 1
            line = line.strip()
            if line:
                for char in line:
                    if ord( char ) > 128:
                        return True
            if lineno > 10:
                break
        return False
    def verify_genome_build( self, dbkey='hg17' ):
        """Verifies that the genome build (dbkey) of the last history item is as expected"""
        data_list = self.get_history_as_data_list()
        self.assertTrue( data_list )
        elem = data_list[-1]
        genome_build = elem.get( 'dbkey' )
        self.assertTrue( genome_build == dbkey )
    # Functions associated with user accounts
    def create( self, cntrller='user', email='test@bx.psu.edu', password='testuser', username='admin-user', redirect='' ):
        # HACK: don't use panels because late_javascripts() messes up the twill browser and it
        # can't find form fields (and hence the user can't be logged in).
        self.visit_url( "%s/user/create?cntrller=%s&use_panels=False" % ( self.url, cntrller ) )
        tc.fv( 'registration', 'email', email )
        tc.fv( 'registration', 'redirect', redirect )
        tc.fv( 'registration', 'password', password )
        tc.fv( 'registration', 'confirm', password )
        tc.fv( 'registration', 'username', username )
        tc.submit( 'create_user_button' )
        previously_created = False
        username_taken = False
        invalid_username = False
        try:
            self.check_page_for_string( "Created new user account" )
        except:
            try:
                # May have created the account in a previous test run...
                self.check_page_for_string( "User with that email already exists" )
                previously_created = True
            except:
                try:
                    self.check_page_for_string( 'Public name is taken; please choose another' )
                    username_taken = True
                except:
                    try:
                        # Note that we're only checking whether the public name is at least 4 characters here...
                        self.check_page_for_string( 'Public name must be at least 4 characters in length' )…