eucalyptus.py | searchcode

/lib/galaxy/cloud/providers/eucalyptus.py

https://bitbucket.org/afgane/galaxy-central-cloud · Python · 1039 lines · 844 code · 27 blank · 168 comment · 68 complexity · 7dc733a90ef920d6a637fd832094d258 MD5 · raw file

import subprocess, threading, os, errno, time, datetime
from Queue import Queue, Empty
from datetime import datetime

from galaxy import model # Database interaction class
from galaxy.model import mapping
from galaxy.datatypes.data import nice_size
from galaxy.util.bunch import Bunch
from galaxy.cloud import UCIwrapper
from Queue import Queue
from sqlalchemy import or_, and_

import galaxy.eggs
galaxy.eggs.require("boto")
from boto.ec2.connection import EC2Connection
from boto.ec2.regioninfo import RegionInfo
import boto.exception
import boto

import logging
log = logging.getLogger( __name__ )

# States of a UCI (User Configured Instance) as stored in the local database.
# Each "...UCI" value marks a request that has not yet been accepted by a
# cloud provider; the provider's put() method strips the 'UCI' marker when it
# queues the request for processing.
uci_states = Bunch(
    # UCI creation
    NEW_UCI="newUCI",
    NEW="new",
    CREATING="creating",
    # UCI deletion
    DELETING_UCI="deletingUCI",
    DELETING="deleting",
    # Instance start-up
    SUBMITTED_UCI="submittedUCI",
    SUBMITTED="submitted",
    # Instance shut-down
    SHUTTING_DOWN_UCI="shutting-downUCI",
    SHUTTING_DOWN="shutting-down",
    # Adding storage
    ADD_STORAGE_UCI="add-storageUCI",
    ADD_STORAGE="add-storage",
    # Steady and terminal states
    AVAILABLE="available",
    RUNNING="running",
    PENDING="pending",
    ERROR="error",
    DELETED="deleted",
    # Snapshot creation
    SNAPSHOT_UCI="snapshotUCI",
    SNAPSHOT="snapshot"
)

# States of an individual cloud instance as stored in the local database.
instance_states = Bunch(
    TERMINATED="terminated",
    SUBMITTED="submitted",
    RUNNING="running",
    ADDING="adding-storage",
    PENDING="pending",
    SHUTTING_DOWN="shutting-down",
    ERROR="error"
)

# Status values of a storage volume as stored in the local database.
store_status = Bunch(
    WAITING="waiting",
    IN_USE="in-use",
    ADDING="adding",
    CREATING="creating",
    DELETED="deleted",
    ERROR="error"
)

# Status values of a volume snapshot as stored in the local database.
# DELETE marks a snapshot whose deletion has been requested; DELETED is the
# separate status value 'deleted'.
snapshot_status = Bunch(
    SUBMITTED="submitted",
    PENDING="pending",
    COMPLETED="completed",
    DELETE="delete",
    DELETED="deleted",
    ERROR="error"
)

class EucalyptusCloudProvider( object ):
    """
    Eucalyptus-based cloud provider implementation for managing instances. 

    Requests are handed over as UCI wrapper objects via put(), placed on an
    internal queue and serviced by a pool of worker threads, each running
    run_next(). Which operation a worker performs is decided by the state of
    the UCI at the time the request is taken off the queue.
    """
    # Sentinel placed on the request queue by shutdown(), once per worker
    # thread, to tell the workers to exit.
    STOP_SIGNAL = object()
    def __init__( self, app ):
        """
        Set up the request queue and start the worker threads servicing it.

        app -- application object; only app.model.context (stored as the
               database session) is used here.
        """
        self.type = "eucalyptus" # cloud provider type (e.g., ec2, eucalyptus, opennebula)
        self.zone = "epc" # availability zone used when a UCI does not specify one
        self.queue = Queue()
        self.sa_session = app.model.context
        
        worker_count = 5
        self.threads = []
        log.info( "Starting eucalyptus cloud controller workers..." )
        for _ in range( worker_count ):
            # Each worker blocks in run_next() until a request is queued
            thread = threading.Thread( target=self.run_next )
            thread.start()
            self.threads.append( thread )
        log.debug( "%d eucalyptus cloud workers ready", worker_count )
        
    def shutdown( self ):
        """
        Ask the worker threads to exit by queueing one stop signal per thread.
        This method does not wait for the threads to finish.
        """
        log.info( "sending stop signal to worker threads in eucalyptus cloud manager" )
        for _ in self.threads:
            self.queue.put( self.STOP_SIGNAL )
        log.info( "eucalyptus cloud manager stopped" )
    
    def put( self, uci_wrapper ):
        """
        Add uci_wrapper object to the end of the request queue to be handled by 
        this cloud provider.

        Before queueing, the trailing 'UCI' marker is removed from the UCI's
        state (e.g., 'submittedUCI' becomes 'submitted'), which marks the
        request as accepted and ready for processing.
        """
        marker = 'UCI'
        state = uci_wrapper.get_uci_state()
        # Strip the marker only when it really is a suffix, so that a state
        # containing an upper-case 'U' elsewhere is not truncated by accident.
        if state.endswith( marker ):
            state = state[ :-len( marker ) ]
        uci_wrapper.change_state( state )
        self.queue.put( uci_wrapper )
        
    def run_next( self ):
        """
        Worker loop: process queued requests one at a time, waiting until one
        is available if necessary. Returns (ending the worker thread) when the
        stop signal is taken off the queue.

        The operation performed is selected by the UCI's current state;
        requests whose state has no handler are silently dropped.
        """
        handlers = {
            uci_states.NEW: self.create_uci,
            uci_states.DELETING: self.delete_uci,
            uci_states.SUBMITTED: self.start_uci,
            uci_states.SHUTTING_DOWN: self.stop_uci,
            uci_states.SNAPSHOT: self.snapshot_uci,
            uci_states.ADD_STORAGE: self.add_storage_to_uci,
        }
        while 1:
            uci_wrapper = self.queue.get()
            # The stop signal is queued as-is and is not a UCI wrapper, so it
            # must be recognized before any wrapper method is called on it.
            if uci_wrapper is self.STOP_SIGNAL:
                return
            try:
                handler = handlers.get( uci_wrapper.get_uci_state() )
                if handler is not None:
                    handler( uci_wrapper )
            except:
                # Deliberately broad: a failing request must never take the
                # worker thread down with it.
                log.exception( "Uncaught exception executing cloud request." )
            
    def get_connection( self, uci_wrapper ):
        """
        Establishes cloud connection using user's credentials associated with given UCI
        """
        log.debug( 'Establishing %s cloud connection.' % self.type )
        provider = uci_wrapper.get_provider()
        try:
            region = RegionInfo( None, provider.region_name, provider.region_endpoint )
        except Exception, ex:
            err = "Selecting region with cloud provider failed: " + str( ex )
            log.error( err )
            uci_wrapper.set_error( err, True )
            return None        
        try:
            conn = EC2Connection( aws_access_key_id=uci_wrapper.get_access_key(), 
                                  aws_secret_access_key=uci_wrapper.get_secret_key(), 
                                  is_secure=provider.is_secure, 
                                  port=provider.port, 
                                  region=region, 
                                  path=provider.path )
        except boto.exception.EC2ResponseError, e:
            err = "Establishing connection with cloud failed: " + str( e )
            log.error( err )
            uci_wrapper.set_error( err, True )
            return None
        
        return conn
        
    def check_key_pair( self, uci_wrapper, conn ):
        """
        Check if a key pair associated with this UCI exists on cloud provider.
        If yes, return key pair name; otherwise, generate a key pair with the cloud
        provider and, again, return key pair name.
        Key pair name for given UCI is generated from UCI's name and suffix '_kp' 
        """
        kp = None
        kp_name = uci_wrapper.get_name().replace(' ','_') + "_kp"
        log.debug( "Checking user's key pair: '%s'" % kp_name )
        try:
            kp = conn.get_key_pair( kp_name )
            uci_kp_name = uci_wrapper.get_key_pair_name()
            uci_material = uci_wrapper.get_key_pair_material()
            if kp != None:
                if kp.name != uci_kp_name or uci_material == None:
                    # key pair exists on the cloud but not in local database, so re-generate it (i.e., delete and then create)
                    try: 
                        conn.delete_key_pair( kp_name )
                        kp = self.create_key_pair( conn, kp_name )
                        uci_wrapper.set_key_pair( kp.name, kp.material )
                    except boto.exception.EC2ResponseError, e:
                        err = "EC2 response error while deleting key pair: " + str( e )
                        log.error( err )
                        uci_wrapper.set_error( err, True )
            else:
                try:
                    kp = self.create_key_pair( conn, kp_name )
                    uci_wrapper.set_key_pair( kp.name, kp.material )
                except boto.exception.EC2ResponseError, e:
                    err = "EC2 response error while creating key pair: " + str( e )
                    log.error( err )
                    uci_wrapper.set_error( err, True )
                except Exception, ex:
                    err = "Exception while creating key pair: " + str( ex )
                    log.error( err )
                    uci_wrapper.set_error( err, True )
        except boto.exception.EC2ResponseError, e: # No keypair under this name exists so create it
            if e.code == 'InvalidKeyPair.NotFound': 
                log.info( "No keypair found, creating keypair '%s'" % kp_name )
                kp = self.create_key_pair( conn, kp_name )
                uci_wrapper.set_key_pair( kp.name, kp.material )
            else:
                err = "EC2 response error while retrieving key pair: " + str( e )
                log.error( err )
                uci_wrapper.set_error( err, True )
                        
        if kp != None:
            return kp.name
        else:
            return None
    
    def create_key_pair( self, conn, kp_name ):
        """ Initiate creation of key pair under kp_name by current cloud provider. """
        try:
            return conn.create_key_pair( kp_name )
        except boto.exception.EC2ResponseError, e: 
            return None
    
    def get_mi_id( self, uci_wrapper, i_index ):
        """
        Get appropriate machine image (mi) ID based on instance type.

        Instance types 'm1.small' and 'c1.medium' are served by an 'i386'
        image, all other types by an 'x86_64' image. The first non-deleted
        image registered for this provider type and architecture is used.

        Returns the image ID, or None if no such image is registered (the
        error is then logged and recorded on the UCI).
        """
        if uci_wrapper.get_instance_type( i_index ) in ( 'm1.small', 'c1.medium' ):
            arch = 'i386'
        else:
            arch = 'x86_64'
        
        image_query = self.sa_session.query( model.CloudImage )
        image = image_query.filter_by( deleted=False, provider_type=self.type, architecture=arch ).first()
        if not image:
            err = "Machine image could not be retrieved"
            log.error( "%s for UCI '%s'." % (err, uci_wrapper.get_name() ) )
            uci_wrapper.set_error( err+". Contact site administrator to ensure needed machine image is registered.", True )
            return None
        return image.image_id
            
    def create_uci( self, uci_wrapper ):
        """ 
        Create User Configured Instance (UCI) - i.e., create storage volume on cloud provider
        and register relevant information in local Galaxy database.
        """
        conn = self.get_connection( uci_wrapper )
        
        # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t
        # current UCI is 0; therefore, it can be referenced in following code
        log.info( "Creating volume in zone '%s'..." % uci_wrapper.get_uci_availability_zone() )
        if uci_wrapper.get_uci_availability_zone()=='':
            log.info( "Availability zone for UCI (i.e., storage volume) was not selected, using default zone: %s" % self.zone )
            uci_wrapper.set_store_availability_zone( self.zone )
        
#        log.debug( "Creating volume; using command: conn.create_volume( %s, '%s', snapshot=None )" % ( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone() ))
#        vol = conn.create_volume( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone(), snapshot=None )
#        uci_wrapper.set_store_volume_id( 0, vol.id ) 
        store = uci_wrapper.get_all_stores_in_status( store_status.ADDING )[0] # Because at UCI creation time only 1 storage volume can be created, reference it directly
        
        log.info( "Creating storage volume in zone '%s' of size '%s'..." % ( uci_wrapper.get_uci_availability_zone(), store.size ) )
        # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t
        # current UCI is 0, so reference it in following methods
        vol = conn.create_volume( store.size, uci_wrapper.get_uci_availability_zone(), snapshot=None )
        uci_wrapper.set_store_volume_id( store.id, vol.id )
        
        # Retrieve created volume again to get updated status
        try:
            vl = conn.get_all_volumes( [vol.id] )
        except boto.exception.EC2ResponseError, e: 
            err = "EC2 response error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( e )
            log.error( err )
            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
            uci_wrapper.set_error( err, True )
            return
        except Exception, ex:
            err = "Error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( ex )
            log.error( err )
            uci_wrapper.set_error( err, True )
            return
        
        if len( vl ) > 0:
            # EPC does not allow creation of storage volumes (it deletes one as soon as it is created, so manually set uci_state here)
            if vl[0].status == store_status.DELETED:
                uci_wrapper.change_state( uci_state=uci_states.AVAILABLE )
            else:
                uci_wrapper.change_state( uci_state=vl[0].status )
            uci_wrapper.set_store_status( vol.id, vl[0].status )
        else:
            err = "Volume '" + vol.id +"' not found by EC2 after being created."
            log.error( err )
            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
            uci_wrapper.set_error( err, True )

    def delete_uci( self, uci_wrapper ):
        """ 
        Delete UCI - i.e., delete all storage volumes associated with this UCI. 
        NOTE that this implies deletion of any and all data associated
        with this UCI from the cloud. All data will be deleted.
        Information in local Galaxy database is marked as deleted but not actually removed
        from the database. 
        """
        conn = self.get_connection( uci_wrapper )
        vl = [] # volume list
        count = 0 # counter for checking if all volumes assoc. w/ UCI were deleted
        
        # Get all volumes assoc. w/ UCI, delete them from cloud as well as in local DB
        vl = uci_wrapper.get_all_stores()
        deletedList = []
        failedList = []
        for v in vl:
            log.debug( "Deleting volume with id='%s'" % v.volume_id )
            try:
                if conn.delete_volume( v.volume_id ):
                    deletedList.append( v.volume_id )
                    v.deleted = True
                    self.sa_session.add( v )
                    self.sa_session.flush()
                    count += 1
                else:
                    failedList.append( v.volume_id )
            except boto.exception.EC2ResponseError, e:
                err = "EC2 response error while deleting storage volume '" + v.volume_id + "': " + str( e )
                log.error( err )
                uci_wrapper.set_store_error( err, store_id = v.volume_id )
                uci_wrapper.set_error( err, True )
                
        # Delete UCI if all of associated 
        if count == len( vl ):
            uci_wrapper.set_deleted()
        else:
            err = "Deleting following volume(s) failed: "+ str( failedList )+". However, these volumes were successfully deleted: " \
                  + str( deletedList ) +". MANUAL intervention and processing needed."
            log.error( err )
            uci_wrapper.set_error( err, True )
            
    def snapshot_uci( self, uci_wrapper ):
        """
        Initiate creation of a snapshot by cloud provider for all storage volumes 
        associated with this UCI. 
        """
        if uci_wrapper.get_uci_state() != uci_states.ERROR:
            conn = self.get_connection( uci_wrapper )
            
            snapshots = uci_wrapper.get_snapshots( status = snapshot_status.SUBMITTED )
            for snapshot in snapshots:
                log.debug( "Snapshot DB id: '%s', volume id: '%s'" % ( snapshot.id, snapshot.store.volume_id ) )
                try:
                    snap = conn.create_snapshot( volume_id=snapshot.store.volume_id )
                    snap_id = str( snap ).split(':')[1]
                    uci_wrapper.set_snapshot_id( snapshot.id, snap_id )
                    sh = conn.get_all_snapshots( snap_id ) # get updated status
                    uci_wrapper.set_snapshot_status( status=sh[0].status, snap_id=snap_id )
                except boto.exception.EC2ResponseError, e:
                    err = "Cloud provider response error while creating snapshot: " + str( e )
                    log.error( err )
                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
                    uci_wrapper.set_error( err, True )
                    return
                except Exception, ex:
                    err = "Error while creating snapshot: " + str( ex )
                    log.error( err )
                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
                    uci_wrapper.set_error( err, True )
                    return
                    
            uci_wrapper.change_state( uci_state=uci_states.AVAILABLE )
        
#        if uci_wrapper.get_uci_state() != uci_states.ERROR:
#            
#            snapshots = uci_wrapper.get_snapshots( status = 'submitted' )
#            for snapshot in snapshots:
#                uci_wrapper.set_snapshot_id( snapshot.id, None, 'euca_error' )
#            
#            log.debug( "Eucalyptus snapshot attempted by user for UCI '%s'" % uci_wrapper.get_name() )
#            uci_wrapper.set_error( "Eucalyptus does not support creation of snapshots at this moment. No snapshot or other changes were performed. \
#                        Feel free to resent state of this instance and use it normally.", True )
            
            
    def add_storage_to_uci( self, uci_wrapper ):
        """
        Adds more storage to specified UCI.
        Not implemented for this provider: an error saying so is recorded on the UCI.
        """
        message = "Adding storage to eucalyptus-based clouds is not yet supported."
        uci_wrapper.set_error( message, True )
    
    def dummy_start_uci( self, uci_wrapper ):
        """
        Stand-in for start_uci: logs what would be started and sleeps for 10
        seconds, without contacting the cloud provider.
        """
        pause = 10 # seconds
        uci = uci_wrapper.get_uci()
        log.debug( "Would be starting instance '%s'" % uci.name )
        log.debug( "Sleeping a bit... (%s)" % uci.name )
        time.sleep( pause )
        log.debug( "Woke up! (%s)" % uci.name )
        
    def start_uci( self, uci_wrapper ):
        """
        Start instance(s) of given UCI on the cloud.  
        """ 
        if uci_wrapper.get_uci_state() != uci_states.ERROR:
            conn = self.get_connection( uci_wrapper )
            self.check_key_pair( uci_wrapper, conn )
            if uci_wrapper.get_key_pair_name() == None:
                err = "Key pair not found"
                log.error( "%s for UCI '%s'." % ( err, uci_wrapper.get_name() ) )
                uci_wrapper.set_error( err + ". Try resetting the state and starting the instance again.", True )
                return
            
            i_indexes = uci_wrapper.get_instances_indexes( state=instance_states.SUBMITTED ) # Get indexes of i_indexes associated with this UCI that are in 'submitted' state
            log.debug( "Starting instances with IDs: '%s' associated with UCI '%s' " % ( i_indexes, uci_wrapper.get_name(),  ) )
            if len( i_indexes ) > 0:
                for i_index in i_indexes:
                    # Get machine image for current instance
                    mi_id = self.get_mi_id( uci_wrapper, i_index )
                    log.debug( "mi_id: %s, uci_wrapper.get_key_pair_name(): %s" % ( mi_id, uci_wrapper.get_key_pair_name() ) )
                    uci_wrapper.set_mi( i_index, mi_id )
                               
                    if uci_wrapper.get_uci_state() != uci_states.ERROR:
                        # Start an instance
                        log.debug( "Starting UCI instance '%s'" % uci_wrapper.get_name() )
                        log.debug( "Using following command: conn.run_instances( image_id='%s', key_name='%s', instance_type='%s' )" 
                                   % ( mi_id, uci_wrapper.get_key_pair_name(), uci_wrapper.get_instance_type( i_index ) ) )
                        reservation = None
                        try:
                            reservation = conn.run_instances( image_id=mi_id, 
                                                              key_name=uci_wrapper.get_key_pair_name(),
                                                              instance_type=uci_wrapper.get_instance_type( i_index ) )
                        except boto.exception.EC2ResponseError, e:
                            err = "EC2 response error when starting UCI '"+ uci_wrapper.get_name() +"': " + str( e )
                            log.error( err )
                            uci_wrapper.set_error( err, True )
                        except Exception, ex:
                            err = "Error when starting UCI '" + uci_wrapper.get_name() + "': " + str( ex )
                            log.error( err )
                            uci_wrapper.set_error( err, True )
                        # Record newly available instance data into local Galaxy database
                        if reservation:
                            l_time = datetime.utcnow()
#                            uci_wrapper.set_instance_launch_time( self.format_time( reservation.instances[0].launch_time ), i_index=i_index )
                            uci_wrapper.set_instance_launch_time( l_time, i_index=i_index )
                            if not uci_wrapper.uci_launch_time_set():
                                uci_wrapper.set_uci_launch_time( l_time )
                            try:
                                uci_wrapper.set_reservation_id( i_index, str( reservation ).split(":")[1] )
                                # TODO: if more than a single instance will be started through single reservation, change this reference from element [0]
                                i_id = str( reservation.instances[0]).split(":")[1]
                                uci_wrapper.set_instance_id( i_index, i_id )
                                s = reservation.instances[0].state
                                uci_wrapper.change_state( s, i_id, s )
                                vol_id = uci_wrapper.get_store_volume_id( store_id=0 ) # TODO: Once more that one vol/UCI is allowed, update this!
                                uci_wrapper.set_store_status( vol_id, store_status.WAITING )
                                log.debug( "Instance of UCI '%s' started, current state: '%s'" % ( uci_wrapper.get_name(), uci_wrapper.get_uci_state() ) )
                            except boto.exception.EC2ResponseError, e:
                                err = "EC2 response error when retrieving instance information for UCI '" + uci_wrapper.get_name() + "': " + str( e )
                                log.error( err )
                                uci_wrapper.set_error( err, True )
                    else:
                        log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
            else:
                err = "No instances in state '"+ instance_states.SUBMITTED +"' found for UCI '" + uci_wrapper.get_name() + \
                      "'. Nothing to start."
                log.error( err )
                uci_wrapper.set_error( err, True )
        else:
            log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
        
    def stop_uci( self, uci_wrapper):
        """ 
        Stop all cloud instances associated with given UCI. 
        """
        conn = self.get_connection( uci_wrapper )
        
        # Get all instances associated with given UCI
        il = uci_wrapper.get_instances_ids() # instance list
        # Process list of instances and remove any references to empty instance id's
        for i in il:
            if i is None:
                il.remove( i )
        log.debug( 'List of instances being terminated: %s' % il )
        rl = conn.get_all_instances( il ) # Reservation list associated with given instances
                        
        # Initiate shutdown of all instances under given UCI
        cnt = 0
        stopped = []
        not_stopped = []
        for r in rl:
            for inst in r.instances:
                log.debug( "Sending stop signal to instance '%s' associated with reservation '%s' (UCI: %s)." % ( inst, r, uci_wrapper.get_name() ) )
                try:
                    inst.stop()
                    uci_wrapper.set_stop_time( datetime.utcnow(), i_id=inst.id )
                    uci_wrapper.change_state( instance_id=inst.id, i_state=inst.update() )
                    stopped.append( inst )
                except boto.exception.EC2ResponseError, e:
                    not_stopped.append( inst )
                    err = "EC2 response error when stopping instance '" + inst.instance_id + "': " + str( e )
                    log.error( err )
                    uci_wrapper.set_error( err, True )
                
        uci_wrapper.reset_uci_launch_time()
        log.debug( "Termination was initiated for all instances of UCI '%s'." % uci_wrapper.get_name() )

#        dbInstances = get_instances( trans, uci ) #TODO: handle list!
#        
#        # Get actual cloud instance object
#        cloudInstance = get_cloud_instance( conn, dbInstances.instance_id )
#        
#        # TODO: Detach persistent storage volume(s) from instance and update volume data in local database
#        stores = get_stores( trans, uci )
#        for i, store in enumerate( stores ):
#            log.debug( "Detaching volume '%s' to instance '%s'." % ( store.volume_id, dbInstances.instance_id ) )
#            mntDevice = store.device
#            volStat = None
##            Detaching volume does not work with Eucalyptus Public Cloud, so comment it out
##            try:
##                volStat = conn.detach_volume( store.volume_id, dbInstances.instance_id, mntDevice )
##            except:
##                log.debug ( 'Error detaching volume; still going to try and stop instance %s.' % dbInstances.instance_id )
#            store.attach_time = None
#            store.device = None
#            store.inst.instance_id = None
#            store.status = volStat
#            log.debug ( '***** volume status: %s' % volStat )
#        
#        # Stop the instance and update status in local database
#        cloudInstance.stop()
#        dbInstances.stop_time = datetime.utcnow()
#        while cloudInstance.state != 'terminated':
#            log.debug( "Stopping instance %s state; current state: %s" % ( str( cloudInstance ).split(":")[1], cloudInstance.state ) )
#            time.sleep(3)
#            cloudInstance.update()
#        dbInstances.state = cloudInstance.state
#        
#        # Reset relevant UCI fields
#        uci.state = 'available'
#        uci.launch_time = None
#          
#        # Persist
#        session = trans.sa_session
##        session.save_or_update( stores )
#        session.save_or_update( dbInstances ) # TODO: Is this going to work w/ multiple instances stored in dbInstances variable?
#        session.save_or_update( uci )
#        session.flush()
#        trans.log_event( "User stopped cloud instance '%s'" % uci.name )
#        trans.set_message( "Galaxy instance '%s' stopped." % uci.name )

    def update( self ):
        """ 
        Run status update on all instances that are in 'running', 'pending', or 'shutting-down' state.
        Run status update on all storage volumes whose status is 'in-use', 'creating', 'waiting', or 'None'.
        Run status update on all snapshots whose status is 'pending' or 'delete'  
        Run status update on any zombie UCIs, i.e., UCI's that is in 'submitted' state for an 
        extended period of time.
        
        Only objects whose UCI credentials belong to this provider type are processed.
        
        Reason behind this method is to sync state of local DB and real-world resources
        """
        log.debug( "Running general status update for %s UCIs..." % self.type )
        # Update instances
        instances = self.sa_session.query( model.CloudInstance ) \
            .filter( or_( model.CloudInstance.table.c.state==instance_states.RUNNING, 
                          model.CloudInstance.table.c.state==instance_states.PENDING, 
                          model.CloudInstance.table.c.state==instance_states.SHUTTING_DOWN ) ) \
            .all()
        for inst in instances:
            if self.type == inst.uci.credentials.provider.type:
                log.debug( "[%s] Running general status update on instance '%s'" % ( inst.uci.credentials.provider.type, inst.instance_id ) )
                self.update_instance( inst )
        
        # Update storage volume(s)
        # ('==None' is intentional: it is a column expression, not a Python comparison)
        stores = self.sa_session.query( model.CloudStore ) \
            .filter( or_( model.CloudStore.table.c.status==store_status.IN_USE, 
                          model.CloudStore.table.c.status==store_status.CREATING,
                          model.CloudStore.table.c.status==store_status.WAITING,
                          model.CloudStore.table.c.status==None ) ) \
            .all()
        for store in stores:
            if self.type == store.uci.credentials.provider.type: # and store.volume_id != None:
                log.debug( "[%s] Running general status update on store with local database ID: '%s'" % ( store.uci.credentials.provider.type, store.id ) )
                self.update_store( store )
        
        # Update pending snapshots or delete ones marked for deletion
        snapshots = self.sa_session.query( model.CloudSnapshot ) \
            .filter( or_( model.CloudSnapshot.table.c.status == snapshot_status.PENDING, model.CloudSnapshot.table.c.status == snapshot_status.DELETE ) ) \
            .all()
        for snapshot in snapshots:
            if self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.PENDING:
                log.debug( "[%s] Running general status update on snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
                self.update_snapshot( snapshot )
            elif self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.DELETE:
                log.debug( "[%s] Initiating deletion of snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
                self.delete_snapshot( snapshot )
        
        # Attempt at updating any zombie UCIs (i.e., instances that have been in SUBMITTED state for longer than expected - see below for exact time)
        zombies = self.sa_session.query( model.UCI ).filter_by( state=uci_states.SUBMITTED ).all()
        for zombie in zombies:
            log.debug( "zombie UCI: %s" % zombie.name )
            # NOTE(review): this query is not restricted to instances of the current
            # zombie UCI, so every non-terminated instance is examined once per
            # zombie - confirm whether filtering by the UCI was intended.
            z_instances = self.sa_session.query( model.CloudInstance ) \
                .filter( or_( model.CloudInstance.table.c.state != instance_states.TERMINATED,
                              model.CloudInstance.table.c.state == None ) ) \
                .all()
            for z_inst in z_instances:
                if self.type == z_inst.uci.credentials.provider.type:
                    td = datetime.utcnow() - z_inst.update_time
                    # timedelta.seconds alone holds only the within-a-day part of the
                    # difference, so whole days must be added to get the real age
                    elapsed = td.days * 86400 + td.seconds
                    if elapsed > 180: # if instance has not been updated for more than 3 minutes
                        log.debug( "[%s](td=%s) Running zombie repair update on instance with DB id '%s'" % ( z_inst.uci.credentials.provider.type, elapsed, z_inst.id ) )
                        self.process_zombie( z_inst )
                
    def update_instance( self, inst ):
        """
        Update information in local database for given instance as it is obtained from cloud provider.
        Along with updating information about given instance, information about the UCI controlling
        this instance is also updated.
        """
        # Get credentials associated wit this instance
        uci_id = inst.uci_id
        uci = self.sa_session.query( model.UCI ).get( uci_id )
        self.sa_session.refresh( uci )
        conn = self.get_connection_from_uci( uci )
        
        # Get reservations handle for given instance
        try:
            rl= conn.get_all_instances( [inst.instance_id] )
        except boto.exception.EC2ResponseError, e:
            err = "Retrieving instance(s) from cloud failed for UCI '"+ uci.name +"' during general status update: " + str( e )
            log.error( err )
            uci.error = err
            uci.state = uci_states.ERROR
            self.sa_session.add( uci )
            self.sa_session.flush()
            return None

        # Because references to reservations are deleted shortly after instances have been terminated, getting an empty list as a response to a query
        # typically means the instance has successfully shut down but the check was not performed in short enough amount of time. Until an alternative solution
        # is found, below code sets state of given UCI to 'error' to indicate to the user something out of ordinary happened.
        if len( rl ) == 0:
            err = "Instance ID '"+inst.instance_id+"' was not found by the cloud provider. Instance might have crashed or otherwise been terminated."+ \
                "Manual check is recommended."
            log.error( err )
            inst.error = err
            uci.error = err
            inst.state = instance_states.TERMINATED
            uci.state = uci_states.ERROR
            uci.launch_time = None
            self.sa_session.add( inst )
            self.sa_session.add( uci )
            self.sa_session.flush()
        # Update instance status in local DB with info from cloud provider
        for r in rl:
            for i, cInst in enumerate( r.instances ):
                try:
                    s = cInst.update()
                    log.debug( "Checking state of cloud instance '%s' associated with UCI '%s' and reservation '%s'. State='%s'" % ( cInst, uci.name, r, s ) )
                    if  s != inst.state:
                        inst.state = s
                        self.sa_session.add( inst )
                        self.sa_session.flush()
                         # After instance has shut down, ensure UCI is marked as 'available'
                        if s == instance_states.TERMINATED and uci.state != uci_states.ERROR:
                            uci.state = uci_states.AVAILABLE
                            uci.launch_time = None
                            self.sa_session.add( uci )
                            self.sa_session.flush()
                    # Making sure state of UCI is updated. Once multiple instances become associated with single UCI, this will need to be changed.
                    if s != uci.state and s != instance_states.TERMINATED: 
                        uci.state = s                    
                        self.sa_session.add( uci )
                        self.sa_session.flush()
                    if cInst.public_dns_name != inst.public_dns:
                        inst.public_dns = cInst.public_dns_name
                        self.sa_session.add( inst )
                        self.sa_session.flush()
                    if cInst.private_dns_name != inst.private_dns:
                        inst.private_dns = cInst.private_dns_name
                        self.sa_session.add( inst )
                        self.sa_session.flush()
                except boto.exception.EC2ResponseError, e:
                    err = "Updating instance status from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
                    log.error( err )
                    uci.error = err
                    uci.state = uci_states.ERROR
                    self.sa_session.add( uci )
                    self.sa_session.flush()
                    return None
                
    def update_store( self, store ):
        """
        Update information in local database for given storage volume as it is obtained from cloud provider.
        Along with updating information about given storage volume, information about the UCI controlling
        this storage volume is also updated.
        """
        # Get credentials associated wit this store
        uci_id = store.uci_id
        uci = self.sa_session.query( model.UCI ).get( uci_id )
        self.sa_session.refresh( uci )
        conn = self.get_connection_from_uci( uci )
        
        if store.volume_id != None:
            # Get reservations handle for given store 
            try:
                log.debug( "Updating storage volume command: vl = conn.get_all_volumes( [%s] )" % store.volume_id )
                vl = conn.get_all_volumes( [store.volume_id] )
            except boto.exception.EC2ResponseError, e:
                err = "Retrieving volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
                log.error( err )
                uci.error = err
                uci.state = uci_states.ERROR
                self.sa_session.add( uci )
                self.sa_session.flush()
                return None
            
            # Update store status in local DB with info from cloud provider
            if len(vl) > 0:
                try:
                    log.debug( "Storage volume '%s' current status: '%s'" % (store.volume_id, vl[0].status ) )
                    if store.status != vl[0].status:
                        # In case something failed during creation of UCI but actual storage volume was created and yet 
                        #  UCI state remained as 'new', try to remedy this by updating UCI state here 
                        if ( store.status == None ) and ( store.volume_id != None ):
                            uci.state = vl[0].status
                            self.sa_session.add( uci )
                            self.sa_session.flush()
                        # If UCI was marked in state 'CREATING', update its status to reflect new status
                        elif ( uci.state == uci_states.CREATING ):
                            # Because Eucalyptus Public Cloud (EPC) deletes volumes immediately after they are created, artificially
                            # set status of given UCI to 'available' based on storage volume's availability zone (i.e., it's residing
                            # in EPC as opposed to some other Eucalyptus based cloud that allows creation of storage volumes.
                            if store.availability_zone == 'epc':
                                uci.state = uci_states.AVAILABLE
                            else:
                                uci.state = vl[0].status

                            self.sa_session.add( uci )
                            self.sa_session.flush()
                                
                        store.status = vl[0].status
                        self.sa_session.add( store )
                        self.sa_session.flush()
                    if store.inst != None:
                        if store.inst.instance_id != vl[0].instance_id:
                            store.inst.instance_id = vl[0].instance_id
                            self.sa_session.add( store )
                            self.sa_session.flush()
                    if store.attach_time != vl[0].attach_time:
                        store.attach_time = vl[0].attach_time
                        self.sa_session.add( store )
                        self.sa_session.flush()
                    if store.device != vl[0].device:
                        store.device = vl[0].device
                        self.sa_session.add( store )
                        self.sa_session.flush()
                except boto.exception.EC2ResponseError, e:
                    err = "Updating status of volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
                    log.error( err )
                    uci.error = err
                    uci.state = uci_states.ERROR
                    self.sa_session.add( uci )
                    self.sa_session.flush()
                    return None
            else:
                err = "No storage volumes returned by cloud provider on general update"
                log.error( "%s for UCI '%s'" % ( err, uci.name ) )
                store.status = store_status.ERROR
                store.error = err
                uci.error = err
                uci.state = uci_states.ERROR
                self.sa_session.add( uci )
                self.sa_session.add( store )
                self.sa_session.flush()
        else:
            err = "Missing storage volume ID in local database on general update. Manual check is needed to check " \
                  "if storage volume was actually created by cloud provider."
            log.error( "%s (for UCI '%s')" % ( err, uci.name ) )
            store.status = store_status.ERROR
            store.error = err
            uci.error = err
            uci.state = uci_states.ERROR
            self.sa_session.add( uci )
            self.sa_session.add( store )
            self.sa_session.flush()
   
    def update_snapshot( self, snapshot ):
        """
        Update information in local database for given snapshot as it is obtained from cloud provider.
        Along with updating information about given snapshot, information about the UCI controlling
        this snapshot is also updated.
        """
        # Get credentials associated wit this store
        uci_id = snapshot.uci_id
        uci = self.sa_session.query( model.UCI ).get( uci_id )
        self.sa_session.refresh( uci )
        conn = self.get_connection_from_uci( uci )
        
        try:
            log.debug( "Updating status of snapshot '%s'" % snapshot.snapshot_id )
            snap = conn.get_all_snapshots( [snapshot.snapshot_id] ) 
            if len( snap ) > 0:
                log.debug( "Snapshot '%s' status: %s" % ( snapshot.snapshot_id, snap[0].status ) )
                snapshot.status = snap[0].status
                self.sa_session.add( snapshot )
                self.sa_session.flush()
            else:
                err = "No snapshots returned by EC2 on general update"
                log.error( "%s for UCI '%s'" % ( err, uci.name ) )
                snapshot.status = snapshot_status.ERROR
                snapshot.error = err
                uci.error = err
                uci.state = uci_states.ERROR
                self.sa_session.add( uci )
                self.sa_session.add( snapshot )
                self.sa_session.flush()
        except boto.exception.EC2ResponseError, e:
            err = "EC2 response error while updating snapshot status: " + str( e )
            log.error( err )
            snapshot.status = snapshot_status.ERROR
            snapshot.error = err
            uci.error = err
            uci.state = uci_states.ERROR
            self.sa_session.add( uci )
            self.sa_session.add( snapshot )
            self.sa_session.flush()
        except Exception, ex:
            err = "Error while updating snapshot status: " + str( ex )
            log.error( err )
            snapshot.status = snapshot_status.ERROR
            snapshot.error = err
            uci.error = err
            uci.state = uci_states.ERROR
            self.sa_session.add( uci )
            self.sa_session.add( snapshot )
            self.sa_session.flush()
        
    def delete_snapshot( self, snapshot ):
        """
        Initiate deletion of given snapshot from cloud provider.
        """
        if snapshot.status == snapshot_status.DELETE:
            # Get credentials associated wit this store
            uci_id = snapshot.uci_id
            uci = self.sa_session.query( model.UCI ).get( uci_id )
            self.sa_session.refresh( uci )
            conn = self.get_connection_from_uci( uci )
            
            try:
                log.debug( "Deleting snapshot '%s'" % snapshot.snapshot_id )
                snap = conn.delete_snapshot( snapshot.snapshot_id )
                if snap == True:
                    snapshot.deleted = True
                    snapshot.status = snapshot_status.DELETED
                    self.sa_session.add( snapshot )
                    self.sa_session.flush()
                return snap
            except boto.exception.EC2ResponseError, e:
                err = "EC2 response error while deleting snapshot: " + str( e )
                log.error( err )
                snapshot.status = snapshot_status.ERROR
                snapshot.error = err
                uci.error = err
                uci.state = uci_states.ERROR
                self.sa_session.add( uci )
                self.sa_session.add( snapshot )
                self.sa_session.flush()
            except Exception, ex:
                err = "Error while deleting snapshot: " + str( ex )
                log.error( err )
                snapshot.status = snapshot_status.ERROR
                snapshot.error = err
                uci.error = err
                uci.state = uci_states.ERROR
                self.sa_session.add( uci )
                self.sa_session.add( snapshot )
                self.sa_session.flush()
        else:
            err = "Cannot delete snapshot '"+snapshot.snapshot_id+"' because its status is '"+snapshot.status+"'. Only snapshots with '" + \
                        snapshot_status.COMPLETED+"' status can be deleted."
            log.error( err )
            snapshot.error = err
            self.sa_session.add( snapshot )
            self.sa_session.flush()
            
    def process_zombie( self, inst ):
        """
        Attempt at discovering if starting a cloud instance was successful but local database was not updated
        accordingly or if something else failed and instance was never started. Currently, no automatic 
        repairs are being attempted; instead, appropriate error messages are set.
        """
        uci_id = inst.uci_id
        uci = self.sa_session.query( model.UCI ).get( uci_id )
        self.sa_session.refresh( uci )
        
        # Check if any instance-specific information was written to local DB; if 'yes', set instance and UCI's error message 
        # suggesting manual check.
        if inst.launch_time != None or inst.reservation_id != None or inst.instance_id != None:
            # Try to recover state - this is best-case effort, so if something does not work immediately, not
            # recovery steps are attempted. Recovery is based on hope that instance_id is available in local DB; if not,
            # report as error.
            # Fields attempting to be recovered are: reservation_id, instance status, and launch_time 
            if inst.instance_id != None:
                conn = self.get_connection_from_uci( uci )
                rl = conn.get_all_instances( [inst.instance_id] ) # reservation list
                # Update local DB with relevant data from instance
                if inst.reservation_id == None:
                    try:
                        inst.reservation_id = str(rl[0]).split(":")[1]
                    except: # something failed, so skip
                        pass
                
                try:
                    state = rl[0].instances[0].update()
                    inst.state = state
                    uci.state = state
                    self.sa_session.add( inst )
                    self.sa_session.add( uci )
                    self.sa_session.flush()
                except: # something failed, so skip
                    pass
                
                if inst.launch_time == None:
                    try:
                        launch_time = self.format_time( rl[0].instances[0].launch_time )
                        inst.launch_time = launch_time
                        self.sa_session.add( inst )
                        self.sa_session.flush() 
                        if inst.uci.launch_time == None:
                            uci.launch_time = launch_time
                            self.sa_session.add( uci )
                            self.sa_session.flush()
                    except: # something failed, so skip
                        pass
            else:
                err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
                      "' seems to have failed. Because it appears that cloud instance might have gotten started, manual check is recommended."
                inst.error = err
                inst.state = instance_states.ERROR
                inst.uci.error = err
                inst.uci.state = uci_states.ERROR
                log.error( err )
                self.sa_session.add( inst )
                self.sa_session.add( uci )
                self.sa_session.flush()         
                
        else: #Instance most likely never got processed, so set error message suggesting user to try starting instance again.
            err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
                  "' seems to have failed. Because it appears that cloud instance never got started, it should be safe to reset state and try " \
                  "starting the instance again."
            inst.error = err
            inst.state = instance_states.ERROR
            uci.error = err
            uci.state = uci_states.ERROR
            log.error( err )
            self.sa_session.add( inst )
            self.sa_session.add( uci )
            self.sa_session.flush()
#            uw = UCIwrapper( inst.uci )
#            log.debug( "Try automatically re-submitting UCI '%s'." % uw.get_name() )

    def get_connection_from_uci( self, uci ):
        """
        Establish and return connection to cloud provider. Information needed to do so is obtained
        directly from uci database object.
        """
        log.debug( 'Establishing %s cloud connection' % self.type )
        a_key = uci.credentials.access_key
        s_key = uci.credentials.secret_key
        # Get connection
        try:
            region = RegionInfo( None, uci.credentials.provider.region_name, uci.credentials.provider.region_endpoint )
#            log.debug( "[%s] Using following command to connect to cloud provider: "  
#                                "conn = EC2Connection( aws_access_key_id=%s, " 
#                                                      "aws_secret_access_key=%s, " 
#                                                      "port=%s, "
#                                                      "is_secure=%s, " 
#                                                      "region=region, "
#                                                      "path=%s )" % ( self.type, a_key, s_key, uci.credentials.provider.is_secure, uci.credentials.provider.port, uci.credentials.provider.path ) ) 
            conn = EC2Connection( aws_access_key_id=a_key, 
                                  aws_secret_access_key=s_key, 
                                  is_secure=uci.credentials.provider.is_secure,
                                  port=uci.credentials.provider.port,   
                                  region=region, 
                                  path=uci.credentials.provider.path )
        except boto.exception.EC2ResponseError, e:
            err = "Establishing connection with cloud failed: " + str( e )
            log.error( err )
            uci.error = err
            uci.state = uci_states.ERROR
            self.sa_session.add( uci )
            self.sa_session.flush()
            return None

        return conn
    
#    def updateUCI( self, uci ):
#        """ 
#        Runs a global status update on all storage volumes and all instances that are
#        associated with specified UCI
#        """
#        conn = self.get_connection( uci )
#        
#        # Update status of storage volumes
#        vl = model.CloudStore.filter( model.CloudInstance.table.c.uci_id == uci.id ).all()
#        vols = []
#        for v in vl:
#            vols.append( v.volume_id )
#        try:
#            volumes = conn.get_all_volumes( vols )
#            for i, v in enumerate( volumes ):
#                uci.store[i].inst.instance_id = v.instance_id
#                uci.store[i].status = v.status
#                uci.store[i].device = v.device
#                uci.store[i].flush()
#        except:
#            log.debug( "Error updating status of volume(s) associated with UCI '%s'. Status was not updated." % uci.name )
#            pass
#        
#        # Update status of instances
#        il = model.CloudInstance.filter_by( uci_id=uci.id ).filter( model.CloudInstance.table.c.state != 'terminated' ).all()
#        instanceList = []
#        for i in il:
#            instanceList.append( i.instance_id )
#        log.debug( 'instanceList: %s' % instanceList )
#        try:
#            reservations = conn.get_all_instances( instanceList )
#            for i, r in enumerate( reservations ):
#                uci.instance[i].state = r.instances[0].update()
#                log.debug('updating instance %s; status: %s' % ( uci.instance[i].instance_id, uci.instance[i].state ) )
#                uci.state = uci.instance[i].state
#                uci.instance[i].public_dns = r.instances[0].dns_name
#                uci.instance[i].private_dns = r.instances[0].private_dns_name
#                uci.instance[i].flush()
#                uci.flush()
#        except:
#            log.debug( "Error updating status of instances associated with UCI '%s'. Instance status was not updated." % uci.name )
#            pass
        
    # --------- Helper methods ------------
    
    def format_time( self, time ):
        """
        Return the given timestamp string with every 'T' replaced by a space and every 'Z' removed,
        e.g. '2009-11-05T14:31:02.000Z' becomes '2009-11-05 14:31:02.000'.

        time -- timestamp as a string; anything without a replace() method (e.g. None) raises
                AttributeError, which callers are expected to handle.
        """
        # The two substitutions are independent of each other, so their order does not matter
        return time.replace( 'T', ' ' ).replace( 'Z', '' )
Tech Fingerprint

Alerts (20)

'threading.Thread(' Use concurrent.futures for easier thread management
87
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
130 513 902 912 925 1008 1028
'== None' Use 'is' for None comparisons (e.g., x is None)
177 595 716 899 915 921
'time.sleep(' Avoid blocking; use threading.Timer or asyncio.sleep for non-blocking delays
526
'try:' Ensure try blocks have corresponding except or finally blocks
648 791
Complexity hotspot; lines 880 to 881 (total complexity: 5)
880 881
'def' Ensure functions have docstrings for documentation
1034