PageRenderTime 68ms CodeModel.GetById 22ms app.highlight 37ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/galaxy/cloud/providers/ec2.py

https://bitbucket.org/ajish/galaxy-omelogic
Python | 1033 lines | 1006 code | 14 blank | 13 comment | 59 complexity | d6f582a6b4ede3dc825c5f2ce22096b4 MD5 | raw file
   1import subprocess, threading, os, errno, time, datetime
   2from Queue import Queue, Empty
   3from datetime import datetime
   4
   5from galaxy import model # Database interaction class
   6from galaxy.model import mapping
   7from galaxy.datatypes.data import nice_size
   8from galaxy.util.bunch import Bunch
   9from galaxy.cloud import UCIwrapper
  10from Queue import Queue
  11from sqlalchemy import or_, and_
  12
  13import galaxy.eggs
  14galaxy.eggs.require("boto")
  15from boto.ec2.connection import EC2Connection
  16from boto.ec2.regioninfo import RegionInfo
  17import boto.exception
  18import boto
  19
  20import logging
  21log = logging.getLogger( __name__ )
  22
  23uci_states = Bunch(
  24    NEW_UCI = "newUCI",
  25    NEW = "new",
  26    CREATING = "creating",
  27    DELETING_UCI = "deletingUCI",
  28    DELETING = "deleting",
  29    SUBMITTED_UCI = "submittedUCI",
  30    SUBMITTED = "submitted",
  31    SHUTTING_DOWN_UCI = "shutting-downUCI",
  32    SHUTTING_DOWN = "shutting-down",
  33    AVAILABLE = "available",
  34    RUNNING = "running",
  35    PENDING = "pending",
  36    ERROR = "error",
  37    DELETED = "deleted",
  38    SNAPSHOT_UCI = "snapshotUCI",
  39    SNAPSHOT = "snapshot"
  40)
  41
  42instance_states = Bunch(
  43    TERMINATED = "terminated",
  44    SUBMITTED = "submitted",
  45    RUNNING = "running",
  46    PENDING = "pending",
  47    SHUTTING_DOWN = "shutting-down",
  48    ERROR = "error"
  49)
  50
  51store_status = Bunch(
  52    WAITING = "waiting",
  53    IN_USE = "in-use",
  54    CREATING = "creating",
  55    DELETED = 'deleted',
  56    ERROR = "error"
  57)
  58
  59snapshot_status = Bunch(
  60    SUBMITTED = 'submitted',
  61    PENDING = 'pending',
  62    COMPLETED = 'completed',
  63    DELETE = 'delete',
  64    DELETED= 'deleted',
  65    ERROR = "error"
  66)
  67
  68class EC2CloudProvider( object ):
  69    """
  70    Amazon EC2-based cloud provider implementation for managing instances. 
  71    """
  72    STOP_SIGNAL = object()
  73    def __init__( self, app ):
  74        self.type = "ec2" # cloud provider type (e.g., ec2, eucalyptus, opennebula)
  75        self.zone = "us-east-1a"
  76        self.security_group = "galaxyWeb"
  77        self.queue = Queue()
  78        self.sa_session = app.model.context
  79        
  80        self.threads = []
  81        nworkers = 5
  82        log.info( "Starting EC2 cloud controller workers..." )
  83        for i in range( nworkers  ):
  84            worker = threading.Thread( target=self.run_next )
  85            worker.start()
  86            self.threads.append( worker )
  87        log.debug( "%d EC2 cloud workers ready", nworkers )
  88        
  89    def shutdown( self ):
  90        """Attempts to gracefully shut down the monitor thread"""
  91        log.info( "sending stop signal to worker threads in EC2 cloud manager" )
  92        for i in range( len( self.threads ) ):
  93            self.queue.put( self.STOP_SIGNAL )
  94        log.info( "EC2 cloud manager stopped" )
  95    
  96    def put( self, uci_wrapper ):
  97        """
  98        Add uci_wrapper object to the end of the request queue to be handled by 
  99        this cloud provider.
 100        """
 101        state = uci_wrapper.get_uci_state()
 102        uci_wrapper.change_state( state.split('U')[0] ) # remove 'UCI' from end of state description (i.e., mark as accepted and ready for processing)
 103        self.queue.put( uci_wrapper )
 104        
 105    def run_next( self ):
 106        """Process next request, waiting until one is available if necessary."""
 107        cnt = 0
 108        while 1:
 109            
 110            uci_wrapper = self.queue.get()
 111            uci_state = uci_wrapper.get_uci_state()
 112            if uci_state is self.STOP_SIGNAL:
 113                return
 114            try:
 115                if uci_state==uci_states.NEW:
 116                    self.create_uci( uci_wrapper )
 117                elif uci_state==uci_states.DELETING:
 118                    self.delete_uci( uci_wrapper )
 119                elif uci_state==uci_states.SUBMITTED:
 120                    self.start_uci( uci_wrapper )
 121                elif uci_state==uci_states.SHUTTING_DOWN:
 122                    self.stop_uci( uci_wrapper )
 123                elif uci_state==uci_states.SNAPSHOT:
 124                    self.snapshot_uci( uci_wrapper )
 125            except:
 126                log.exception( "Uncaught exception executing cloud request." )
 127            cnt += 1
 128            
 129    def get_connection( self, uci_wrapper ):
 130        """
 131        Establishes cloud connection using user's credentials associated with given UCI
 132        """
 133        log.debug( 'Establishing %s cloud connection.' % self.type )
 134        provider = uci_wrapper.get_provider()
 135        try:
 136            region = RegionInfo( None, provider.region_name, provider.region_endpoint )
 137        except Exception, ex:
 138            err = "Selecting region with cloud provider failed: " + str( ex )
 139            log.error( err )
 140            uci_wrapper.set_error( err, True )
 141            return None
 142        try:
 143            conn = EC2Connection( aws_access_key_id=uci_wrapper.get_access_key(), 
 144                                  aws_secret_access_key=uci_wrapper.get_secret_key(), 
 145                                  is_secure=provider.is_secure, 
 146                                  region=region, 
 147                                  path=provider.path )
 148        except boto.exception.EC2ResponseError, e:
 149            err = "Establishing connection with cloud failed: " + str( e )
 150            log.error( err )
 151            uci_wrapper.set_error( err, True )
 152            return None
 153        
 154        return conn
 155        
 156    def check_key_pair( self, uci_wrapper, conn ):
 157        """
 158        Check if a key pair associated with this UCI exists on cloud provider.
 159        If yes, return key pair name; otherwise, generate a key pair with the cloud
 160        provider and, again, return key pair name.
 161        Key pair name for given UCI is generated from UCI's name and suffix '_kp' 
 162        """
 163        kp = None
 164        kp_name = uci_wrapper.get_name().replace(' ','_') + "_kp"
 165        log.debug( "Checking user's key pair: '%s'" % kp_name )
 166        try:
 167            kp = conn.get_key_pair( kp_name )
 168            uci_kp_name = uci_wrapper.get_key_pair_name()
 169            uci_material = uci_wrapper.get_key_pair_material()
 170            if kp != None:
 171                if kp.name != uci_kp_name or uci_material == None:
 172                    # key pair exists on the cloud but not in local database, so re-generate it (i.e., delete and then create)
 173                    try: 
 174                        conn.delete_key_pair( kp_name )
 175                        kp = self.create_key_pair( conn, kp_name )
 176                        uci_wrapper.set_key_pair( kp.name, kp.material )
 177                    except boto.exception.EC2ResponseError, e:
 178                        err = "EC2 response error while deleting key pair: " + str( e )
 179                        log.error( err )
 180                        uci_wrapper.set_error( err, True )
 181            else:
 182                try:
 183                    kp = self.create_key_pair( conn, kp_name )
 184                    uci_wrapper.set_key_pair( kp.name, kp.material )
 185                except boto.exception.EC2ResponseError, e:
 186                    err = "EC2 response error while creating key pair: " + str( e )
 187                    log.error( err )
 188                    uci_wrapper.set_error( err, True )
 189                except Exception, ex:
 190                    err = "Exception while creating key pair: " + str( ex )
 191                    log.error( err )
 192                    uci_wrapper.set_error( err, True )             
 193        except boto.exception.EC2ResponseError, e: # No keypair under this name exists so create it
 194            if e.code == 'InvalidKeyPair.NotFound': 
 195                log.info( "No keypair found, creating keypair '%s'" % kp_name )
 196                kp = self.create_key_pair( conn, kp_name )
 197                uci_wrapper.set_key_pair( kp.name, kp.material )
 198            else:
 199                err = "EC2 response error while retrieving key pair: " + str( e )
 200                log.error( err )
 201                uci_wrapper.set_error( err, True )
 202                        
 203        if kp != None:
 204            return kp.name
 205        else:
 206            return None
 207    
 208    def create_key_pair( self, conn, kp_name ):
 209        """ Initiate creation of key pair under kp_name by current cloud provider. """
 210        try:
 211            return conn.create_key_pair( kp_name )
 212        except boto.exception.EC2ResponseError, e: 
 213            return None
 214    
 215    def get_mi_id( self, uci_wrapper, i_index ):
 216        """
 217        Get appropriate machine image (mi) based on instance size.
 218        """
 219        i_type = uci_wrapper.get_instance_type( i_index )
 220        if i_type=='m1.small' or i_type=='c1.medium':
 221            arch = 'i386'
 222        else:
 223            arch = 'x86_64' 
 224        
 225        mi = self.sa_session.query( model.CloudImage ).filter_by( deleted=False, provider_type=self.type, architecture=arch ).first()
 226        if mi:
 227            return mi.image_id
 228        else:
 229            err = "Machine image could not be retrieved"
 230            log.error( "%s for UCI '%s'." % (err, uci_wrapper.get_name() ) )
 231            uci_wrapper.set_error( err+". Contact site administrator to ensure needed machine image is registered.", True )
 232            return None
 233            
 234    def create_uci( self, uci_wrapper ):
 235        """ 
 236        Create User Configured Instance (UCI) - i.e., create storage volume on cloud provider
 237        and register relevant information in local Galaxy database.
 238        """
 239        conn = self.get_connection( uci_wrapper )
 240        if uci_wrapper.get_uci_availability_zone()=='':
 241            log.info( "Availability zone for UCI (i.e., storage volume) was not selected, using default zone: %s" % self.zone )
 242            uci_wrapper.set_store_availability_zone( self.zone )
 243        
 244        log.info( "Creating volume in zone '%s'..." % uci_wrapper.get_uci_availability_zone() )
 245        # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t
 246        # current UCI is 0, so reference it in following methods
 247        vol = conn.create_volume( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone(), snapshot=None )
 248        uci_wrapper.set_store_volume_id( 0, vol.id )
 249        
 250        # Wait for a while to ensure volume was created
 251#        vol_status = vol.status
 252#        for i in range( 30 ):
 253#            if vol_status is not "available":
 254#                log.debug( 'Updating volume status; current status: %s' % vol_status )
 255#                vol_status = vol.status
 256#                time.sleep(3)
 257#            if i is 29:
 258#                log.debug( "Error while creating volume '%s'; stuck in state '%s'; deleting volume." % ( vol.id, vol_status ) )
 259#                conn.delete_volume( vol.id )
 260#                uci_wrapper.change_state( uci_state='error' )
 261#                return
 262        
 263        # Retrieve created volume again to get updated status
 264        try:
 265            vl = conn.get_all_volumes( [vol.id] )
 266        except boto.exception.EC2ResponseError, e: 
 267            err = "EC2 response error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( e )
 268            log.error( err )
 269            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
 270            uci_wrapper.set_error( err, True )
 271            return
 272        except Exception, ex:
 273            err = "Error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( ex )
 274            log.error( err )
 275            uci_wrapper.set_error( err, True )
 276            return
 277        
 278        if len( vl ) > 0:
 279            uci_wrapper.change_state( uci_state=vl[0].status )
 280            uci_wrapper.set_store_status( vol.id, vl[0].status )
 281        else:
 282            err = "Volume '" + vol.id +"' not found by EC2 after being created."
 283            log.error( err )
 284            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
 285            uci_wrapper.set_error( err, True )
 286
 287    def delete_uci( self, uci_wrapper ):
 288        """ 
 289        Delete UCI - i.e., delete all storage volumes associated with this UCI. 
 290        NOTE that this implies deletion of any and all data associated
 291        with this UCI from the cloud. All data will be deleted.
 292        Information in local Galaxy database is marked as deleted but not actually removed
 293        from the database. 
 294        """
 295        conn = self.get_connection( uci_wrapper )
 296        vl = [] # volume list
 297        count = 0 # counter for checking if all volumes assoc. w/ UCI were deleted
 298        
 299        # Get all volumes assoc. w/ UCI, delete them from cloud as well as in local DB
 300        vl = uci_wrapper.get_all_stores()
 301        deletedList = []
 302        failedList = []
 303        for v in vl:
 304            log.debug( "Deleting volume with id='%s'" % v.volume_id )
 305            try:
 306                if conn.delete_volume( v.volume_id ):
 307                    deletedList.append( v.volume_id )
 308                    v.deleted = True
 309                    self.sa_session.add( v )
 310                    self.sa_session.flush()
 311                    count += 1
 312                else:
 313                    failedList.append( v.volume_id )
 314            except boto.exception.EC2ResponseError, e:
 315                err = "EC2 response error while deleting storage volume '" + v.volume_id + "': " + str( e )
 316                log.error( err )
 317                uci_wrapper.set_store_error( err, store_id = v.volume_id )
 318                uci_wrapper.set_error( err, True )
 319            
 320        # Delete UCI if all of associated 
 321        if count == len( vl ):
 322            uci_wrapper.set_deleted()
 323        else:
 324            err = "Deleting following volume(s) failed: " + str( failedList ) + ". However, these volumes were successfully deleted: " \
 325                  + str( deletedList ) + ". MANUAL intervention and processing needed."
 326            log.error( err )
 327            uci_wrapper.set_error( err, True )
 328            
 329    def snapshot_uci( self, uci_wrapper ):
 330        """
 331        Initiate creation of a snapshot by cloud provider for all storage volumes 
 332        associated with this UCI. 
 333        """
 334        if uci_wrapper.get_uci_state() != uci_states.ERROR:
 335            conn = self.get_connection( uci_wrapper )
 336            
 337            snapshots = uci_wrapper.get_snapshots( status = snapshot_status.SUBMITTED )
 338            for snapshot in snapshots:
 339                log.debug( "Snapshot DB id: '%s', volume id: '%s'" % ( snapshot.id, snapshot.store.volume_id ) )
 340                try:
 341                    snap = conn.create_snapshot( volume_id=snapshot.store.volume_id )
 342                    snap_id = str( snap ).split(':')[1]
 343                    uci_wrapper.set_snapshot_id( snapshot.id, snap_id )
 344                    sh = conn.get_all_snapshots( snap_id ) # get updated status
 345                    uci_wrapper.set_snapshot_status( status=sh[0].status, snap_id=snap_id )
 346                except boto.exception.EC2ResponseError, e:
 347                    err = "EC2 response error while creating snapshot: " + str( e )
 348                    log.error( err )
 349                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
 350                    uci_wrapper.set_error( err, True )
 351                    return
 352                except Exception, ex:
 353                    err = "Error while creating snapshot: " + str( ex )
 354                    log.error( err )
 355                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
 356                    uci_wrapper.set_error( err, True )
 357                    return
 358                    
 359            uci_wrapper.change_state( uci_state=uci_states.AVAILABLE )
 360                
 361    def add_storage_to_uci( self, name ):
 362        """ Adds more storage to specified UCI 
 363        TODO"""
 364    
 365    def dummy_start_uci( self, uci_wrapper ):
 366        
 367        uci = uci_wrapper.get_uci()
 368        log.debug( "Would be starting instance '%s'" % uci.name )
 369        uci_wrapper.change_state( uci_state.PENDING )
 370#        log.debug( "Sleeping a bit... (%s)" % uci.name )
 371#        time.sleep(20)
 372#        log.debug( "Woke up! (%s)" % uci.name )
 373        
 374    def start_uci( self, uci_wrapper ):
 375        """
 376        Start instance(s) of given UCI on the cloud.  
 377        """ 
 378        if uci_wrapper.get_uci_state() != uci_states.ERROR:
 379             conn = self.get_connection( uci_wrapper )
 380             self.check_key_pair( uci_wrapper, conn )
 381             if uci_wrapper.get_key_pair_name() == None:
 382                err = "Key pair not found"
 383                log.error( "%s for UCI '%s'." % ( err, uci_wrapper.get_name() ) )
 384                uci_wrapper.set_error( err + ". Try resetting the state and starting the instance again.", True )
 385                return
 386             
 387             i_indexes = uci_wrapper.get_instances_indexes( state=instance_states.SUBMITTED ) # Get indexes of i_indexes associated with this UCI that are in 'submitted' state
 388             log.debug( "Starting instances with IDs: '%s' associated with UCI '%s' " % ( i_indexes, uci_wrapper.get_name(),  ) )
 389             if len( i_indexes ) > 0:
 390                 for i_index in i_indexes:
 391                    # Get machine image for current instance
 392                    mi_id = self.get_mi_id( uci_wrapper, i_index )
 393                    log.debug( "mi_id: %s, uci_wrapper.get_key_pair_name(): %s" % ( mi_id, uci_wrapper.get_key_pair_name() ) )
 394                    uci_wrapper.set_mi( i_index, mi_id )
 395                    
 396                    if mi_id != None:
 397                        # Check if galaxy security group exists (and create it if it does not)
 398                        log.debug( "Setting up '%s' security group." % self.security_group )
 399                        try:
 400                            conn.get_all_security_groups( [self.security_group] ) # security groups
 401                        except boto.exception.EC2ResponseError, e:
 402                            if e.code == 'InvalidGroup.NotFound': 
 403                                log.info( "No security group found, creating security group '%s'" % self.security_group )
 404                                try:
 405                                    gSecurityGroup = conn.create_security_group(self.security_group, 'Security group for Galaxy.')
 406                                    gSecurityGroup.authorize( 'tcp', 80, 80, '0.0.0.0/0' ) # Open HTTP port
 407                                    gSecurityGroup.authorize( 'tcp', 22, 22, '0.0.0.0/0' ) # Open SSH port
 408                                except boto.exception.EC2ResponseError, ee:
 409                                    err = "EC2 response error while creating security group: " + str( ee )
 410                                    log.error( err )
 411                                    uci_wrapper.set_error( err, True )
 412                            else:
 413                                err = "EC2 response error while retrieving security group: " + str( e )
 414                                log.error( err )
 415                                uci_wrapper.set_error( err, True )
 416                    
 417                        
 418                        if uci_wrapper.get_uci_state() != uci_states.ERROR:
 419                            # Start an instance
 420                            log.debug( "Starting instance for UCI '%s'" % uci_wrapper.get_name() )
 421                            #TODO: Once multiple volumes can be attached to a single instance, update 'userdata' composition            
 422                            userdata = uci_wrapper.get_store_volume_id()+"|"+uci_wrapper.get_access_key()+"|"+uci_wrapper.get_secret_key() 
 423                            log.debug( "Using following command: conn.run_instances( image_id='%s', key_name='%s', security_groups=['%s'], user_data=[OMITTED], instance_type='%s', placement='%s' )" 
 424                                       % ( mi_id, uci_wrapper.get_key_pair_name(), self.security_group, uci_wrapper.get_instance_type( i_index ), uci_wrapper.get_uci_availability_zone() ) )
 425                            reservation = None
 426                            try:
 427                                reservation = conn.run_instances( image_id=mi_id, 
 428                                                                  key_name=uci_wrapper.get_key_pair_name(), 
 429                                                                  security_groups=[self.security_group], 
 430                                                                  user_data=userdata,
 431                                                                  instance_type=uci_wrapper.get_instance_type( i_index ),  
 432                                                                  placement=uci_wrapper.get_uci_availability_zone() )
 433                            except boto.exception.EC2ResponseError, e:
 434                                err = "EC2 response error when starting UCI '"+ uci_wrapper.get_name() +"': " + str( e )
 435                                log.error( err )
 436                                uci_wrapper.set_error( err, True )
 437                            except Exception, ex:
 438                                err = "Error when starting UCI '" + uci_wrapper.get_name() + "': " + str( ex )
 439                                log.error( err )
 440                                uci_wrapper.set_error( err, True )
 441                            # Record newly available instance data into local Galaxy database
 442                            if reservation:
 443                                l_time = datetime.utcnow()
 444    #                            uci_wrapper.set_instance_launch_time( self.format_time( reservation.instances[0].launch_time ), i_index=i_index )
 445                                uci_wrapper.set_instance_launch_time( l_time, i_index=i_index )
 446                                if not uci_wrapper.uci_launch_time_set():
 447                                    uci_wrapper.set_uci_launch_time( l_time )
 448                                try:
 449                                    uci_wrapper.set_reservation_id( i_index, str( reservation ).split(":")[1] )
 450                                    # TODO: if more than a single instance will be started through single reservation, change this reference to element [0]
 451                                    i_id = str( reservation.instances[0]).split(":")[1] 
 452                                    uci_wrapper.set_instance_id( i_index, i_id )
 453                                    s = reservation.instances[0].state 
 454                                    uci_wrapper.change_state( s, i_id, s )
 455                                    uci_wrapper.set_security_group_name( self.security_group, i_id=i_id )
 456                                    vol_id = uci_wrapper.get_store_volume_id( store_id=0 ) # TODO: Once more that one vol/UCI is allowed, update this!
 457                                    uci_wrapper.set_store_status( vol_id, store_status.WAITING )
 458                                    log.debug( "Instance of UCI '%s' started, current state: '%s'" % ( uci_wrapper.get_name(), uci_wrapper.get_uci_state() ) )
 459                                except boto.exception.EC2ResponseError, e:
 460                                    err = "EC2 response error when retrieving instance information for UCI '" + uci_wrapper.get_name() + "': " + str( e )
 461                                    log.error( err )
 462                                    uci_wrapper.set_error( err, True )
 463                        else:
 464                            log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
 465             else:
 466                err = "No instances in state '"+ instance_states.SUBMITTED +"' found for UCI '" + uci_wrapper.get_name() + \
 467                      "'. Nothing to start."
 468                log.error( err )
 469                uci_wrapper.set_error( err, True )
 470        else:
 471            log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
 472                    
 473    def stop_uci( self, uci_wrapper):
 474        """ 
 475        Stop all of cloud instances associated with given UCI. 
 476        """
 477        conn = self.get_connection( uci_wrapper )
 478        
 479        # Get all instances associated with given UCI
 480        il = uci_wrapper.get_instances_ids() # instance list
 481        # Process list of instances and remove any references to empty instance id's
 482        for i in il:
 483            if i is None:
 484                il.remove( i )
 485        log.debug( 'List of instances being terminated: %s' % il )
 486        rl = conn.get_all_instances( il ) # Reservation list associated with given instances
 487        
 488        # Initiate shutdown of all instances under given UCI
 489        cnt = 0
 490        stopped = []
 491        not_stopped = []
 492        for r in rl:
 493            for inst in r.instances:
 494                log.debug( "Sending stop signal to instance '%s' associated with reservation '%s'." % ( inst, r ) )
 495                try:
 496                    inst.stop()
 497                    uci_wrapper.set_stop_time( datetime.utcnow(), i_id=inst.id )
 498                    uci_wrapper.change_state( instance_id=inst.id, i_state=inst.update() )
 499                    stopped.append( inst )
 500                except boto.exception.EC2ResponseError, e:
 501                    not_stopped.append( inst )
 502                    err = "EC2 response error when stopping instance '" + inst.instance_id + "': " + str(e)
 503                    log.error( err )
 504                    uci_wrapper.set_error( err, True )
 505                
 506        uci_wrapper.reset_uci_launch_time()
 507        log.debug( "Termination was initiated for all instances of UCI '%s'." % uci_wrapper.get_name() )
 508
 509
 510#        dbInstances = get_instances( trans, uci ) #TODO: handle list!
 511#        
 512#        # Get actual cloud instance object
 513#        cloudInstance = get_cloud_instance( conn, dbInstances.instance_id )
 514#        
 515#        # TODO: Detach persistent storage volume(s) from instance and update volume data in local database
 516#        stores = get_stores( trans, uci )
 517#        for i, store in enumerate( stores ):
 518#            log.debug( "Detaching volume '%s' to instance '%s'." % ( store.volume_id, dbInstances.instance_id ) )
 519#            mntDevice = store.device
 520#            volStat = None
 521##            Detaching volume does not work with Eucalyptus Public Cloud, so comment it out
 522##            try:
 523##                volStat = conn.detach_volume( store.volume_id, dbInstances.instance_id, mntDevice )
 524##            except:
 525##                log.debug ( 'Error detaching volume; still going to try and stop instance %s.' % dbInstances.instance_id )
 526#            store.attach_time = None
 527#            store.device = None
 528#            store.i_id = None
 529#            store.status = volStat
 530#            log.debug ( '***** volume status: %s' % volStat )
 531#   
 532#        
 533#        # Stop the instance and update status in local database
 534#        cloudInstance.stop()
 535#        dbInstances.stop_time = datetime.utcnow()
 536#        while cloudInstance.state != 'terminated':
 537#            log.debug( "Stopping instance %s state; current state: %s" % ( str( cloudInstance ).split(":")[1], cloudInstance.state ) )
 538#            time.sleep(3)
 539#            cloudInstance.update()
 540#        dbInstances.state = cloudInstance.state
 541#        
 542#        # Reset relevant UCI fields
 543#        uci.state = 'available'
 544#        uci.launch_time = None
 545#          
 546#        # Persist
 547#        session = trans.sa_session
 548##        session.save_or_update( stores )
 549#        session.save_or_update( dbInstances ) # TODO: Is this going to work w/ multiple instances stored in dbInstances variable?
 550#        session.save_or_update( uci )
 551#        session.flush()
 552#        trans.log_event( "User stopped cloud instance '%s'" % uci.name )
 553#        trans.set_message( "Galaxy instance '%s' stopped." % uci.name )
 554
 555    def update( self ):
 556        """ 
 557        Run status update on all instances that are in 'running', 'pending', or 'shutting-down' state.
 558        Run status update on all storage volumes whose status is 'in-use', 'creating', or 'None'.
 559        Run status update on all snapshots whose status is 'pending' or 'delete'  
 560        Run status update on any zombie UCIs, i.e., UCI's that is in 'submitted' state for an 
 561        extended period of time.
 562        
 563        Reason behind this method is to sync state of local DB and real-world resources
 564        """
 565        log.debug( "Running general status update for %s UCIs..." % self.type )
 566        # Update instances
 567        instances = self.sa_session.query( model.CloudInstance ) \
 568            .filter( or_( model.CloudInstance.table.c.state==instance_states.RUNNING, 
 569                          model.CloudInstance.table.c.state==instance_states.PENDING,  
 570                          model.CloudInstance.table.c.state==instance_states.SHUTTING_DOWN ) ) \
 571            .all()
 572        for inst in instances:
 573            if self.type == inst.uci.credentials.provider.type:
 574                log.debug( "[%s] Running general status update on instance '%s'" % ( inst.uci.credentials.provider.type, inst.instance_id ) )
 575                self.update_instance( inst )
 576            
 577        # Update storage volume(s)
 578        stores = self.sa_session.query( model.CloudStore ) \
 579            .filter( or_( model.CloudStore.table.c.status==store_status.IN_USE, 
 580                          model.CloudStore.table.c.status==store_status.CREATING,
 581                          model.CloudStore.table.c.status==store_status.WAITING,
 582                          model.CloudStore.table.c.status==None ) ) \
 583            .all()
 584        for store in stores:
 585            if self.type == store.uci.credentials.provider.type: # and store.volume_id != None:
 586                log.debug( "[%s] Running general status update on store with local database ID: '%s'" % ( store.uci.credentials.provider.type, store.id ) )
 587                self.update_store( store )
 588#            else:
 589#                log.error( "[%s] There exists an entry for UCI (%s) storage volume without an ID. Storage volume might have been created with "
 590#                           "cloud provider though. Manual check is recommended." % ( store.uci.credentials.provider.type, store.uci.name ) )
 591#                store.uci.error = "There exists an entry in local database for a storage volume without an ID. Storage volume might have been created " \
 592#                            "with cloud provider though. Manual check is recommended. After understanding what happened, local database entry for given " \
 593#                            "storage volume should be updated."
 594#                store.status = store_status.ERROR
 595#                store.uci.state = uci_states.ERROR
 596#                store.uci.flush()
 597#                store.flush()
 598        
 599        # Update pending snapshots or delete ones marked for deletion
 600        snapshots = self.sa_session.query( model.CloudSnapshot ) \
 601            .filter( or_( model.CloudSnapshot.table.c.status == snapshot_status.PENDING, model.CloudSnapshot.table.c.status == snapshot_status.DELETE ) ) \
 602            .all()
 603        for snapshot in snapshots:
 604            if self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.PENDING:
 605                log.debug( "[%s] Running general status update on snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
 606                self.update_snapshot( snapshot )
 607            elif self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.DELETE:
 608                log.debug( "[%s] Initiating deletion of snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
 609                self.delete_snapshot( snapshot )
 610             
 611        # Attempt at updating any zombie UCIs (i.e., instances that have been in SUBMITTED state for longer than expected - see below for exact time)
 612        zombies = self.sa_session.query( model.UCI ).filter_by( state=uci_states.SUBMITTED ).all()
 613        for zombie in zombies:
 614            z_instances = self.sa_session.query( model.CloudInstance ) \
 615                .filter_by( uci_id=zombie.id ) \
 616                .filter( or_( model.CloudInstance.table.c.state != instance_states.TERMINATED,
 617                              model.CloudInstance.table.c.state == None ) ) \
 618                .all()
 619            for z_inst in z_instances:
 620                if self.type == z_inst.uci.credentials.provider.type:
 621#                    log.debug( "z_inst.id: '%s', state: '%s'" % ( z_inst.id, z_inst.state ) )
 622                    td = datetime.utcnow() - z_inst.update_time
 623                    if td.seconds > 180: # if instance has been in SUBMITTED state for more than 3 minutes
 624                        log.debug( "[%s] Running zombie repair update on instance with DB id '%s'" % ( z_inst.uci.credentials.provider.type, z_inst.id ) )
 625                        self.process_zombie( z_inst )
 626        
 627    def update_instance( self, inst ):
 628        """
 629        Update information in local database for given instance as it is obtained from cloud provider.
 630        Along with updating information about given instance, information about the UCI controlling
 631        this instance is also updated.
 632        """
 633        # Get credentials associated wit this instance
 634        uci_id = inst.uci_id
 635        uci = self.sa_session.query( model.UCI ).get( uci_id )
 636        self.sa_session.refresh( uci )
 637        conn = self.get_connection_from_uci( uci )
 638        
 639        # Get reservations handle for given instance
 640        try:
 641            rl= conn.get_all_instances( [inst.instance_id] )
 642        except boto.exception.EC2ResponseError, e:
 643            err = "Retrieving instance(s) from cloud failed for UCI '"+ uci.name +"' during general status update: " + str( e )
 644            log.error( err )
 645            uci.error = err
 646            uci.state = uci_states.ERROR
 647            self.sa_session.add( uci )
 648            self.sa_session.flush()
 649            return None
 650
 651        # Because references to reservations are deleted shortly after instances have been terminated, getting an empty list as a response to a query
 652        # typically means the instance has successfully shut down but the check was not performed in short enough amount of time. Until an alternative solution
 653        # is found, below code sets state of given UCI to 'error' to indicate to the user something out of ordinary happened.
 654        if len( rl ) == 0:
 655            err = "Instance ID '"+inst.instance_id+"' was not found by the cloud provider. Instance might have crashed or otherwise been terminated."+ \
 656                "Manual check is recommended."
 657            log.error( err )
 658            inst.error = err
 659            uci.error = err
 660            inst.state = instance_states.TERMINATED
 661            uci.state = uci_states.ERROR
 662            uci.launch_time = None
 663            self.sa_session.add( inst )
 664            self.sa_session.add( uci )
 665            self.sa_session.flush()
 666        # Update instance status in local DB with info from cloud provider
 667        for r in rl:
 668            for i, cInst in enumerate( r.instances ):
 669                try:
 670                    s = cInst.update()
 671                    log.debug( "Checking state of cloud instance '%s' associated with UCI '%s' and reservation '%s'. State='%s'" % ( cInst, uci.name, r, s ) )
 672                    if  s != inst.state:
 673                        inst.state = s
 674                        self.sa_session.add( inst )
 675                        self.sa_session.flush()
 676                         # After instance has shut down, ensure UCI is marked as 'available'
 677                        if s == instance_states.TERMINATED and uci.state != uci_states.ERROR:
 678                            uci.state = uci_states.AVAILABLE
 679                            uci.launch_time = None
 680                            self.sa_session.add( uci )
 681                            self.sa_session.flush()
 682                    # Making sure state of UCI is updated. Once multiple instances become associated with single UCI, this will need to be changed.
 683                    if s != uci.state and s != instance_states.TERMINATED: 
 684                        uci.state = s                    
 685                        self.sa_session.add( uci )
 686                        self.sa_session.flush()
 687                    if cInst.public_dns_name != inst.public_dns:
 688                        inst.public_dns = cInst.public_dns_name
 689                        self.sa_session.add( inst )
 690                        self.sa_session.flush()
 691                    if cInst.private_dns_name != inst.private_dns:
 692                        inst.private_dns = cInst.private_dns_name
 693                        self.sa_session.add( inst )
 694                        self.sa_session.flush()
 695                except boto.exception.EC2ResponseError, e:
 696                    err = "Updating instance status from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
 697                    log.error( err )
 698                    uci.error = err
 699                    uci.state = uci_states.ERROR
 700                    self.sa_session.add( uci )
 701                    self.sa_session.flush()
 702                    return None
 703                
 704    def update_store( self, store ):
 705        """
 706        Update information in local database for given storage volume as it is obtained from cloud provider.
 707        Along with updating information about given storage volume, information about the UCI controlling
 708        this storage volume is also updated.
 709        """
 710        # Get credentials associated wit this store
 711        uci_id = store.uci_id
 712        uci = self.sa_session.query( model.UCI ).get( uci_id )
 713        self.sa_session.refresh( uci )
 714        conn = self.get_connection_from_uci( uci )
 715        
 716        # Get reservations handle for given store 
 717        try:
 718            log.debug( "Updating storage volume command: vl = conn.get_all_volumes( [%s] )" % store.volume_id )
 719            vl = conn.get_all_volumes( [store.volume_id] )
 720        except boto.exception.EC2ResponseError, e:
 721            err = "Retrieving volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
 722            log.error( err )
 723            uci.error = err
 724            uci.state = uci_states.ERROR
 725            self.sa_session.add( uci )
 726            self.sa_session.flush()
 727            return None
 728        
 729        # Update store status in local DB with info from cloud provider
 730        if len(vl) > 0:
 731            try:
 732                log.debug( "Storage volume '%s' current status: '%s'" % (store.volume_id, vl[0].status ) )
 733                if store.status != vl[0].status:
 734                    # In case something failed during creation of UCI but actual storage volume was created and yet 
 735                    #  UCI state remained as 'new', try to remedy this by updating UCI state here 
 736                    if ( store.status == None ) and ( store.volume_id != None ):
 737                        uci.state = vl[0].status
 738                        self.sa_session.add( uci )
 739                        self.sa_session.flush()
 740                    # If UCI was marked in state 'CREATING', update its status to reflect new status
 741                    elif ( uci.state == uci_states.CREATING ):
 742                        uci.state = vl[0].status
 743                        self.sa_session.add( uci )
 744                        self.sa_session.flush()
 745                            
 746                    store.status = vl[0].status
 747                    self.sa_session.add( store )
 748                    self.sa_session.flush()
 749                    if store.inst != None:
 750                        if store.inst.instance_id != vl[0].instance_id:
 751                            store.inst.instance_id = vl[0].instance_id
 752                            self.sa_session.add( store )
 753                            self.sa_session.flush()
 754                    if store.attach_time != vl[0].attach_time:
 755                        store.attach_time = vl[0].attach_time
 756                        self.sa_session.add( store )
 757                        self.sa_session.flush()
 758                    if store.device != vl[0].device:
 759                        store.device = vl[0].device
 760                        self.sa_session.add( store )
 761                        self.sa_session.flush()
 762            except boto.exception.EC2ResponseError, e:
 763                err = "Updating status of volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
 764                log.error( err )
 765                uci.error = err
 766                uci.state = uci_states.ERROR
 767                self.sa_session.add( uci )
 768                self.sa_session.flush()
 769                return None
 770        else:
 771            err = "No storage volumes returned by cloud provider on general update"
 772            log.error( "%s for UCI '%s'" % ( err, uci.name ) )
 773            store.status = store_status.ERROR
 774            store.error = err
 775            uci.error = err
 776            uci.state = uci_states.ERROR
 777            self.sa_session.add( uci )
 778            self.sa_session.add( store )
 779            self.sa_session.flush()
 780   
 781    def update_snapshot( self, snapshot ):
 782        """
 783        Update information in local database for given snapshot as it is obtained from cloud provider.
 784        Along with updating information about given snapshot, information about the UCI controlling
 785        this snapshot is also updated.
 786        """
 787        # Get credentials associated wit this store
 788        uci_id = snapshot.uci_id
 789        uci = self.sa_session.query( model.UCI ).get( uci_id )
 790        self.sa_session.refresh( uci )
 791        conn = self.get_connection_from_uci( uci )
 792        
 793        try:
 794            log.debug( "Updating status of snapshot '%s'" % snapshot.snapshot_id )
 795            snap = conn.get_all_snapshots( [snapshot.snapshot_id] ) 
 796            if len( snap ) > 0:
 797                log.debug( "Snapshot '%s' status: %s" % ( snapshot.snapshot_id, snap[0].status ) )
 798                snapshot.status = snap[0].status
 799                self.sa_session.add( snapshot )
 800                self.sa_session.flush()
 801            else:
 802                err = "No snapshots returned by EC2 on general update"
 803                log.error( "%s for UCI '%s'" % ( err, uci.name ) )
 804                snapshot.status = snapshot_status.ERROR
 805                snapshot.error = err
 806                uci.error = err
 807                uci.state = uci_states.ERROR
 808                self.sa_session.add( uci )
 809                self.sa_session.add( snapshot )
 810                self.sa_session.flush()
 811        except boto.exception.EC2ResponseError, e:
 812            err = "EC2 response error while updating snapshot status: " + str( e )
 813            log.error( err )
 814            snapshot.status = snapshot_status.ERROR
 815            snapshot.error = err
 816            uci.error = err
 817            uci.state = uci_states.ERROR
 818            self.sa_session.add( uci )
 819            self.sa_session.add( snapshot )
 820            self.sa_session.flush()
 821        except Exception, ex:
 822            err = "Error while updating snapshot status: " + str( ex )
 823            log.error( err )
 824            snapshot.status = snapshot_status.ERROR
 825            snapshot.error = err
 826            uci.error = err
 827            uci.state = uci_states.ERROR
 828            self.sa_session.add( uci )
 829            self.sa_session.add( snapshot )
 830            self.sa_session.flush()
 831        
 832    def delete_snapshot( self, snapshot ):
 833        """
 834        Initiate deletion of given snapshot from cloud provider.
 835        """
 836        if snapshot.status == snapshot_status.DELETE:
 837            # Get credentials associated wit this store
 838            uci_id = snapshot.uci_id
 839            uci = self.sa_session.query( model.UCI ).get( uci_id )
 840            self.sa_session.refresh( uci )
 841            conn = self.get_connection_from_uci( uci )
 842            
 843            try:
 844                log.debug( "Deleting snapshot '%s'" % snapshot.snapshot_id )
 845                snap = conn.delete_snapshot( snapshot.snapshot_id )
 846                if snap == True:
 847                    snapshot.deleted = True
 848                    snapshot.status = snapshot_status.DELETED
 849                    self.sa_session.add( snapshot )
 850                    self.sa_session.flush()
 851                return snap
 852            except boto.exception.EC2ResponseError, e:
 853                err = "EC2 response error while deleting snapshot: " + str( e )
 854                log.error( err )
 855                snapshot.status = snapshot_status.ERROR
 856                snapshot.error = err
 857                uci.error = err
 858                uci.state = uci_states.ERROR
 859                self.sa_session.add( uci )
 860                self.sa_session.add( snapshot )
 861                self.sa_session.flush()
 862            except Exception, ex:
 863                err = "Error while deleting snapshot: " + str( ex )
 864                log.error( err )
 865                snapshot.status = snapshot_status.ERROR
 866                snapshot.error = err
 867                uci.error = err
 868                uci.state = uci_states.ERROR
 869                self.sa_session.add( uci )
 870                self.sa_session.add( snapshot )
 871                self.sa_session.flush()
 872        else:
 873            err = "Cannot delete snapshot '"+snapshot.snapshot_id+"' because its status is '"+snapshot.status+"'. Only snapshots with '" + \
 874                        snapshot_status.COMPLETED+"' status can be deleted."
 875            log.error( err )
 876            snapshot.error = err
 877            self.sa_session.add( snapshot )
 878            self.sa_session.flush()
 879            
 880    def process_zombie( self, inst ):
 881        """
 882        Attempt at discovering if starting a cloud instance was successful but local database was not updated
 883        accordingly or if something else failed and instance was never started. Currently, no automatic 
 884        repairs are being attempted; instead, appropriate error messages are set.
 885        """
 886        uci_id = inst.uci_id
 887        uci = self.sa_session.query( model.UCI ).get( uci_id )
 888        self.sa_session.refresh( uci )
 889        
 890        # Check if any instance-specific information was written to local DB; if 'yes', set instance and UCI's error message 
 891        # suggesting manual check.
 892        if inst.launch_time != None or inst.reservation_id != None or inst.instance_id != None:
 893            # Try to recover state - this is best-case effort, so if something does not work immediately, not
 894            # recovery steps are attempted. Recovery is based on hope that instance_id is available in local DB; if not,
 895            # report as error.
 896            # Fields attempting to be recovered are: reservation_id, instance status, and launch_time 
 897            if inst.instance_id != None:
 898                conn = self.get_connection_from_uci( uci )
 899                rl = conn.get_all_instances( [inst.instance_id] ) # reservation list
 900                # Update local DB with relevant data from instance
 901                if inst.reservation_id == None:
 902                    try:
 903                        inst.reservation_id = str(rl[0]).split(":")[1]
 904                    except: # something failed, so skip
 905                        pass
 906                
 907                try:
 908                    state = rl[0].instances[0].update()
 909                    inst.state = state
 910                    uci.state = state
 911                    self.sa_session.add( inst )
 912                    self.sa_session.add( uci )
 913                    self.sa_session.flush()
 914                except: # something failed, so skip
 915                    pass
 916                
 917                if inst.launch_time == None:
 918                    try:
 919                        launch_time = self.format_time( rl[0].instances[0].launch_time )
 920                        inst.launch_time = launch_time
 921                        self.sa_session.add( inst )
 922                        self.sa_session.flush() 
 923                        if inst.uci.launch_time == None:
 924                            uci.launch_time = launch_time
 925                            self.sa_session.add( uci )
 926                            self.sa_session.flush()
 927                    except: # something failed, so skip
 928                        pass
 929            else:
 930                err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
 931                      "' seems to have failed. Because it appears that cloud instance might have gotten started, manual check is recommended."
 932                inst.error = err
 933                inst.state = instance_states.ERROR
 934                inst.uci.error = err
 935                inst.uci.state = uci_states.ERROR
 936                log.error( err )
 937                self.sa_session.add( inst )
 938                self.sa_session.add( uci )
 939                self.sa_session.flush()         
 940                
 941        else: #Instance most likely never got processed, so set error message suggesting user to try starting instance again.
 942            err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
 943                  "' seems to have failed. Because it appears that cloud instance never got started, it should be safe to reset state and try " \
 944                  "starting the instance again."
 945            inst.error = err
 946            inst.state = instance_states.ERROR
 947            uci.error = err
 948            uci.state = uci_states.ERROR
 949            log.error( err )
 950            self.sa_session.add( inst )
 951            self.sa_session.add( uci )
 952            self.sa_session.flush()
 953#            uw = UCIwrapper( inst.uci )
 954#            log.debug( "Try automatically re-submitting UCI '%s'." % uw.get_name() )
 955
 956    def get_connection_from_uci( self, uci ):
 957        """
 958        Establish and return connection to cloud provider. Information needed to do so is obtained
 959        directly from uci database object.
 960        """
 961        log.debug( 'Establishing %s cloud connection' % self.type )
 962        a_key = uci.credentials.access_key
 963        s_key = uci.credentials.secret_key
 964        # Get connection
 965        try:
 966            region = RegionInfo( None, uci.credentials.provider.region_name, uci.credentials.provider.region_endpoint )
 967            conn = EC2Connection( aws_access_key_id=a_key, 
 968                                  aws_secret_access_key=s_key, 
 969                                  is_secure=uci.credentials.provider.is_secure, 
 970                                  region=region, 
 971                                  path=uci.credentials.provider.path )
 972        except boto.exception.EC2ResponseError, e:
 973            err = "Establishing connection with cloud failed: " + str( e )
 974            log.error( err )
 975            uci.error = err
 976            uci.state = uci_states.ERROR
 977            self.sa_session.add( uci )
 978            self.sa_session.flush()
 979            return None
 980
 981        return conn
 982   
 983#    def updateUCI( self, uci ):
 984#        """ 
 985#        Runs a global status update on all storage volumes and all instances that are
 986#        associated with specified UCI
 987#        """
 988#        conn = self.get_connection( uci )
 989#        
 990#        # Update status of storage volumes
 991#        vl = model.CloudStore.filter( model.CloudInstance.table.c.uci_id == uci.id ).all()
 992#        vols = []
 993#        for v in vl:
 994#            vols.append( v.volume_id )
 995#        try:
 996#            volumes = conn.get_all_volumes( vols )
 997#            for i, v in enumerate( volumes ):
 998#                uci.store[i].i_id = v.instance_id
 999#                uci.store[i].status = v.status
1000#                uci.store[i].device = v.device
1001#                uci.store[i].flush()
1002#        except:
1003#            log.debug( "Error updating status of volume(s) associated with UCI '%s'. Status was not updated." % uci.name )
1004#            pass
1005#        
1006#        # Update status of instances
1007#        il = model.CloudInstance.filter_by( uci_id=uci.id ).filter( model.CloudInstance.table.c.state != 'terminated' ).all()
1008#        instanceList = []
1009#        for i in il:
1010#            instanceList.append( i.instance_id )
1011#        log.debug( 'instanceList: %s' % instanceList )
1012#        try:
1013#            reservations = conn.get_all_instances( instanceList )
1014#            for i, r in enumerate( reservations ):
1015#                uci.instance[i].state = r.instances[0].update()
1016#                log.debug('updating instance %s; status: %s' % ( uci.instance[i].instance_id, uci.instance[i].state ) )
1017#                uci.state = uci.instance[i].state
1018#                uci.instance[i].public_dns = r.instances[0].dns_name
1019#                uci.instance[i].private_dns = r.instances[0].private_dns_name
1020#                uci.instance[i].flush()
1021#                uci.flush()
1022#        except:
1023#            log.debug( "Error updating status of instances associated with UCI '%s'. Instance status was not updated." % uci.name )
1024#            pass
1025        
1026    # --------- Helper methods ------------
1027    
1028    def format_time( self, time ):
1029        dict = {'T':' ', 'Z':''}
1030        for i, j in dict.iteritems():
1031            time = time.replace(i, j)
1032        return time
1033