PageRenderTime 107ms CodeModel.GetById 3ms app.highlight 94ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/cloud/providers/eucalyptus.py

https://bitbucket.org/ajish/galaxy-omelogic
Python | 1025 lines | 921 code | 27 blank | 77 comment | 64 complexity | a8937e5a91375e2fb3bedaabda6d817b MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1import subprocess, threading, os, errno, time, datetime
  2from Queue import Queue, Empty
  3from datetime import datetime
  4
  5from galaxy import model # Database interaction class
  6from galaxy.model import mapping
  7from galaxy.datatypes.data import nice_size
  8from galaxy.util.bunch import Bunch
  9from galaxy.cloud import UCIwrapper
 10from Queue import Queue
 11from sqlalchemy import or_, and_
 12
 13import galaxy.eggs
 14galaxy.eggs.require("boto")
 15from boto.ec2.connection import EC2Connection
 16from boto.ec2.regioninfo import RegionInfo
 17import boto.exception
 18import boto
 19
 20import logging
 21log = logging.getLogger( __name__ )
 22
 23uci_states = Bunch(
 24    NEW_UCI = "newUCI",
 25    NEW = "new",
 26    CREATING = "creating",
 27    DELETING_UCI = "deletingUCI",
 28    DELETING = "deleting",
 29    SUBMITTED_UCI = "submittedUCI",
 30    SUBMITTED = "submitted",
 31    SHUTTING_DOWN_UCI = "shutting-downUCI",
 32    SHUTTING_DOWN = "shutting-down",
 33    AVAILABLE = "available",
 34    RUNNING = "running",
 35    PENDING = "pending",
 36    ERROR = "error",
 37    DELETED = "deleted",
 38    SNAPSHOT_UCI = "snapshotUCI",
 39    SNAPSHOT = "snapshot"
 40)
 41
 42instance_states = Bunch(
 43    TERMINATED = "terminated",
 44    SUBMITTED = "submitted",
 45    RUNNING = "running",
 46    PENDING = "pending",
 47    SHUTTING_DOWN = "shutting-down",
 48    ERROR = "error"
 49)
 50
 51store_status = Bunch(
 52    WAITING = "waiting",
 53    IN_USE = "in-use",
 54    CREATING = "creating",
 55    DELETED = 'deleted',
 56    ERROR = "error"
 57)
 58
 59snapshot_status = Bunch(
 60    SUBMITTED = 'submitted',
 61    PENDING = 'pending',
 62    COMPLETED = 'completed',
 63    DELETE = 'delete',
 64    DELETED= 'deleted',
 65    ERROR = "error"
 66)
 67
 68class EucalyptusCloudProvider( object ):
 69    """
 70    Eucalyptus-based cloud provider implementation for managing instances. 
 71    """
 72    STOP_SIGNAL = object()
 73    def __init__( self, app ):
 74        self.type = "eucalyptus" # cloud provider type (e.g., ec2, eucalyptus, opennebula)
 75        self.zone = "epc"
 76        self.queue = Queue()
 77        self.sa_session = app.model.context
 78        
 79        self.threads = []
 80        nworkers = 5
 81        log.info( "Starting eucalyptus cloud controller workers..." )
 82        for i in range( nworkers  ):
 83            worker = threading.Thread( target=self.run_next )
 84            worker.start()
 85            self.threads.append( worker )
 86        log.debug( "%d eucalyptus cloud workers ready", nworkers )
 87        
 88    def shutdown( self ):
 89        """Attempts to gracefully shut down the monitor thread"""
 90        log.info( "sending stop signal to worker threads in eucalyptus cloud manager" )
 91        for i in range( len( self.threads ) ):
 92            self.queue.put( self.STOP_SIGNAL )
 93        log.info( "eucalyptus cloud manager stopped" )
 94    
 95    def put( self, uci_wrapper ):
 96        """
 97        Add uci_wrapper object to the end of the request queue to be handled by 
 98        this cloud provider.
 99        """
100        state = uci_wrapper.get_uci_state()
101        uci_wrapper.change_state( state.split('U')[0] ) # remove 'UCI' from end of state description (i.e., mark as accepted and ready for processing)
102        self.queue.put( uci_wrapper )
103        
104    def run_next( self ):
105        """Process next request, waiting until one is available if necessary."""
106        cnt = 0
107        while 1:
108            uci_wrapper = self.queue.get()
109            uci_state = uci_wrapper.get_uci_state()
110            if uci_state is self.STOP_SIGNAL:
111                return
112            try:
113                if uci_state==uci_states.NEW:
114                    self.create_uci( uci_wrapper )
115                elif uci_state==uci_states.DELETING:
116                    self.delete_uci( uci_wrapper )
117                elif uci_state==uci_states.SUBMITTED:
118                    self.start_uci( uci_wrapper )
119                    #self.dummy_start_uci( uci_wrapper )
120                elif uci_state==uci_states.SHUTTING_DOWN:
121                    self.stop_uci( uci_wrapper )
122                elif uci_state==uci_states.SNAPSHOT:
123                    self.snapshot_uci( uci_wrapper )
124            except:
125                log.exception( "Uncaught exception executing cloud request." )
126            cnt += 1
127            
128    def get_connection( self, uci_wrapper ):
129        """
130        Establishes cloud connection using user's credentials associated with given UCI
131        """
132        log.debug( 'Establishing %s cloud connection.' % self.type )
133        provider = uci_wrapper.get_provider()
134        try:
135            region = RegionInfo( None, provider.region_name, provider.region_endpoint )
136        except Exception, ex:
137            err = "Selecting region with cloud provider failed: " + str( ex )
138            log.error( err )
139            uci_wrapper.set_error( err, True )
140            return None        
141        try:
142            conn = EC2Connection( aws_access_key_id=uci_wrapper.get_access_key(), 
143                                  aws_secret_access_key=uci_wrapper.get_secret_key(), 
144                                  is_secure=provider.is_secure, 
145                                  port=provider.port, 
146                                  region=region, 
147                                  path=provider.path )
148        except boto.exception.EC2ResponseError, e:
149            err = "Establishing connection with cloud failed: " + str( e )
150            log.error( err )
151            uci_wrapper.set_error( err, True )
152            return None
153        
154        return conn
155        
156    def check_key_pair( self, uci_wrapper, conn ):
157        """
158        Check if a key pair associated with this UCI exists on cloud provider.
159        If yes, return key pair name; otherwise, generate a key pair with the cloud
160        provider and, again, return key pair name.
161        Key pair name for given UCI is generated from UCI's name and suffix '_kp' 
162        """
163        kp = None
164        kp_name = uci_wrapper.get_name().replace(' ','_') + "_kp"
165        log.debug( "Checking user's key pair: '%s'" % kp_name )
166        try:
167            kp = conn.get_key_pair( kp_name )
168            uci_kp_name = uci_wrapper.get_key_pair_name()
169            uci_material = uci_wrapper.get_key_pair_material()
170            if kp != None:
171                if kp.name != uci_kp_name or uci_material == None:
172                    # key pair exists on the cloud but not in local database, so re-generate it (i.e., delete and then create)
173                    try: 
174                        conn.delete_key_pair( kp_name )
175                        kp = self.create_key_pair( conn, kp_name )
176                        uci_wrapper.set_key_pair( kp.name, kp.material )
177                    except boto.exception.EC2ResponseError, e:
178                        err = "EC2 response error while deleting key pair: " + str( e )
179                        log.error( err )
180                        uci_wrapper.set_error( err, True )
181            else:
182                try:
183                    kp = self.create_key_pair( conn, kp_name )
184                    uci_wrapper.set_key_pair( kp.name, kp.material )
185                except boto.exception.EC2ResponseError, e:
186                    err = "EC2 response error while creating key pair: " + str( e )
187                    log.error( err )
188                    uci_wrapper.set_error( err, True )
189                except Exception, ex:
190                    err = "Exception while creating key pair: " + str( ex )
191                    log.error( err )
192                    uci_wrapper.set_error( err, True )
193        except boto.exception.EC2ResponseError, e: # No keypair under this name exists so create it
194            if e.code == 'InvalidKeyPair.NotFound': 
195                log.info( "No keypair found, creating keypair '%s'" % kp_name )
196                kp = self.create_key_pair( conn, kp_name )
197                uci_wrapper.set_key_pair( kp.name, kp.material )
198            else:
199                err = "EC2 response error while retrieving key pair: " + str( e )
200                log.error( err )
201                uci_wrapper.set_error( err, True )
202                        
203        if kp != None:
204            return kp.name
205        else:
206            return None
207    
208    def create_key_pair( self, conn, kp_name ):
209        """ Initiate creation of key pair under kp_name by current cloud provider. """
210        try:
211            return conn.create_key_pair( kp_name )
212        except boto.exception.EC2ResponseError, e: 
213            return None
214    
215    def get_mi_id( self, uci_wrapper, i_index ):
216        """
217        Get appropriate machine image (mi) ID based on instance type.
218        """
219        i_type = uci_wrapper.get_instance_type( i_index )
220        if i_type=='m1.small' or i_type=='c1.medium':
221            arch = 'i386'
222        else:
223            arch = 'x86_64' 
224        
225        mi = self.sa_session.query( model.CloudImage ).filter_by( deleted=False, provider_type=self.type, architecture=arch ).first()
226        if mi:
227            return mi.image_id
228        else:
229            err = "Machine image could not be retrieved"
230            log.error( "%s for UCI '%s'." % (err, uci_wrapper.get_name() ) )
231            uci_wrapper.set_error( err+". Contact site administrator to ensure needed machine image is registered.", True )
232            return None
233            
234    def create_uci( self, uci_wrapper ):
235        """ 
236        Create User Configured Instance (UCI) - i.e., create storage volume on cloud provider
237        and register relevant information in local Galaxy database.
238        """
239        conn = self.get_connection( uci_wrapper )
240        
241        # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t
242        # current UCI is 0; therefore, it can be referenced in following code
243        log.info( "Creating volume in zone '%s'..." % uci_wrapper.get_uci_availability_zone() )
244        if uci_wrapper.get_uci_availability_zone()=='':
245            log.info( "Availability zone for UCI (i.e., storage volume) was not selected, using default zone: %s" % self.zone )
246            uci_wrapper.set_store_availability_zone( self.zone )
247        
248        log.debug( "Creating volume; using command: conn.create_volume( %s, '%s', snapshot=None )" % ( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone() ))
249        vol = conn.create_volume( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone(), snapshot=None )
250        uci_wrapper.set_store_volume_id( 0, vol.id ) 
251        
252        # Retrieve created volume again to get updated status
253        try:
254            vl = conn.get_all_volumes( [vol.id] )
255        except boto.exception.EC2ResponseError, e: 
256            err = "EC2 response error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( e )
257            log.error( err )
258            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
259            uci_wrapper.set_error( err, True )
260            return
261        except Exception, ex:
262            err = "Error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( ex )
263            log.error( err )
264            uci_wrapper.set_error( err, True )
265            return
266        
267        if len( vl ) > 0:
268            # EPC does not allow creation of storage volumes (it deletes one as soon as it is created, so manually set uci_state here)
269            if vl[0].status == store_status.DELETED:
270                uci_wrapper.change_state( uci_state=uci_states.AVAILABLE )
271            else:
272                uci_wrapper.change_state( uci_state=vl[0].status )
273            uci_wrapper.set_store_status( vol.id, vl[0].status )
274        else:
275            err = "Volume '" + vol.id +"' not found by EC2 after being created."
276            log.error( err )
277            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
278            uci_wrapper.set_error( err, True )
279
280    def delete_uci( self, uci_wrapper ):
281        """ 
282        Delete UCI - i.e., delete all storage volumes associated with this UCI. 
283        NOTE that this implies deletion of any and all data associated
284        with this UCI from the cloud. All data will be deleted.
285        Information in local Galaxy database is marked as deleted but not actually removed
286        from the database. 
287        """
288        conn = self.get_connection( uci_wrapper )
289        vl = [] # volume list
290        count = 0 # counter for checking if all volumes assoc. w/ UCI were deleted
291        
292        # Get all volumes assoc. w/ UCI, delete them from cloud as well as in local DB
293        vl = uci_wrapper.get_all_stores()
294        deletedList = []
295        failedList = []
296        for v in vl:
297            log.debug( "Deleting volume with id='%s'" % v.volume_id )
298            try:
299                if conn.delete_volume( v.volume_id ):
300                    deletedList.append( v.volume_id )
301                    v.deleted = True
302                    self.sa_session.add( v )
303                    self.sa_session.flush()
304                    count += 1
305                else:
306                    failedList.append( v.volume_id )
307            except boto.exception.EC2ResponseError, e:
308                err = "EC2 response error while deleting storage volume '" + v.volume_id + "': " + str( e )
309                log.error( err )
310                uci_wrapper.set_store_error( err, store_id = v.volume_id )
311                uci_wrapper.set_error( err, True )
312                
313        # Delete UCI if all of associated 
314        if count == len( vl ):
315            uci_wrapper.set_deleted()
316        else:
317            err = "Deleting following volume(s) failed: "+ str( failedList )+". However, these volumes were successfully deleted: " \
318                  + str( deletedList ) +". MANUAL intervention and processing needed."
319            log.error( err )
320            uci_wrapper.set_error( err, True )
321            
322    def snapshot_uci( self, uci_wrapper ):
323        """
324        Initiate creation of a snapshot by cloud provider for all storage volumes 
325        associated with this UCI. 
326        """
327        if uci_wrapper.get_uci_state() != uci_states.ERROR:
328            conn = self.get_connection( uci_wrapper )
329            
330            snapshots = uci_wrapper.get_snapshots( status = snapshot_status.SUBMITTED )
331            for snapshot in snapshots:
332                log.debug( "Snapshot DB id: '%s', volume id: '%s'" % ( snapshot.id, snapshot.store.volume_id ) )
333                try:
334                    snap = conn.create_snapshot( volume_id=snapshot.store.volume_id )
335                    snap_id = str( snap ).split(':')[1]
336                    uci_wrapper.set_snapshot_id( snapshot.id, snap_id )
337                    sh = conn.get_all_snapshots( snap_id ) # get updated status
338                    uci_wrapper.set_snapshot_status( status=sh[0].status, snap_id=snap_id )
339                except boto.exception.EC2ResponseError, e:
340                    err = "Cloud provider response error while creating snapshot: " + str( e )
341                    log.error( err )
342                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
343                    uci_wrapper.set_error( err, True )
344                    return
345                except Exception, ex:
346                    err = "Error while creating snapshot: " + str( ex )
347                    log.error( err )
348                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
349                    uci_wrapper.set_error( err, True )
350                    return
351                    
352            uci_wrapper.change_state( uci_state=uci_states.AVAILABLE )
353        
354#        if uci_wrapper.get_uci_state() != uci_states.ERROR:
355#            
356#            snapshots = uci_wrapper.get_snapshots( status = 'submitted' )
357#            for snapshot in snapshots:
358#                uci_wrapper.set_snapshot_id( snapshot.id, None, 'euca_error' )
359#            
360#            log.debug( "Eucalyptus snapshot attempted by user for UCI '%s'" % uci_wrapper.get_name() )
361#            uci_wrapper.set_error( "Eucalyptus does not support creation of snapshots at this moment. No snapshot or other changes were performed. \
362#                        Feel free to resent state of this instance and use it normally.", True )
363            
364            
365    def add_storage_to_uci( self, uci_wrapper ):
366        """ Adds more storage to specified UCI """
367    
368    def dummy_start_uci( self, uci_wrapper ):
369        
370        uci = uci_wrapper.get_uci()
371        log.debug( "Would be starting instance '%s'" % uci.name )
372#        uci_wrapper.change_state( uci_states.SUBMITTED_UCI )
373#        log.debug( "Set UCI state to SUBMITTED_UCI" )
374        log.debug( "Sleeping a bit... (%s)" % uci.name )
375        time.sleep(10)
376        log.debug( "Woke up! (%s)" % uci.name )
377        
378    def start_uci( self, uci_wrapper ):
379        """
380        Start instance(s) of given UCI on the cloud.  
381        """ 
382        if uci_wrapper.get_uci_state() != uci_states.ERROR:
383            conn = self.get_connection( uci_wrapper )
384            self.check_key_pair( uci_wrapper, conn )
385            if uci_wrapper.get_key_pair_name() == None:
386                err = "Key pair not found"
387                log.error( "%s for UCI '%s'." % ( err, uci_wrapper.get_name() ) )
388                uci_wrapper.set_error( err + ". Try resetting the state and starting the instance again.", True )
389                return
390            
391            i_indexes = uci_wrapper.get_instances_indexes( state=instance_states.SUBMITTED ) # Get indexes of i_indexes associated with this UCI that are in 'submitted' state
392            log.debug( "Starting instances with IDs: '%s' associated with UCI '%s' " % ( i_indexes, uci_wrapper.get_name(),  ) )
393            if len( i_indexes ) > 0:
394                for i_index in i_indexes:
395                    # Get machine image for current instance
396                    mi_id = self.get_mi_id( uci_wrapper, i_index )
397                    log.debug( "mi_id: %s, uci_wrapper.get_key_pair_name(): %s" % ( mi_id, uci_wrapper.get_key_pair_name() ) )
398                    uci_wrapper.set_mi( i_index, mi_id )
399                               
400                    if uci_wrapper.get_uci_state() != uci_states.ERROR:
401                        # Start an instance
402                        log.debug( "Starting UCI instance '%s'" % uci_wrapper.get_name() )
403                        log.debug( "Using following command: conn.run_instances( image_id='%s', key_name='%s', instance_type='%s' )" 
404                                   % ( mi_id, uci_wrapper.get_key_pair_name(), uci_wrapper.get_instance_type( i_index ) ) )
405                        reservation = None
406                        try:
407                            reservation = conn.run_instances( image_id=mi_id, 
408                                                              key_name=uci_wrapper.get_key_pair_name(),
409                                                              instance_type=uci_wrapper.get_instance_type( i_index ) )
410                        except boto.exception.EC2ResponseError, e:
411                            err = "EC2 response error when starting UCI '"+ uci_wrapper.get_name() +"': " + str( e )
412                            log.error( err )
413                            uci_wrapper.set_error( err, True )
414                        except Exception, ex:
415                            err = "Error when starting UCI '" + uci_wrapper.get_name() + "': " + str( ex )
416                            log.error( err )
417                            uci_wrapper.set_error( err, True )
418                        # Record newly available instance data into local Galaxy database
419                        if reservation:
420                            l_time = datetime.utcnow()
421#                            uci_wrapper.set_instance_launch_time( self.format_time( reservation.instances[0].launch_time ), i_index=i_index )
422                            uci_wrapper.set_instance_launch_time( l_time, i_index=i_index )
423                            if not uci_wrapper.uci_launch_time_set():
424                                uci_wrapper.set_uci_launch_time( l_time )
425                            try:
426                                uci_wrapper.set_reservation_id( i_index, str( reservation ).split(":")[1] )
427                                # TODO: if more than a single instance will be started through single reservation, change this reference from element [0]
428                                i_id = str( reservation.instances[0]).split(":")[1]
429                                uci_wrapper.set_instance_id( i_index, i_id )
430                                s = reservation.instances[0].state
431                                uci_wrapper.change_state( s, i_id, s )
432                                vol_id = uci_wrapper.get_store_volume_id( store_id=0 ) # TODO: Once more that one vol/UCI is allowed, update this!
433                                uci_wrapper.set_store_status( vol_id, store_status.WAITING )
434                                log.debug( "Instance of UCI '%s' started, current state: '%s'" % ( uci_wrapper.get_name(), uci_wrapper.get_uci_state() ) )
435                            except boto.exception.EC2ResponseError, e:
436                                err = "EC2 response error when retrieving instance information for UCI '" + uci_wrapper.get_name() + "': " + str( e )
437                                log.error( err )
438                                uci_wrapper.set_error( err, True )
439                    else:
440                        log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
441            else:
442                err = "No instances in state '"+ instance_states.SUBMITTED +"' found for UCI '" + uci_wrapper.get_name() + \
443                      "'. Nothing to start."
444                log.error( err )
445                uci_wrapper.set_error( err, True )
446        else:
447            log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
448        
449    def stop_uci( self, uci_wrapper):
450        """ 
451        Stop all cloud instances associated with given UCI. 
452        """
453        conn = self.get_connection( uci_wrapper )
454        
455        # Get all instances associated with given UCI
456        il = uci_wrapper.get_instances_ids() # instance list
457        # Process list of instances and remove any references to empty instance id's
458        for i in il:
459            if i is None:
460                il.remove( i )
461        log.debug( 'List of instances being terminated: %s' % il )
462        rl = conn.get_all_instances( il ) # Reservation list associated with given instances
463                        
464        # Initiate shutdown of all instances under given UCI
465        cnt = 0
466        stopped = []
467        not_stopped = []
468        for r in rl:
469            for inst in r.instances:
470                log.debug( "Sending stop signal to instance '%s' associated with reservation '%s' (UCI: %s)." % ( inst, r, uci_wrapper.get_name() ) )
471                try:
472                    inst.stop()
473                    uci_wrapper.set_stop_time( datetime.utcnow(), i_id=inst.id )
474                    uci_wrapper.change_state( instance_id=inst.id, i_state=inst.update() )
475                    stopped.append( inst )
476                except boto.exception.EC2ResponseError, e:
477                    not_stopped.append( inst )
478                    err = "EC2 response error when stopping instance '" + inst.instance_id + "': " + str( e )
479                    log.error( err )
480                    uci_wrapper.set_error( err, True )
481                
482        uci_wrapper.reset_uci_launch_time()
483        log.debug( "Termination was initiated for all instances of UCI '%s'." % uci_wrapper.get_name() )
484
485#        dbInstances = get_instances( trans, uci ) #TODO: handle list!
486#        
487#        # Get actual cloud instance object
488#        cloudInstance = get_cloud_instance( conn, dbInstances.instance_id )
489#        
490#        # TODO: Detach persistent storage volume(s) from instance and update volume data in local database
491#        stores = get_stores( trans, uci )
492#        for i, store in enumerate( stores ):
493#            log.debug( "Detaching volume '%s' to instance '%s'." % ( store.volume_id, dbInstances.instance_id ) )
494#            mntDevice = store.device
495#            volStat = None
496##            Detaching volume does not work with Eucalyptus Public Cloud, so comment it out
497##            try:
498##                volStat = conn.detach_volume( store.volume_id, dbInstances.instance_id, mntDevice )
499##            except:
500##                log.debug ( 'Error detaching volume; still going to try and stop instance %s.' % dbInstances.instance_id )
501#            store.attach_time = None
502#            store.device = None
503#            store.inst.instance_id = None
504#            store.status = volStat
505#            log.debug ( '***** volume status: %s' % volStat )
506#        
507#        # Stop the instance and update status in local database
508#        cloudInstance.stop()
509#        dbInstances.stop_time = datetime.utcnow()
510#        while cloudInstance.state != 'terminated':
511#            log.debug( "Stopping instance %s state; current state: %s" % ( str( cloudInstance ).split(":")[1], cloudInstance.state ) )
512#            time.sleep(3)
513#            cloudInstance.update()
514#        dbInstances.state = cloudInstance.state
515#        
516#        # Reset relevant UCI fields
517#        uci.state = 'available'
518#        uci.launch_time = None
519#          
520#        # Persist
521#        session = trans.sa_session
522##        session.save_or_update( stores )
523#        session.save_or_update( dbInstances ) # TODO: Is this going to work w/ multiple instances stored in dbInstances variable?
524#        session.save_or_update( uci )
525#        session.flush()
526#        trans.log_event( "User stopped cloud instance '%s'" % uci.name )
527#        trans.set_message( "Galaxy instance '%s' stopped." % uci.name )
528
529    def update( self ):
530        """ 
531        Run status update on all instances that are in 'running', 'pending', or 'shutting-down' state.
532        Run status update on all storage volumes whose status is 'in-use', 'creating', or 'None'.
533        Run status update on all snapshots whose status is 'pending' or 'delete'  
534        Run status update on any zombie UCIs, i.e., UCI's that is in 'submitted' state for an 
535        extended period of time.
536        
537        Reason behind this method is to sync state of local DB and real-world resources
538        """
539        log.debug( "Running general status update for %s UCIs..." % self.type )
540        # Update instances
541        instances = self.sa_session.query( model.CloudInstance ) \
542            .filter( or_( model.CloudInstance.table.c.state==instance_states.RUNNING, 
543                          model.CloudInstance.table.c.state==instance_states.PENDING, 
544                          model.CloudInstance.table.c.state==instance_states.SHUTTING_DOWN ) ) \
545            .all()
546        for inst in instances:
547            if self.type == inst.uci.credentials.provider.type:
548                log.debug( "[%s] Running general status update on instance '%s'" % ( inst.uci.credentials.provider.type, inst.instance_id ) )
549                self.update_instance( inst )
550        
551        # Update storage volume(s)
552        stores = self.sa_session.query( model.CloudStore ) \
553            .filter( or_( model.CloudStore.table.c.status==store_status.IN_USE, 
554                          model.CloudStore.table.c.status==store_status.CREATING,
555                          model.CloudStore.table.c.status==store_status.WAITING,
556                          model.CloudStore.table.c.status==None ) ) \
557            .all()
558        for store in stores:
559            if self.type == store.uci.credentials.provider.type: # and store.volume_id != None:
560                log.debug( "[%s] Running general status update on store with local database ID: '%s'" % ( store.uci.credentials.provider.type, store.id ) )
561                self.update_store( store )
562        
563        # Update pending snapshots or delete ones marked for deletion
564        snapshots = self.sa_session.query( model.CloudSnapshot ) \
565            .filter( or_( model.CloudSnapshot.table.c.status == snapshot_status.PENDING, model.CloudSnapshot.table.c.status == snapshot_status.DELETE ) ) \
566            .all()
567        for snapshot in snapshots:
568            if self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.PENDING:
569                log.debug( "[%s] Running general status update on snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
570                self.update_snapshot( snapshot )
571            elif self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.DELETE:
572                log.debug( "[%s] Initiating deletion of snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
573                self.delete_snapshot( snapshot )
574        
575        # Attempt at updating any zombie UCIs (i.e., instances that have been in SUBMITTED state for longer than expected - see below for exact time)
576        zombies = self.sa_session.query( model.UCI ).filter_by( state=uci_states.SUBMITTED ).all()
577        for zombie in zombies:
578            log.debug( "zombie UCI: %s" % zombie.name )
579            z_instances = self.sa_session.query( model.CloudInstance ) \
580                .filter( or_( model.CloudInstance.table.c.state != instance_states.TERMINATED,
581                              model.CloudInstance.table.c.state == None ) ) \
582                .all()
583            for z_inst in z_instances:
584                if self.type == z_inst.uci.credentials.provider.type:
585#                    log.debug( "z_inst.id: '%s', state: '%s'" % ( z_inst.id, z_inst.state ) )
586                    td = datetime.utcnow() - z_inst.update_time
587#                    log.debug( "z_inst.id: %s, time delta is %s sec" % ( z_inst.id, td.seconds ) )
588                    if td.seconds > 180: # if instance has been in SUBMITTED state for more than 3 minutes
589                        log.debug( "[%s](td=%s) Running zombie repair update on instance with DB id '%s'" % ( z_inst.uci.credentials.provider.type, td.seconds, z_inst.id ) )
590                        self.process_zombie( z_inst )
591                
592    def update_instance( self, inst ):
593        """
594        Update information in local database for given instance as it is obtained from cloud provider.
595        Along with updating information about given instance, information about the UCI controlling
596        this instance is also updated.
597        """
598        # Get credentials associated wit this instance
599        uci_id = inst.uci_id
600        uci = self.sa_session.query( model.UCI ).get( uci_id )
601        self.sa_session.refresh( uci )
602        conn = self.get_connection_from_uci( uci )
603        
604        # Get reservations handle for given instance
605        try:
606            rl= conn.get_all_instances( [inst.instance_id] )
607        except boto.exception.EC2ResponseError, e:
608            err = "Retrieving instance(s) from cloud failed for UCI '"+ uci.name +"' during general status update: " + str( e )
609            log.error( err )
610            uci.error = err
611            uci.state = uci_states.ERROR
612            self.sa_session.add( uci )
613            self.sa_session.flush()
614            return None
615
616        # Because references to reservations are deleted shortly after instances have been terminated, getting an empty list as a response to a query
617        # typically means the instance has successfully shut down but the check was not performed in short enough amount of time. Until an alternative solution
618        # is found, below code sets state of given UCI to 'error' to indicate to the user something out of ordinary happened.
619        if len( rl ) == 0:
620            err = "Instance ID '"+inst.instance_id+"' was not found by the cloud provider. Instance might have crashed or otherwise been terminated."+ \
621                "Manual check is recommended."
622            log.error( err )
623            inst.error = err
624            uci.error = err
625            inst.state = instance_states.TERMINATED
626            uci.state = uci_states.ERROR
627            uci.launch_time = None
628            self.sa_session.add( inst )
629            self.sa_session.add( uci )
630            self.sa_session.flush()
631        # Update instance status in local DB with info from cloud provider
632        for r in rl:
633            for i, cInst in enumerate( r.instances ):
634                try:
635                    s = cInst.update()
636                    log.debug( "Checking state of cloud instance '%s' associated with UCI '%s' and reservation '%s'. State='%s'" % ( cInst, uci.name, r, s ) )
637                    if  s != inst.state:
638                        inst.state = s
639                        self.sa_session.add( inst )
640                        self.sa_session.flush()
641                         # After instance has shut down, ensure UCI is marked as 'available'
642                        if s == instance_states.TERMINATED and uci.state != uci_states.ERROR:
643                            uci.state = uci_states.AVAILABLE
644                            uci.launch_time = None
645                            self.sa_session.add( uci )
646                            self.sa_session.flush()
647                    # Making sure state of UCI is updated. Once multiple instances become associated with single UCI, this will need to be changed.
648                    if s != uci.state and s != instance_states.TERMINATED: 
649                        uci.state = s                    
650                        self.sa_session.add( uci )
651                        self.sa_session.flush()
652                    if cInst.public_dns_name != inst.public_dns:
653                        inst.public_dns = cInst.public_dns_name
654                        self.sa_session.add( inst )
655                        self.sa_session.flush()
656                    if cInst.private_dns_name != inst.private_dns:
657                        inst.private_dns = cInst.private_dns_name
658                        self.sa_session.add( inst )
659                        self.sa_session.flush()
660                except boto.exception.EC2ResponseError, e:
661                    err = "Updating instance status from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
662                    log.error( err )
663                    uci.error = err
664                    uci.state = uci_states.ERROR
665                    self.sa_session.add( uci )
666                    self.sa_session.flush()
667                    return None
668                
669    def update_store( self, store ):
670        """
671        Update information in local database for given storage volume as it is obtained from cloud provider.
672        Along with updating information about given storage volume, information about the UCI controlling
673        this storage volume is also updated.
674        """
675        # Get credentials associated wit this store
676        uci_id = store.uci_id
677        uci = self.sa_session.query( model.UCI ).get( uci_id )
678        self.sa_session.refresh( uci )
679        conn = self.get_connection_from_uci( uci )
680        
681        if store.volume_id != None:
682            # Get reservations handle for given store 
683            try:
684                log.debug( "Updating storage volume command: vl = conn.get_all_volumes( [%s] )" % store.volume_id )
685                vl = conn.get_all_volumes( [store.volume_id] )
686            except boto.exception.EC2ResponseError, e:
687                err = "Retrieving volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
688                log.error( err )
689                uci.error = err
690                uci.state = uci_states.ERROR
691                self.sa_session.add( uci )
692                self.sa_session.flush()
693                return None
694            
695            # Update store status in local DB with info from cloud provider
696            if len(vl) > 0:
697                try:
698                    log.debug( "Storage volume '%s' current status: '%s'" % (store.volume_id, vl[0].status ) )
699                    if store.status != vl[0].status:
700                        # In case something failed during creation of UCI but actual storage volume was created and yet 
701                        #  UCI state remained as 'new', try to remedy this by updating UCI state here 
702                        if ( store.status == None ) and ( store.volume_id != None ):
703                            uci.state = vl[0].status
704                            self.sa_session.add( uci )
705                            self.sa_session.flush()
706                        # If UCI was marked in state 'CREATING', update its status to reflect new status
707                        elif ( uci.state == uci_states.CREATING ):
708                            # Because Eucalyptus Public Cloud (EPC) deletes volumes immediately after they are created, artificially
709                            # set status of given UCI to 'available' based on storage volume's availability zone (i.e., it's residing
710                            # in EPC as opposed to some other Eucalyptus based cloud that allows creation of storage volumes.
711                            if store.availability_zone == 'epc':
712                                uci.state = uci_states.AVAILABLE
713                            else:
714                                uci.state = vl[0].status
715
716                            self.sa_session.add( uci )
717                            self.sa_session.flush()
718                                
719                        store.status = vl[0].status
720                        self.sa_session.add( store )
721                        self.sa_session.flush()
722                    if store.inst != None:
723                        if store.inst.instance_id != vl[0].instance_id:
724                            store.inst.instance_id = vl[0].instance_id
725                            self.sa_session.add( store )
726                            self.sa_session.flush()
727                    if store.attach_time != vl[0].attach_time:
728                        store.attach_time = vl[0].attach_time
729                        self.sa_session.add( store )
730                        self.sa_session.flush()
731                    if store.device != vl[0].device:
732                        store.device = vl[0].device
733                        self.sa_session.add( store )
734                        self.sa_session.flush()
735                except boto.exception.EC2ResponseError, e:
736                    err = "Updating status of volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
737                    log.error( err )
738                    uci.error = err
739                    uci.state = uci_states.ERROR
740                    self.sa_session.add( uci )
741                    self.sa_session.flush()
742                    return None
743            else:
744                err = "No storage volumes returned by cloud provider on general update"
745                log.error( "%s for UCI '%s'" % ( err, uci.name ) )
746                store.status = store_status.ERROR
747                store.error = err
748                uci.error = err
749                uci.state = uci_states.ERROR
750                self.sa_session.add( uci )
751                self.sa_session.add( store )
752                self.sa_session.flush()
753        else:
754            err = "Missing storage volume ID in local database on general update. Manual check is needed to check " \
755                  "if storage volume was actually created by cloud provider."
756            log.error( "%s (for UCI '%s')" % ( err, uci.name ) )
757            store.status = store_status.ERROR
758            store.error = err
759            uci.error = err
760            uci.state = uci_states.ERROR
761            self.sa_session.add( uci )
762            self.sa_session.add( store )
763            self.sa_session.flush()
764   
765    def update_snapshot( self, snapshot ):
766        """
767        Update information in local database for given snapshot as it is obtained from cloud provider.
768        Along with updating information about given snapshot, information about the UCI controlling
769        this snapshot is also updated.
770        """
771        # Get credentials associated wit this store
772        uci_id = snapshot.uci_id
773        uci = self.sa_session.query( model.UCI ).get( uci_id )
774        self.sa_session.refresh( uci )
775        conn = self.get_connection_from_uci( uci )
776        
777        try:
778            log.debug( "Updating status of snapshot '%s'" % snapshot.snapshot_id )
779            snap = conn.get_all_snapshots( [snapshot.snapshot_id] ) 
780            if len( snap ) > 0:
781                log.debug( "Snapshot '%s' status: %s" % ( snapshot.snapshot_id, snap[0].status ) )
782                snapshot.status = snap[0].status
783                self.sa_session.add( snapshot )
784                self.sa_session.flush()
785            else:
786                err = "No snapshots returned by EC2 on general update"
787                log.error( "%s for UCI '%s'" % ( err, uci.name ) )
788                snapshot.status = snapshot_status.ERROR
789                snapshot.error = err
790                uci.error = err
791                uci.state = uci_states.ERROR
792                self.sa_session.add( uci )
793                self.sa_session.add( snapshot )
794                self.sa_session.flush()
795        except boto.exception.EC2ResponseError, e:
796            err = "EC2 response error while updating snapshot status: " + str( e )
797            log.error( err )
798            snapshot.status = snapshot_status.ERROR
799            snapshot.error = err
800            uci.error = err
801            uci.state = uci_states.ERROR
802            self.sa_session.add( uci )
803            self.sa_session.add( snapshot )
804            self.sa_session.flush()
805        except Exception, ex:
806            err = "Error while updating snapshot status: " + str( ex )
807            log.error( err )
808            snapshot.status = snapshot_status.ERROR
809            snapshot.error = err
810            uci.error = err
811            uci.state = uci_states.ERROR
812            self.sa_session.add( uci )
813            self.sa_session.add( snapshot )
814            self.sa_session.flush()
815        
816    def delete_snapshot( self, snapshot ):
817        """
818        Initiate deletion of given snapshot from cloud provider.
819        """
820        if snapshot.status == snapshot_status.DELETE:
821            # Get credentials associated wit this store
822            uci_id = snapshot.uci_id
823            uci = self.sa_session.query( model.UCI ).get( uci_id )
824            self.sa_session.refresh( uci )
825            conn = self.get_connection_from_uci( uci )
826            
827            try:
828                log.debug( "Deleting snapshot '%s'" % snapshot.snapshot_id )
829                snap = conn.delete_snapshot( snapshot.snapshot_id )
830                if snap == True:
831                    snapshot.deleted = True
832                    snapshot.status = snapshot_status.DELETED
833                    self.sa_session.add( snapshot )
834                    self.sa_session.flush()
835                return snap
836            except boto.exception.EC2ResponseError, e:
837                err = "EC2 response error while deleting snapshot: " + str( e )
838                log.error( err )
839                snapshot.status = snapshot_status.ERROR
840                snapshot.error = err
841                uci.error = err
842                uci.state = uci_states.ERROR
843                self.sa_session.add( uci )
844                self.sa_session.add( snapshot )
845                self.sa_session.flush()
846            except Exception, ex:
847                err = "Error while deleting snapshot: " + str( ex )
848                log.error( err )
849                snapshot.status = snapshot_status.ERROR
850                snapshot.error = err
851                uci.error = err
852                uci.state = uci_states.ERROR
853                self.sa_session.add( uci )
854                self.sa_session.add( snapshot )
855                self.sa_session.flush()
856        else:
857            err = "Cannot delete snapshot '"+snapshot.snapshot_id+"' because its status is '"+snapshot.status+"'. Only snapshots with '" + \
858                        snapshot_status.COMPLETED+"' status can be deleted."
859            log.error( err )
860            snapshot.error = err
861            self.sa_session.add( snapshot )
862            self.sa_session.flush()
863            
864    def process_zombie( self, inst ):
865        """
866        Attempt at discovering if starting a cloud instance was successful but local database was not updated
867        accordingly or if something else failed and instance was never started. Currently, no automatic 
868        repairs are being attempted; instead, appropriate error messages are set.
869        """
870        uci_id = inst.uci_id
871        uci = self.sa_session.query( model.UCI ).get( uci_id )
872        self.sa_session.refresh( uci )
873        
874        # Check if any instance-specific information was written to local DB; if 'yes', set instance and UCI's error message 
875        # suggesting manual check.
876        if inst.launch_time != None or inst.reservation_id != None or inst.instance_id != None:
877            # Try to recover state - this is best-case effort, so if something does not work immediately, not
878            # recovery steps are attempted. Recovery is based on hope that instance_id is available in local DB; if not,
879            # report as error.
880            # Fields attempting to be recovered are: reservation_id, instance status, and launch_time 
881            if inst.instance_id != None:
882                conn = self.get_connection_from_uci( uci )
883                rl = conn.get_all_instances( [inst.instance_id] ) # reservation list
884                # Update local DB with relevant data from instance
885                if inst.reservation_id == None:
886                    try:
887                        inst.reservation_id = str(rl[0]).split(":")[1]
888                    except: # something failed, so skip
889                        pass
890                
891                try:
892                    state = rl[0].instances[0].update()
893                    inst.state = state
894                    uci.state = state
895                    self.sa_session.add( inst )
896                    self.sa_session.add( uci )
897                    self.sa_session.flush()
898                except: # something failed, so skip
899                    pass
900                
901                if inst.launch_time == None:
902                    try:
903                        launch_time = self.format_time( rl[0].instances[0].launch_time )
904                        inst.launch_time = launch_time
905                        self.sa_session.add( inst )
906                        self.sa_session.flush() 
907                        if inst.uci.launch_time == None:
908                            uci.launch_time = launch_time
909                            self.sa_session.add( uci )
910                            self.sa_session.flush()
911                    except: # something failed, so skip
912                        pass
913            else:
914                err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
915                      "' seems to have failed. Because it appears that cloud instance might have gotten started, manual check is recommended."
916                inst.error = err
917                inst.state = instance_states.ERROR
918                inst.uci.error = err
919                inst.uci.state = uci_states.ERROR
920                log.error( err )
921                self.sa_session.add( inst )
922                self.sa_session.add( uci )
923                self.sa_session.flush()         
924                
925        else: #Instance most likely never got processed, so set error message suggesting user to try starting instance again.
926            err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
927                  "' seems to have failed. Because it appears that cloud instance never got started, it should be safe to reset state and try " \
928                  "starting the instance again."
929            inst.error = err
930            inst.state = instance_states.ERROR
931            uci.error = err
932            uci.state = uci_states.ERROR
933            log.error( err )
934            self.sa_session.add( inst )
935            self.sa_session.add( uci )
936            self.sa_session.flush()
937#            uw = UCIwrapper( inst.uci )
938#            log.debug( "Try automatically re-submitting UCI '%s'." % uw.get_name() )
939
940    def get_connection_from_uci( self, uci ):
941        """
942        Establish and return connection to cloud provider. Information needed to do so is obtained
943        directly from uci database object.
944        """
945        log.debug( 'Establishing %s cloud connection' % self.type )
946        a_key = uci.credentials.access_key
947        s_key = uci.credentials.secret_key
948        # Get connection
949        try:
950            region = RegionInfo( None, uci.credentials.provider.region_name, uci.credentials.provider.region_endpoint )
951#            log.debug( "[%s] Using following command to connect to cloud provider: "  
952#                                "conn = EC2Connection( aws_access_key_id=%s, " 
953#                                                      "aws_secret_access_key=%s, " 
954#                                                      "port=%s, "
955#                                                      "is_secure=%s, " 
956#                                                      "region=region, "
957#                                                      "path=%s )" % ( self.type, a_key, s_key, uci.credentials.provider.is_secure, uci.credentials.provider.port, uci.credentials.provider.path ) ) 
958            conn = EC2Connection( aws_access_key_id=a_key, 
959                                  aws_secret_access_key=s_key, 
960                                  is_secure=uci.credentials.provider.is_secure,
961                                  port=uci.credentials.provider.port,   
962                                  region=region, 
963                                  path=uci.credentials.provider.path )
964        except boto.except

Large files are truncated, but you can click here to view the full file