#!/usr/bin/python
import sys
import subprocess
import time
import re
import math
import uuid
import paramiko
from datetime import datetime
from datetime import timedelta
from threading import Thread
from gccommon import *
from itertools import izip_longest
from bisect import *
#from pyjolokia import *  # required by getCassandraJolokiaConn()/compact_greenhints()

USERNAME = "wkatsak"
def execute_remote_command_sync(host, username, command, max_tries=5, retry_delay=10, timeout=30):
    cmd = "bash -c 'source ~/.bash_profile && %s'" % command

    for i in xrange(0, max_tries):
        try:
            sshc = paramiko.SSHClient()
            sshc.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            sshc.load_system_host_keys()
            sshc.connect(host, port=22, username=username, timeout=timeout)
            transport = sshc.get_transport()
            transport.set_keepalive(30)

            ssh_in, ssh_out, ssh_err = sshc.exec_command(cmd)
            raw_out = ssh_out.readlines()
            raw_err = ssh_err.readlines()
            sshc.close()

            # strip trailing newlines but keep any other whitespace intact
            out = [line.replace("\n", "") for line in raw_out]
            err = [line.replace("\n", "") for line in raw_err]
            return out, err
        except Exception:
            print "Host: %s, could not execute command: %s" % (host, cmd)
            time.sleep(retry_delay)

    # every attempt failed; return empty output rather than raising
    return [], []
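
# Example usage (a sketch; "node01" is a hypothetical hostname, not part of
# this module):
#
#   out, err = execute_remote_command_sync("node01", USERNAME, "uptime")
#   for line in out:
#       print line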

def execute_remote_command_wait(host, username, command, interval=30, outfile="/dev/null", errfile="/dev/null", status=True):
    # launch the command under gcdaemon.py with a unique name, then poll
    # until the daemon reports that it has exited
    name = "cmd-%s" % str(uuid.uuid1())
    remote_cmd = "%s/gcdaemon.py --start --name %s --out %s --err %s %s" % (GC_PATH, name, outfile, errfile, command)

    out, err = execute_remote_command_sync(host, username, remote_cmd)

    while True:
        check_cmd = "%s/gcdaemon.py --isrunning --name %s" % (GC_PATH, name)
        out, err = execute_remote_command_sync(host, username, check_cmd)
        if len(out) <= 0:
            return

        # we asked "is running?": 0 means finished, anything else means
        # still running
        reply = int(out[0].strip())
        if reply == 0:
            if status:
                sys.stdout.write("\n")
                sys.stdout.flush()
            break
        else:
            if status:
                sys.stdout.write(". ")
                sys.stdout.flush()

        time.sleep(interval)
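
# Example usage (hypothetical command and output path; the remote side must
# have gcdaemon.py available under GC_PATH):
#
#   execute_remote_command_wait("node01", USERNAME, "my_long_job.sh",
#                               interval=10, outfile="/tmp/job.out")
#   # prints ". " on every poll until gcdaemon.py reports the job has exited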

class ExecuteResults(object):
    """Holder for the stdout/stderr captured by a worker thread."""
    def __init__(self):
        self.out = []
        self.err = []

def execute_remote_command_thread(host, username, cmd, results):
    results.out, results.err = execute_remote_command_sync(host, username, cmd)

def execute_remote_command_multihost(hosts, username, cmd):
    # run the same command on every host concurrently, one thread per host
    out_dict = dict()
    err_dict = dict()
    thread_list = list()
    results_dict = dict()

    for host in hosts:
        results = ExecuteResults()
        results_dict[host] = results
        thread = Thread(target=execute_remote_command_thread, args=(host, username, cmd, results))
        thread_list.append(thread)
        thread.start()

    for thread in thread_list:
        thread.join()

    for host in hosts:
        out_dict[host] = results_dict[host].out
        err_dict[host] = results_dict[host].err

    return out_dict, err_dict
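
# Example usage (hypothetical host list; fans the same command out to every
# host and collects the results keyed by hostname):
#
#   hosts = ["node01", "node02", "node03"]
#   outs, errs = execute_remote_command_multihost(hosts, USERNAME, "hostname")
#   for host in hosts:
#       print host, outs[host]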

def execute_remote_commands_parallel(host, username, cmds):
    # run several commands on one host concurrently, one thread per command
    out_dict = dict()
    err_dict = dict()
    thread_list = list()
    results_dict = dict()

    for cmd in cmds:
        results = ExecuteResults()
        results_dict[cmd] = results
        thread = Thread(target=execute_remote_command_thread, args=(host, username, cmd, results))
        thread_list.append(thread)
        thread.start()
        # stagger thread startup so we do not open every SSH session at once
        time.sleep(1)

    for thread in thread_list:
        thread.join()

    for cmd in cmds:
        out_dict[cmd] = results_dict[cmd].out
        err_dict[cmd] = results_dict[cmd].err

    return out_dict, err_dict
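
# Example usage (hypothetical commands; results are keyed by the command
# string itself):
#
#   cmds = ["df -h /", "uptime"]
#   outs, errs = execute_remote_commands_parallel("node01", USERNAME, cmds)
#   print outs["uptime"]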

def find_remote_pid(host, process_name):
    # scan the remote process table and return the pid of the first process
    # whose command line matches process_name, or "" if none does
    cmd = "ps axo pid,command"
    ps_re = re.compile(".*" + process_name + ".*")
    out, err = execute_remote_command_sync(host, USERNAME, cmd)
    for line in out:
        m = ps_re.search(line)
        if not m:
            continue
        return line.strip().split(" ")[0]
    return ""

def kill_cassandra_node(host):
    pid = find_remote_pid(host, "CassandraDaemon")
    if pid != "":
        cmd = "pkill -SIGHUP -f CassandraDaemon"
        execute_remote_command_sync(host, USERNAME, cmd)
    else:
        print "...Cassandra not running on " + host

def sleep_cassandra_node(host):
    pid = find_remote_pid(host, "CassandraDaemon")
    if pid != "":
        cmd = "cassandra_sleep"
        execute_remote_command_sync(host, USERNAME, cmd)
    else:
        print "...Cassandra not running on " + host

def wake_cassandra_node(host):
    pid = find_remote_pid(host, "CassandraDaemon")
    if pid != "":
        cmd = "cassandra_wake"
        execute_remote_command_sync(host, USERNAME, cmd)
    else:
        print "...Cassandra not running on " + host

def start_cassandra_node(host):
    cmd = "cassandra"
    out, err = execute_remote_command_sync(host, USERNAME, cmd)

def kill_cassandra_cluster(control_node):
    cmd = "cassandra_cluster_kill"
    execute_remote_command_sync(control_node, USERNAME, cmd)

def start_cassandra_cluster(control_node):
    cmd = "cassandra_cluster_quickstart"
    execute_remote_command_sync(control_node, USERNAME, cmd)

def parasol_s3(control_node, host):
    print "Sending " + host + " to S3..."
    cmd = "sudo parasol --s3 %s" % host
    execute_remote_command_sync(control_node, USERNAME, cmd)

def parasol_wake(control_node, host):
    print "Waking up " + host + "..."
    cmd = "sudo parasol --wake %s" % host
    execute_remote_command_sync(control_node, USERNAME, cmd)

def cassandra_nodetool(control_node, action_node, params):
    cmd = "nodetool -host %s %s" % (action_node, params)
    execute_remote_command_sync(control_node, USERNAME, cmd)

def cassandra_nodetool_direct(action_node, params):
    cmd = "nodetool %s" % params
    execute_remote_command_sync(action_node, USERNAME, cmd)

def cassandra_nodetool_parallel(control_node, action_nodes, params):
    # build one nodetool command per target node and run them all in
    # parallel from the control node
    params_str = " ".join(params)
    cmds = ["nodetool -host %s %s" % (node, params_str) for node in action_nodes]
    execute_remote_commands_parallel(control_node, USERNAME, cmds)
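
# Example usage (hypothetical node names; "ring" and "repair" are standard
# nodetool commands):
#
#   cassandra_nodetool("control01", "node01", "ring")
#   cassandra_nodetool_parallel("control01", ["node01", "node02"], ["repair"])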

def interpolate(p1, p2, x):
    # linear interpolation between two (datetime, value) points, evaluated
    # at datetime x
    x1, y1 = p1
    x2, y2 = p2
    x1 = dt_to_unix(x1)
    x2 = dt_to_unix(x2)
    x = dt_to_unix(x)
    m = float(y2 - y1) / float(x2 - x1)
    b = y1 - m*x1
    return m*x + b

def interpolate_dt(p1, p2, x):
    return interpolate(p1, p2, x)

def extrapolate_dt(p1, p2, x):
    x1, y1 = p1
    x2, y2 = p2
    x1 = dt_to_unix(x1)
    x2 = dt_to_unix(x2)
    x = dt_to_unix(x)
    y = y1 + ((1.0*x - x1) / (x2 - x1)) * (y2 - y1)
    return y

def dt_to_unix(dt):
    # seconds since the epoch, assuming dt is a naive UTC datetime
    return (dt - datetime(1970, 1, 1)).total_seconds()

def normalize(value, maximum):
    return float(value) / float(maximum)

def denormalize(value, maximum):
    return int(math.ceil(float(value) * float(maximum)))
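
# Worked example (hypothetical values): with p1 = (t0, 0.0) and
# p2 = (t0 + 10s, 100.0), the points define the line y = 10*x + b in
# unix-seconds space, so interpolate(p1, p2, t0 + 5s) == 50.0. Likewise
# normalize(5, 20) == 0.25 and denormalize(0.25, 20) == 5 (ceil rounds
# back up to a whole unit count).
#
#   t0 = datetime(2013, 1, 1)
#   y = interpolate((t0, 0.0), (t0 + timedelta(seconds=10), 100.0),
#                   t0 + timedelta(seconds=5))
#   assert y == 50.0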

def sleep_delta(delta):
    # sleep for the duration of a timedelta
    sleeptime = 60*60*24*delta.days + delta.seconds + 0.000001*delta.microseconds
    if sleeptime > 0:
        time.sleep(sleeptime)

def delta_total_microseconds(delta):
    return delta.microseconds + (delta.seconds + delta.days * 24 * 3600) * 10**6

def parse_timestamp(timestamp):
    return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")

def wait_with_status(sec_to_wait, status_interval=5):
    # sleep for sec_to_wait seconds, printing ". " every status_interval
    # seconds; measure the time actually slept rather than trusting sleep()
    total_waited_ms = 0

    while (total_waited_ms / 1000) < sec_to_wait:
        start_dt = datetime.now()
        time.sleep(status_interval)
        sys.stdout.write(". ")
        sys.stdout.flush()
        end_dt = datetime.now()
        diff = end_dt - start_dt
        diff_ms = diff.seconds*1000 + diff.microseconds/1000
        total_waited_ms += diff_ms

    sys.stdout.write("\n")

def calcOffNodes(total_nodes, off_skip):
    # choose which OPTIONAL_NODES to power off so that total_nodes stay on,
    # stepping through the optional list with a stride of off_skip
    remaining = len(MINIMUM_NODES + OPTIONAL_NODES) - total_nodes
    nodes_off = []
    i = 0

    while remaining > 0:
        nodes_off.append(OPTIONAL_NODES[i])
        remaining -= 1
        i += off_skip

        if i > len(OPTIONAL_NODES) - 1:
            # with a stride of 2, wrap around and take the odd indices next
            if off_skip == 2:
                i = 1
            else:
                break

    return nodes_off

def optionalSortKey(node):
    # sort key that orders even-indexed optional nodes before odd-indexed
    # ones, preserving index order within each group
    index = OPTIONAL_NODES.index(node)

    if index % 2 == 1:
        key = index + len(OPTIONAL_NODES)
    else:
        key = index

    return key
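
# Worked example (assumes a hypothetical gccommon with 4 MINIMUM_NODES and
# OPTIONAL_NODES = ["opt0", "opt1", "opt2", "opt3"]; the real lists live in
# gccommon):
#
#   calcOffNodes(6, 1)   # -> ["opt0", "opt1"]  (turn off 8 - 6 = 2 nodes)
#   calcOffNodes(6, 2)   # -> ["opt0", "opt2"]  (skip every other node)
#   sorted(OPTIONAL_NODES, key=optionalSortKey)
#   # -> ["opt0", "opt2", "opt1", "opt3"]  (even indices first, then odd)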

CASSANDRA_JOLOKIA_URL = "http://%s:8778/jolokia/"

def getCassandraJolokiaConn(host, timeout=None):
    url = CASSANDRA_JOLOKIA_URL % host
    if timeout is None:
        jolokia_conn = Jolokia(url)
    else:
        jolokia_conn = Jolokia(url, timeout=timeout)
    return jolokia_conn

def compact_greenhints(node):
    mbean = "org.apache.cassandra.db:type=StorageService"
    operation = "forceTableCompaction"
    try:
        jolokia_conn = getCassandraJolokiaConn(node, timeout=10000000000)
        jolokia_conn.request(type="exec", mbean=mbean, operation=operation, arguments=["system", "GreenHintsColumnFamily"])
    except JolokiaError:
        raise Exception("Could not compact greenhints on %s" % node)

def compact_greenhints_ssh(node):
    cmd = "nodetool compact system GreenHintsColumnFamily"
    execute_remote_command_sync(node, USERNAME, cmd)

# from http://docs.python.org/2/library/itertools.html#recipes
def grouper(iterable, n, fillvalue=None):
    "Collect data into fixed-length chunks or blocks"
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)
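
# Example: handy for batching hosts into fixed-size chunks; the final batch
# is padded with the fillvalue (None by default):
#
#   for batch in grouper(["node01", "node02", "node03"], 2):
#       print [h for h in batch if h is not None]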

# from http://docs.python.org/2/library/bisect.html
def index(a, x):
    'Locate the leftmost value exactly equal to x'
    i = bisect_left(a, x)
    if i != len(a) and a[i] == x:
        return i
    raise ValueError

def find_lt(a, x):
    'Find rightmost value less than x'
    i = bisect_left(a, x)
    if i:
        return a[i-1]
    raise ValueError

def find_le(a, x):
    'Find rightmost value less than or equal to x'
    i = bisect_right(a, x)
    if i:
        return a[i-1]
    raise ValueError

def find_gt(a, x):
    'Find leftmost value greater than x'
    i = bisect_right(a, x)
    if i != len(a):
        return a[i]
    raise ValueError

def find_ge(a, x):
    'Find leftmost item greater than or equal to x'
    i = bisect_left(a, x)
    if i != len(a):
        return a[i]
    raise ValueError
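
# Example (a must already be sorted, as the bisect recipes require):
#
#   a = [1, 3, 3, 7]
#   index(a, 3)    # -> 1 (leftmost exact match)
#   find_lt(a, 3)  # -> 1
#   find_le(a, 3)  # -> 3
#   find_gt(a, 3)  # -> 7
#   find_ge(a, 7)  # -> 7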

class Transition(object):
    def __init__(self, tid, node, offDt, offIndex, transDt, transIndex, onDt, onIndex):
        self.tid = tid
        self.node = node
        self.offDt = offDt
        self.offIndex = offIndex
        self.transDt = transDt
        self.transIndex = transIndex
        self.onDt = onDt
        self.onIndex = onIndex

        # filled in later by TransitionAnalyzer.postprocessTransitions()
        self.nodesReady = -1
        self.integratedOffWorkload = -1
        self.maxTransitionLatency = -1
        self.avgTransitionWorkload = -1
        self.schedDt = transDt
        self.schedIndex = transIndex

    def __str__(self):
        return "Transition(tid=%d, node=%s, offDt=%s, transDt=%s, onDt=%s, nodesReady=%d, integratedOffWorkload=%0.2f, maxTransitionLatency=%0.2f, avgTransitionWorkload=%0.2f)" % (self.tid, self.node, str(self.offDt), str(self.transDt), str(self.onDt), self.nodesReady, self.integratedOffWorkload, self.maxTransitionLatency, self.avgTransitionWorkload)

class TransitionAnalyzer(object):

    def __init__(self, results):
        self.results = results
        self.extractTransitions()

    def integrateOffWorkload(self, offIndex, transIndex, timestamps):
        # integrate workload over the interval the node spent off (left
        # Riemann sum of workload * elapsed seconds), scaled down by 100000
        integral = 0.0
        for i in xrange(offIndex+1, transIndex):
            thisDt = timestamps[i]
            lastDt = timestamps[i-1]
            delta = thisDt - lastDt
            seconds = delta.total_seconds()

            workload = self.results.getCommonValue(timestamps[i], "actual_workload")
            integral += workload*seconds

        return integral / 100000

    def maxTransitionLatency(self, transIndex, onIndex, timestamps):
        maxLatency = 0.0
        for i in xrange(transIndex, onIndex):
            value = self.results.getCommonValue(timestamps[i], "readlatency_99_window")
            if value > maxLatency:
                maxLatency = value
        return maxLatency

    def avgTransitionWorkload(self, transIndex, onIndex, timestamps):
        accum = 0.0
        count = 0
        for i in xrange(transIndex, onIndex):
            value = self.results.getCommonValue(timestamps[i], "actual_workload")
            if not math.isnan(value):
                accum += value
                count += 1
        try:
            return accum / count
        except ZeroDivisionError:
            return 0.0

    def postprocessTransitions(self, transitions, timestamps):
        # walk the transitions latest-first and clamp each transition's start
        # index so it does not precede the end of the previous (earlier)
        # transition; then fill in the derived metrics
        transitions.sort(key=lambda x: x.onDt)
        transitions.reverse()

        for i in xrange(0, len(transitions)-1):
            transitions[i].transIndex = max(transitions[i].transIndex, transitions[i+1].onIndex)
            transitions[i].transDt = timestamps[transitions[i].transIndex]

        for transition in transitions:
            transition.nodesReady = self.results.getCommonValue(transition.transDt, "nodes_ready")
            transition.integratedOffWorkload = self.integrateOffWorkload(transition.offIndex, transition.transIndex, timestamps)
            transition.maxTransitionLatency = self.maxTransitionLatency(transition.transIndex, transition.onIndex, timestamps)
            transition.avgTransitionWorkload = self.avgTransitionWorkload(transition.transIndex, transition.onIndex, timestamps)

    def extractTransitions(self):
        # scan each optional node's state series for off (0) -> transitioning
        # (1) -> on (2) cycles and record each complete cycle as a Transition
        tids = 0
        transitions = []
        timestamps = self.results.availableTimestamps()
        max_i = len(timestamps)

        for node in OPTIONAL_NODES:
            current_i = 0
            while current_i < max_i:
                # find the first time the node is off
                offIndex = next((i for i in xrange(current_i, max_i) if self.results.getNodeValue(timestamps[i], node, "state") == 0), None)
                if offIndex is None:
                    break
                off_dt = timestamps[offIndex]

                # then the first time it starts transitioning back on
                transIndex = next((i for i in xrange(offIndex, max_i) if self.results.getNodeValue(timestamps[i], node, "state") == 1), None)
                if transIndex is None:
                    break
                trans_dt = timestamps[transIndex]

                # then the first time it is fully on again
                onIndex = next((i for i in xrange(transIndex, max_i) if self.results.getNodeValue(timestamps[i], node, "state") == 2), None)
                if onIndex is None:
                    # the trace ends mid-transition; drop the incomplete cycle
                    onIndex = max_i-1
                else:
                    on_dt = timestamps[onIndex]
                    transitions.append(Transition(tids, node, off_dt, offIndex, trans_dt, transIndex, on_dt, onIndex))
                    tids += 1

                current_i = onIndex + 1

        self.postprocessTransitions(transitions, timestamps)

        transitions.sort(key=lambda x: x.transDt)
        self.transitions = transitions

    def getTransition(self, dt):
        for transition in self.transitions:
            if transition.transDt <= dt and dt <= transition.onDt:
                return transition
        raise Exception("No match for dt %s..." % str(dt))

    def getTransitionLength(self, dt):
        transition = self.getTransition(dt)
        delta = transition.onDt - transition.transDt
        return delta.total_seconds()

    def getOffLength(self, dt):
        transition = self.getTransition(dt)
        delta = transition.transDt - transition.offDt
        return delta.total_seconds()

    def getIntegratedOffWorkload(self, dt):
        transition = self.getTransition(dt)
        return transition.integratedOffWorkload

    def getMaxTransitionLatency(self, dt):
        transition = self.getTransition(dt)
        return transition.maxTransitionLatency

    def getAvgTransitionWorkload(self, dt):
        transition = self.getTransition(dt)
        return transition.avgTransitionWorkload
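
# Example usage (a sketch; `results` is assumed to be a results object from
# elsewhere in this codebase exposing availableTimestamps(), getNodeValue()
# and getCommonValue(), none of which this module defines):
#
#   analyzer = TransitionAnalyzer(results)
#   for t in analyzer.transitions:
#       print t
#   dt = analyzer.transitions[0].transDt
#   print analyzer.getTransitionLength(dt), analyzer.getMaxTransitionLatency(dt)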