"""Poll AWS CloudWatch through the ``aws`` CLI and forward the results.

For every stat listed in ``listOfStats.txt`` the script asks CloudWatch for
recent datapoints, sends the second-to-latest one to a Graphite server over a
plain TCP socket, and records the same values in two CSV files (one for
ELB/custom/Dynamo stats, one for EC2 stats) that are then uploaded by FTP.
It is meant to be run once per invocation (e.g. from a scheduled task).
"""
import subprocess
import json
import datetime
import time
import calendar
import socket
import os
import re
import threading
try:
    import thread  # Python 2 module name
except ImportError:  # Python 3 renamed the module to _thread
    import _thread as thread
#import MySQLdb
import sys
from ftplib import FTP

DEBUG = False
PrintOutDelimeter = ' '

#listOfStats.txt should be in the same folder as this .py file.
FILEPATHOFSTATSTOUPDATE = os.path.join(os.path.dirname(__file__), r'listOfStats.txt')
LOADBALANCERS = ["vpc-elb-rpdxtmweb", "vpc-elb-rpdxcsweb", 'vpc-elb-rpdxtwem']

# regex matched against the EC2 machine name -> bucket the machine belongs to
EC2MACHINENAMEBUCKETS = {
    r'RPDXCSWEB\d+$': 'rpdxcsweb',
    r'RPDXNUM\d+$': 'rpdxnum',
    r'RPDXREDIS\d+$': 'rpdxredis',
    r'RPDXSQL\d+$': 'rpdxsql',
    r'RPDXTMWEB\d+$': 'rpdxtmweb',
    r'RPDXNUT\d+$': 'rpdxnut',
    r'RPDXCSV\d+$': 'rpdxcsv',
}
# filled at start-up with (friendlyName, instanceId, bucketName) tuples
EC2VALIDINSTANCEIDS = []
AWSSERVERNAMESPACES = {'ELB': r'AWS/ELB',
                       'CUSTOM': '"Custom Metrics"',
                       'EC2': r'AWS/EC2',
                       'Dynamo': r'AWS/DynamoDB'}

# --start-time / --end-time strings handed to the aws CLI; set by main()
g_startStatTime = None
g_currentTime = None

# How far back the CloudWatch query window reaches. Only the second-to-latest
# datapoint is used, so the window just has to be wide enough to hold at
# least two datapoints for the longest period requested (300 seconds).
STAT_LOOKBACK_SECONDS = 65 * 60

TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

# what graphite server we are sending to
GRAPHITE_IP = 'tewrgraph1.taketwo.online'
GRAPHITE_PORT = 2003

# NOTE(review): credentials are hard-coded in source below - consider moving
# them to configuration that is not checked in.
# what internal sql server we are sending to
SQL_ADDRESS = "RSGSANFPS2/online"
SQL_USER = "online"
SQL_PASSWORD = "ewrewrewr"
SQL_DB = "online"

FTP_HOST = "ftp.rockstarsd.com"
FTP_USER = "online"
FTP_PASSWORD = "ewrewrewr"
FTP_DIRECTORY = {'AWS': "GraphiteLogs\\AWS", 'EC2': 'GraphiteLogs\\EC2'}

THREAD_COUNTER = 0
LIST_OF_THREADS_FINISHED = []
AWS_CSV_STRING = ''
EC2_CSV_STRING = ''
SOCK = None

# Guards EC2_CSV_STRING, which is appended to from several worker threads.
_CSV_LOCK = threading.Lock()


#
# HELPER FUNCTIONS
#
def getLatestTimestampEntry(jsonListOfDict):
    """Return the second-to-latest datapoint, ordered by its 'Timestamp'.

    The latest datapoint may still be incomplete, so the one before it is
    preferred. When only a single datapoint exists it is returned as-is
    rather than failing. An empty list raises IndexError.
    """
    ordered = sorted(
        jsonListOfDict,
        key=lambda x: datetime.datetime.strptime(x['Timestamp'], TIMESTAMP_FORMAT))
    if len(ordered) < 2:
        return ordered[-1]
    return ordered[-2]


def getAWSStatListFromFile():
    """Read the stat list file and return one list of fields per line.

    Fields are separated by ';' and stripped of surrounding whitespace;
    whitespace-only lines are skipped. The field positions are hard-coded:
      [0] structured Graphite name of the stat
      [1] stat name to ping AWS with
      [2] value passed to --statistics
      [3] key into AWSSERVERNAMESPACES
    """
    with open(FILEPATHOFSTATSTOUPDATE, 'r') as f:
        return [[item.strip() for item in line.split(';')]
                for line in f if not line.isspace()]


def getSQLTimeString(timeSinceEpoch):
    """Format seconds-since-epoch as e.g. 'Mon, 01 Jan 2018 00:00:00 +0000' (UTC)."""
    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(timeSinceEpoch))


def _formatAWSTime(structTime):
    """Format a time.struct_time as 'Y-M-DTHH:MM:SS' (month/day not zero padded)."""
    return "%d-%d-%dT%02d:%02d:%02d" % tuple(structTime[:6])


def getAWSTimeString():
    """Return the current UTC time in the format used for the aws CLI and CSV rows."""
    return _formatAWSTime(time.gmtime(int(time.time())))


def getCSVFileName(statType):
    """Return a timestamped CSV file name for 'AWS' or 'EC2'; None for anything else."""
    timestamp = int(time.time())
    if statType == 'AWS':
        return "aws" + str(timestamp) + ".csv"
    elif statType == 'EC2':
        return 'ec2_' + str(timestamp) + '.csv'


def sendFileToFTP(f, statType):
    """Upload the open file ``f`` into the FTP directory configured for ``statType``.

    ``f`` must expose ``name`` and ``readline``; on Python 3 ``storlines``
    requires it to be opened in binary mode. The connection is released even
    when the upload fails, and the original error is re-raised.
    """
    ftp = FTP(FTP_HOST)
    try:
        ftp.login(user=FTP_USER, passwd=FTP_PASSWORD)
        ftp.cwd(FTP_DIRECTORY[statType])
        ftp.storlines("STOR " + f.name, f)
        ftp.quit()
    except Exception:
        # quit() needs a healthy control connection; just drop the socket.
        ftp.close()
        raise


def _sendToGraphite(sock, outstring):
    """Send one formatted stat line on ``sock``, encoding text to bytes if needed."""
    if not isinstance(outstring, bytes):
        outstring = outstring.encode('utf-8')
    sock.sendall(outstring)
#
# /HELPER FUNCTIONS
#


#
# STAT CLASSES
#
class Stat(object):
    """Base stat: a Graphite name, a value, and the time the stat was created."""
    structuredOutputName = ""
    timeSinceEpoch = 0
    value = None

    def __init__(self, structuredName):
        self.structuredOutputName = structuredName
        self.timeSinceEpoch = int(time.time())

    def formatData(self):
        """Return the Graphite line '<name> <value> <timeSinceEpoch>\\n'."""
        return (self.structuredOutputName + PrintOutDelimeter + str(self.value)
                + PrintOutDelimeter + str(self.timeSinceEpoch) + "\n")


class AWSStat(Stat):
    """One CloudWatch metric, optionally scoped to a single load balancer."""
    metricName = ""
    statisticValueName = ""
    loadBalancer = ""
    namespace = ""
    cmdToPoll = ""

    def __init__(self, structuredName, metricName, statisticValueName, namespace, loadBalancer=None):
        Stat.__init__(self, structuredName)
        self.metricName = metricName
        self.statisticValueName = statisticValueName
        self.loadBalancer = loadBalancer
        self.namespace = namespace

    def generateCmdToPoll(self):
        """Build the aws CLI command for the CUSTOM or ELB namespace.

        Reads the module-level g_startStatTime / g_currentTime. For any other
        namespace ``cmdToPoll`` is left unchanged.
        """
        if self.namespace == AWSSERVERNAMESPACES['CUSTOM']:
            self.cmdToPoll = (
                'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" '
                '--statistics %s --start-time %s --end-time %s --period 60'
                % (self.namespace, self.metricName, self.statisticValueName,
                   g_startStatTime, g_currentTime))
        elif self.namespace == AWSSERVERNAMESPACES['ELB']:
            self.cmdToPoll = (
                'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" '
                '--statistics %s --dimensions Name=LoadBalancerName,Value=%s '
                '--start-time %s --end-time %s --period 60'
                % (self.namespace, self.metricName, self.statisticValueName,
                   self.loadBalancer, g_startStatTime, g_currentTime))

    def setValue(self):
        """Run the CLI command and store the chosen datapoint in ``self.value``.

        Returns 0 on success, 1 when the response label does not match the
        metric name, and 2 when the response holds no datapoints (in which
        case the stat and the raw response are printed for diagnosis).
        """
        self.generateCmdToPoll()
        jsonDict = json.loads(subprocess.check_output(self.cmdToPoll))
        if jsonDict['Label'] != self.metricName:
            return 1
        if not jsonDict['Datapoints']:
            # our cmd line is wrong if you see this
            print(self.metricName + self.statisticValueName + self.namespace + self.structuredOutputName)
            print(json.dumps(jsonDict, indent=4))
            return 2
        self.value = getLatestTimestampEntry(jsonDict['Datapoints'])[self.statisticValueName]
        return 0

    def formatDataForSQL(self):
        """Return an 'INSERT INTO online.dbo.AmazonStats VALUES (...)' statement.

        NOTE(review): the statement is built by string concatenation with no
        escaping; use parameterized queries if this is ever executed.
        """
        dbName = "online.dbo.AmazonStats"
        date = getSQLTimeString(self.timeSinceEpoch)
        return ("INSERT INTO " + dbName + " VALUES ('" + self.namespace + "', '" + self.metricName
                + "', '" + str(self.value) + "', '" + str(date) + "', '" + self.structuredOutputName + "')")

    def formatDataForCSV(self):
        """Return the CSV row 'namespace,metric,value,date,graphiteNameSpace\\n'."""
        date = getAWSTimeString()
        return (self.namespace + "," + self.metricName + "," + str(self.value) + ","
                + str(date) + "," + self.structuredOutputName + '\n')


class DynamoStat(AWSStat):
    """CloudWatch metric for the DynamoDB table, optionally for one operation."""
    operationName = None
    tableName = 'rsg-cloudsaves-gtav'

    def __init__(self, listOfInputs):
        """``listOfInputs`` is one stat-file row; an optional 5th field is the operation name."""
        AWSStat.__init__(self, listOfInputs[0], listOfInputs[1], listOfInputs[2],
                         AWSSERVERNAMESPACES[listOfInputs[3]], None)
        if len(listOfInputs) > 4:
            self.operationName = listOfInputs[4]
        self.generateCmdToPoll()

    def generateCmdToPoll(self):
        """Build the aws CLI command for a table metric or a table-operation metric."""
        if self.namespace != AWSSERVERNAMESPACES['Dynamo']:
            return
        cmd = (
            'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" '
            '--statistics "%s" --start-time "%s" --end-time "%s" '
            '--period 60 --dimensions "Name=TableName,Value=%s"'
            % (self.namespace, self.metricName, self.statisticValueName,
               g_startStatTime, g_currentTime, self.tableName))
        if self.operationName:
            # Table Operation Metric: narrow the query to a single operation
            cmd += ' "Name=Operation,Value=%s"' % self.operationName
        self.cmdToPoll = cmd

    def formatDataForCSV(self):
        """Return the CSV row; the metric column holds the operation name when one is set."""
        date = getAWSTimeString()
        metricColumn = self.operationName if self.operationName else self.metricName
        return (self.namespace + "," + metricColumn + "," + str(self.value) + ","
                + str(date) + "," + self.structuredOutputName + "\n")


class EC2Stat(AWSStat):
    """CloudWatch metric for a single EC2 instance."""
    friendlyEC2Name = ""
    instanceId = ""

    def __init__(self, structuredName, metricName, statisticValueName, namespace,
                 friendlyName, instanceId, LoadBalancer=None):
        AWSStat.__init__(self, structuredName, metricName, statisticValueName, namespace, LoadBalancer)
        self.friendlyEC2Name = friendlyName
        self.instanceId = instanceId

    def generateCmdToPoll(self):
        """Build the aws CLI command for this instance (300 second period)."""
        if self.namespace == AWSSERVERNAMESPACES['EC2']:
            self.cmdToPoll = (
                'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" '
                '--statistics "%s" --start-time "%s" --end-time "%s" '
                '--period 300 --dimensions "Name=InstanceId,Value=%s"'
                % (self.namespace, self.metricName, self.statisticValueName,
                   g_startStatTime, g_currentTime, self.instanceId))

    def setValue(self):
        """Run the CLI command and store the chosen datapoint in ``self.value``.

        Returns 0 on success, 1 on a label mismatch, and 2 when the instance
        has no datapoints (some machines will have no data).
        """
        self.generateCmdToPoll()
        jsonDict = json.loads(subprocess.check_output(self.cmdToPoll))
        if jsonDict['Label'] != self.metricName:
            return 1
        if not jsonDict['Datapoints']:
            return 2
        self.value = getLatestTimestampEntry(jsonDict['Datapoints'])[self.statisticValueName]
        return 0

    def formatDataForCSV(self):
        """Return the CSV row 'friendlyName,instanceId,metric,value,date,graphiteNameSpace\\n'."""
        date = getAWSTimeString()
        return (self.friendlyEC2Name + ',' + self.instanceId + "," + self.metricName + ","
                + str(self.value) + "," + str(date) + "," + self.structuredOutputName + '\n')


class EC2SummedStatCall(object):
    """One EC2 metric polled on every instance in EC2VALIDINSTANCEIDS.

    Instances are grouped into the buckets named in EC2MACHINENAMEBUCKETS.
    Depending on ``sendIndividualStats`` either one Graphite stat per
    instance or one averaged stat per bucket is sent.

    Attributes set per instance:
      buckets        -- bucketName -> {'Calls': [EC2Stat, ...], 'GraphiteNamespace': str}
      averagedValues -- bucketName -> mean value, or None while the bucket has no data
    """
    graphiteBaseNamespace = ""
    timeSinceEpoch = 0
    AWSNamespace = ""
    AWSStatValueName = ""  #Average,Max,Min, etc.
    MetricName = ""
    sendIndividualStats = True

    def __init__(self, graphiteNamespace, metricName, statisticValueName, namespace, sendIndividualStats=True):
        # int, to match the timestamp Stat puts on its Graphite lines
        self.timeSinceEpoch = int(time.time())
        self.graphiteBaseNamespace = graphiteNamespace
        self.MetricName = metricName
        self.AWSStatValueName = statisticValueName
        self.AWSNamespace = namespace
        self.sendIndividualStats = sendIndividualStats
        # Both dicts are created here so that no two instances (each driven
        # by its own thread) ever share state.
        self.buckets = {}
        self.averagedValues = {}
        for bucketName in EC2MACHINENAMEBUCKETS.values():
            self.buckets[bucketName] = {
                'Calls': [],
                'GraphiteNamespace': self.graphiteBaseNamespace.replace('ec2', 'ec2.%s' % bucketName),
            }
            self.averagedValues[bucketName] = None
        self.createEC2StatCalls()

    def createEC2StatCalls(self):
        """Create one EC2Stat per known instance and file it under its bucket."""
        for friendlyName, instanceId, bucketName in EC2VALIDINSTANCEIDS:
            if self.sendIndividualStats:
                graphiteName = self.graphiteBaseNamespace.replace(
                    'ec2', 'ec2.%s.%s' % (bucketName, friendlyName))
            else:
                graphiteName = self.graphiteBaseNamespace.replace('ec2', 'ec2.%s' % bucketName)
            self.buckets[bucketName]['Calls'].append(
                EC2Stat(graphiteName, self.MetricName, self.AWSStatValueName,
                        self.AWSNamespace, friendlyName, instanceId))

    def populateBucketCallValues(self):
        """Poll every instance and store each bucket's mean in ``averagedValues``.

        A bucket in which no instance returned a datapoint gets None.
        """
        for bucket in self.buckets:
            calls = self.buckets[bucket]['Calls']
            if not calls:
                continue
            total = 0
            statBearingEC2 = 0
            for EC2Call in calls:
                res = EC2Call.setValue()
                if res == 0:
                    total += EC2Call.value
                    statBearingEC2 += 1
                elif res == 1:
                    print('ERROR SETTINGS EC2 Value')
                elif res == 2:
                    print("This EC2 didn't have any datapoints")
            if statBearingEC2:
                self.averagedValues[bucket] = total / float(statBearingEC2)
            else:
                self.averagedValues[bucket] = None

    def dumpCSVPrint(self):
        """Print the CSV row of every call (debugging aid)."""
        for bucket in self.buckets:
            for ec2call in self.buckets[bucket]['Calls']:
                print('csv format: ' + ec2call.formatDataForCSV())

    def writeToCSV(self):
        """Append the CSV rows of every call to the global EC2_CSV_STRING."""
        global EC2_CSV_STRING
        rows = ''.join(EC2Call.formatDataForCSV()
                       for bucket in self.buckets
                       for EC2Call in self.buckets[bucket]['Calls'])
        # Several worker threads append to the same global string.
        with _CSV_LOCK:
            EC2_CSV_STRING += rows

    def sendData(self, socket):
        """Poll all instances, then send the results to Graphite on ``socket``.

        Instances or buckets that produced no value are not sent.
        """
        self.populateBucketCallValues()
        for bucket in self.buckets:
            if self.sendIndividualStats:
                #Send Graphite a stat for every InstanceId
                for EC2Call in self.buckets[bucket]['Calls']:
                    if EC2Call.value is None:
                        continue
                    outstring = EC2Call.formatData()
                    print(outstring)
                    _sendToGraphite(socket, outstring)
            else:
                #Send Graphite a stat for every bucket of EC2 machines
                if self.averagedValues[bucket] is None:
                    continue
                outstring = (self.buckets[bucket]['GraphiteNamespace'] + PrintOutDelimeter
                             + str(self.averagedValues[bucket]) + PrintOutDelimeter
                             + str(self.timeSinceEpoch) + '\n')
                print(outstring)
                _sendToGraphite(socket, outstring)
#
# //STAT CLASSES
#


#
# Multi threaded starting function
#
def processEC2SummedStat(threadName, EC2SummedObj, threadID):
    """Thread entry point: send one EC2SummedStatCall to Graphite and record its CSV rows.

    The slot ``LIST_OF_THREADS_FINISHED[threadID]`` is always set to True on
    exit, including after an error, so the main thread never waits forever.
    """
    print("%s: is starting processing.\n" % threadName)
    try:
        if not DEBUG:
            EC2SummedObj.sendData(SOCK)
        EC2SummedObj.writeToCSV()
        print("%s: is finished processing.\n" % threadName)
    finally:
        LIST_OF_THREADS_FINISHED[threadID] = True
#
# //Multi threaded starting function
#


#
# ENTRY POINT
#
def _getInstanceName(instanceTags):
    """Return the machine name from an instance's tag list, or None.

    The 'Name' tag is preferred; when there is none the first tag's value is
    used, since the order of the tag list should not be relied on.
    """
    if not instanceTags:
        return None
    for tag in instanceTags:
        if tag.get('Key') == 'Name':
            return tag.get('Value')
    return instanceTags[0].get('Value')


def _findActiveEC2Instances():
    """Fill EC2VALIDINSTANCEIDS with every running instance whose name matches a bucket regex."""
    ec2Instances = json.loads(subprocess.check_output("aws ec2 describe-instances"))
    for resv in ec2Instances['Reservations']:
        for inst in resv['Instances']:
            name = _getInstanceName(inst.get('Tags'))
            if not name:
                continue
            for regex in EC2MACHINENAMEBUCKETS:
                if re.match(regex, name):
                    if inst['State']['Name'] == 'running':  #only want active servers
                        EC2VALIDINSTANCEIDS.append(
                            (name, inst['InstanceId'], EC2MACHINENAMEBUCKETS[regex]))
                    break


def _buildStatRequests(statNames):
    """Start a worker thread per EC2 stat and return the AWSStat objects for the rest."""
    global THREAD_COUNTER
    AWSStatRequests = []
    for stat in statNames:
        namespace = AWSSERVERNAMESPACES[stat[3]]
        if namespace == AWSSERVERNAMESPACES['EC2']:
            #TODO: If we want to dump into buckets, add False to the __init__ args
            obj = EC2SummedStatCall(stat[0], stat[1], stat[2], namespace)
            LIST_OF_THREADS_FINISHED.append(False)
            thread.start_new_thread(processEC2SummedStat,
                                    ("Thread-%d" % THREAD_COUNTER, obj, THREAD_COUNTER))
            THREAD_COUNTER += 1
        elif stat[3] == 'ELB':
            #One job per load balancer
            for lb in LOADBALANCERS:
                AWSStatRequests.append(
                    AWSStat(stat[0].replace('elb', 'elb.%s' % lb), stat[1], stat[2], namespace, lb))
        elif stat[3] == 'CUSTOM':
            #Not tied to a load balancer
            AWSStatRequests.append(AWSStat(stat[0], stat[1], stat[2], namespace))
        elif stat[3] == 'Dynamo':
            AWSStatRequests.append(DynamoStat(stat))
    return AWSStatRequests


def main():
    """Collect every configured stat once, push to Graphite, write and upload the CSVs."""
    global SOCK, g_startStatTime, g_currentTime, AWS_CSV_STRING

    #Get active EC2 Machines
    _findActiveEC2Instances()

    #connect to the graphite server
    if not DEBUG:
        SOCK = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            SOCK.connect((GRAPHITE_IP, GRAPHITE_PORT))
        except socket.error:
            print("error could not connect to graphite server")
            sys.exit(1)

    fAWSname = getCSVFileName('AWS')
    fEC2name = getCSVFileName('EC2')

    # Query window, computed straight from the epoch in UTC so that it does
    # not depend on the machine's local time zone or DST state.
    nowEpoch = int(time.time())
    g_currentTime = _formatAWSTime(time.gmtime(nowEpoch))
    g_startStatTime = _formatAWSTime(time.gmtime(nowEpoch - STAT_LOOKBACK_SECONDS))

    #CREATE THE CALLS (the stat file is re-read on every run)
    AWSStatRequests = _buildStatRequests(getAWSStatListFromFile())

    #GET THAT DATA
    for AWSCall in AWSStatRequests:
        if AWSCall.setValue():
            print('ERROR WHEN SETTING THE AWS VALUE')
            continue
        #SEND THAT DATA
        outstring = AWSCall.formatData()
        if not DEBUG:
            _sendToGraphite(SOCK, outstring)
        print(outstring)
        AWS_CSV_STRING += AWSCall.formatDataForCSV()

    #Wait for all threads to be done processing
    while not all(LIST_OF_THREADS_FINISHED):
        print('All threads arent done.')
        print(''.join(str(item) + ' ' for item in LIST_OF_THREADS_FINISHED))
        time.sleep(5)

    #Write to files now that all the threads are done
    with open(fAWSname, "w") as awsFile:
        awsFile.write(AWS_CSV_STRING)
    if not DEBUG:
        with open(fAWSname, "rb") as awsFile:
            sendFileToFTP(awsFile, 'AWS')

    with open(fEC2name, 'w') as ec2File:
        ec2File.write(EC2_CSV_STRING)
    if not DEBUG:
        with open(fEC2name, 'rb') as ec2File:
            sendFileToFTP(ec2File, 'EC2')

    #close down
    print("Script terminating")
    if not DEBUG:
        SOCK.close()
    #os.remove(fAWSname)
    #os.remove(fEC2name)


if __name__ == "__main__":
    main()
#
# //ENTRY POINT
#