Files
2025-09-29 00:52:08 +02:00

534 lines
21 KiB
Python
Executable File

import subprocess
import json
import datetime
import time
import calendar
import socket
import os
import re
import thread
#import MySQLdb
import sys
from ftplib import FTP
DEBUG = False
# Separator used when building Graphite plaintext-protocol lines
PrintOutDelimeter = ' '
#listOfStats.txt should be in the same folder as this .py file.
FILEPATHOFSTATSTOUPDATE= os.path.join(os.path.dirname(__file__), r'listOfStats.txt')
LOADBALANCERS = ["vpc-elb-rpdxtmweb", "vpc-elb-rpdxcsweb", 'vpc-elb-rpdxtwem']
# Regex matched against the EC2 Name tag -> Graphite bucket name.
# BUGFIX: the original listed the 'RPDXNUM\d+$' key twice (same value);
# the duplicate has been removed.
EC2MACHINENAMEBUCKETS = {'RPDXCSWEB\d+$':'rpdxcsweb',
                         'RPDXNUM\d+$':'rpdxnum',
                         'RPDXREDIS\d+$':'rpdxredis',
                         'RPDXSQL\d+$':'rpdxsql',
                         'RPDXTMWEB\d+$':'rpdxtmweb',
                         'RPDXNUT\d+$':'rpdxnut',
                         'RPDXCSV\d+$':'rpdxcsv'}
# Filled at startup with (friendlyName, instanceId, bucket) tuples for
# every running EC2 instance whose Name tag matched a bucket regex.
EC2VALIDINSTANCEIDS = []
AWSSERVERNAMESPACES = {'ELB':r'AWS/ELB', 'CUSTOM':'"Custom Metrics"', 'EC2':r'AWS/EC2', 'Dynamo':r'AWS/DynamoDB'}
# Start/end of the CloudWatch query window; set in __main__ before any poll.
g_startStatTime = None
g_currentTime = None
# what graphite server we are sending to
GRAPHITE_IP = 'tewrgraph1.taketwo.online'
GRAPHITE_PORT = 2003
# what internal sql server we are sending to (SQL path currently commented out)
SQL_ADDRESS = "RSGSANFPS2/online"
SQL_USER = "online"
SQL_PASSWORD = "ewrewrewr"
SQL_DB = "online"
FTP_HOST="ftp.rockstarsd.com"
FTP_USER="online"
FTP_PASSWORD="ewrewrewr"
FTP_DIRECTORY={'AWS':"GraphiteLogs\\AWS", 'EC2':'GraphiteLogs\\EC2'}
THREAD_COUNTER = 0
# One flag per worker thread; the main thread busy-waits until all are True.
LIST_OF_THREADS_FINISHED= []
AWS_CSV_STRING = ''
EC2_CSV_STRING = ''
SOCK = None
"""
HELPER FUNCTIONS
"""
def getLatestTimestampEntry(jsonListOfDict):
    """Return the second-to-latest datapoint by its 'Timestamp' field.

    The latest CloudWatch datapoint may still be incomplete, so we
    deliberately take the one before it.

    BUGFIX: the original indexed [-2] unconditionally and raised
    IndexError when exactly one datapoint existed (callers only guard
    against an empty list); we now fall back to the latest entry.
    """
    ordered = sorted(jsonListOfDict,
                     key=lambda x: datetime.datetime.strptime(x['Timestamp'], "%Y-%m-%dT%H:%M:%SZ"))
    return ordered[-2] if len(ordered) >= 2 else ordered[-1]
def getAWSStatListFromFile():
    """Parse listOfStats.txt into a list of stat-definition lists.

    Each non-blank line is split on ';' and every field stripped.
    Field layout (hard-coded by callers):
      [0] structured Graphite name of the stat
      [1] stat name to ping AWS with
      [2] --statistics value (Sum/Average/...)
      [3] metrics server namespace key (see AWSSERVERNAMESPACES)

    Uses a `with` block so the file is closed even if parsing raises
    (the original open/close pair leaked the handle on error).
    """
    listOfMetricNames = []
    with open(FILEPATHOFSTATSTOUPDATE, 'r') as f:
        for line in f:
            if not line.isspace():
                listOfMetricNames.append([item.strip() for item in line.split(';')])
    return listOfMetricNames
def getSQLTimeString(timeSinceEpoch):
    """Format an epoch timestamp as an RFC-2822-style UTC string for SQL rows."""
    utc_struct = time.gmtime(timeSinceEpoch)
    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", utc_struct)
def getAWSTimeString():
    """Return the current UTC time as a zero-padded ISO-8601 timestamp.

    BUGFIX: the original used bare %d for year/month/day, producing
    e.g. "2025-1-5T03:04:05" for single-digit months/days.  ISO 8601
    (and lexicographic sorting of the CSV date column) requires the
    zero padding, which strftime provides.
    """
    return time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(int(time.time())))
def getCSVFileName(statType):
    """Build a timestamped CSV filename for the given stat family.

    statType 'AWS' -> 'aws<epoch>.csv'; 'EC2' -> 'ec2_<epoch>.csv'.

    Raises ValueError for any other statType: the original silently
    returned None, which only crashed later when file() was called.
    """
    timestamp = int(time.time())
    if statType == 'AWS':
        return "aws" + str(timestamp) + ".csv"
    elif statType == 'EC2':
        return 'ec2_' + str(timestamp) + '.csv'
    raise ValueError("unknown statType: %r" % statType)
def sendFileToFTP(f, statType):
    """Upload an open file via FTP STOR into FTP_DIRECTORY[statType].

    f        -- an open file object (its .name is used as the remote name)
    statType -- 'AWS' or 'EC2', selecting the remote directory

    The quit() now runs in a finally block so the FTP control
    connection is not leaked when login/cwd/STOR raises.
    """
    ftp = FTP(FTP_HOST)
    try:
        ftp.login(user=FTP_USER, passwd=FTP_PASSWORD)
        ftp.cwd(FTP_DIRECTORY[statType])
        ftp.storlines("STOR " + f.name, f)
    finally:
        ftp.quit()
"""
/HELPER FUNCTIONS
"""
"""
STAT CLASSES
"""
class Stat:
    """Base class for a single Graphite datapoint.

    Holds the dotted Graphite metric path, the capture time (epoch
    seconds, fixed at construction) and the value to report.
    """
    structuredOutputName = ""
    timeSinceEpoch = 0
    value = None
    def __init__(self, structuredName):
        self.structuredOutputName = structuredName
        self.timeSinceEpoch = int(time.time())
    def formatData(self):
        # Graphite plaintext protocol line: "<metric> <value> <epoch>\n"
        fields = (self.structuredOutputName, str(self.value), str(self.timeSinceEpoch))
        return PrintOutDelimeter.join(fields) + "\n"
class AWSStat(Stat):
    """One CloudWatch metric poll, pushable to Graphite, SQL or CSV.

    Assumption: this class is only going to configure for one stat, per
    load balancer.  The aws-cli is shelled out to via subprocess, and
    the query window comes from the module globals
    g_startStatTime/g_currentTime (set in __main__).
    """
    #variables
    metricName = ""
    statisticValueName = ""
    loadBalancer = ""
    namespace = ""
    cmdToPoll = ""
    def __init__(self, structuredName, metricName, statisticValueName, namespace, loadBalancer=None):
        # structuredName      -- dotted Graphite metric path
        # metricName          -- CloudWatch --metric-name value
        # statisticValueName  -- CloudWatch --statistics value (Sum/Average/...)
        # namespace           -- one of the AWSSERVERNAMESPACES values
        # loadBalancer        -- ELB name; only used for the ELB namespace
        Stat.__init__(self, structuredName)
        self.metricName = metricName
        self.statisticValueName = statisticValueName
        self.loadBalancer = loadBalancer
        self.namespace = namespace
    def generateCmdToPoll(self):
        # Build the aws-cli command line matching this stat's namespace.
        # NOTE(review): cmdToPoll is later passed to subprocess as one
        # string; that works on Windows, but on POSIX check_output would
        # need shell=True or an argv list — confirm deployment platform.
        if self.namespace == AWSSERVERNAMESPACES['CUSTOM']:
            self.cmdToPoll = 'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" --statistics %s --start-time %s --end-time %s --period 60' % (self.namespace, self.metricName, self.statisticValueName, g_startStatTime, g_currentTime)
        elif self.namespace == AWSSERVERNAMESPACES['ELB']:
            self.cmdToPoll = 'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" --statistics %s --dimensions Name=LoadBalancerName,Value=%s --start-time %s --end-time %s --period 60' % (self.namespace, self.metricName, self.statisticValueName, self.loadBalancer, g_startStatTime, g_currentTime)
        # print self.cmdToPoll
    def setValue(self):
        # Poll CloudWatch and stash the chosen statistic on self.value.
        # Returns 0 on success, 1 on label mismatch, 2 when AWS returned
        # no datapoints at all (the command line is wrong if you see 2).
        self.generateCmdToPoll()
        jsonDict = json.loads(subprocess.check_output(self.cmdToPoll))
        if jsonDict['Label'] == self.metricName:
            #grab the last point to send
            if len(jsonDict['Datapoints']) == 0:
                #about to crash, dump something useful
                #our cmd line is wrong is you see this
                print self.metricName + self.statisticValueName + self.namespace + self.structuredOutputName
                print json.dumps(jsonDict, indent=4)
                return 2
            self.value = getLatestTimestampEntry(jsonDict['Datapoints'])['%s' % self.statisticValueName]
            return 0
        return 1
    def formatDataForSQL(self):
        #example expected formatting:
        #INSERT INTO table VALUES (x, y, z)
        # NOTE(review): values are spliced in by string concatenation —
        # any field containing a quote breaks the statement (and this is
        # injectable); use parameterized queries if the SQL path (currently
        # commented out in __main__) is ever re-enabled.
        dbName = "online.dbo.AmazonStats"
        namespace = self.namespace
        metric = self.metricName
        value = self.value
        date = getSQLTimeString(self.timeSinceEpoch)
        graphiteNameSpace = self.structuredOutputName
        return "INSERT INTO " + dbName + " VALUES ('" + namespace + "', '" + metric + "', '" + str(value) + "', '" + str(date) + "', '" + graphiteNameSpace + "')"
    def formatDataForCSV(self):
        #example expected formatting:
        #namespace,metric,value,date,graphiteNameSpace
        namespace = self.namespace
        metric = self.metricName
        value = self.value
        date = getAWSTimeString()
        graphiteNameSpace = self.structuredOutputName
        return namespace + "," + metric + "," + str(value) + "," + str(date) + "," + graphiteNameSpace + '\n'
class DynamoStat(AWSStat):
    """DynamoDB-flavoured AWSStat, optionally scoped to one table Operation.

    listOfInputs comes straight from a listOfStats.txt row:
      [0] Graphite name, [1] metric name, [2] statistic value,
      [3] namespace key, [4] (optional) DynamoDB Operation name.
    """
    operationName = None
    # hard-coded table that every Dynamo stat is polled against
    tableName = 'rsg-cloudsaves-gtav'
    def __init__(self, listOfInputs):
        AWSStat.__init__(self, listOfInputs[0], listOfInputs[1], listOfInputs[2], AWSSERVERNAMESPACES[listOfInputs[3]], None)
        if len(listOfInputs) > 4:
            self.operationName = listOfInputs[4]
        self.generateCmdToPoll()
        # print self.cmdToPoll
    def generateCmdToPoll(self):
        # Two command shapes: with an Operation dimension (per-API-call
        # metric) or table-wide.  Window comes from module globals.
        if self.namespace == AWSSERVERNAMESPACES['Dynamo']:
            if self.operationName:
                #Table Operation Metric
                self.cmdToPoll = 'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" --statistics "%s" --start-time "%s" --end-time "%s" --period 60 --dimensions "Name=TableName,Value=%s" "Name=Operation,Value=%s"' % (self.namespace, self.metricName, self.statisticValueName, g_startStatTime, g_currentTime, self.tableName, self.operationName)
            else:
                #Table Metric
                self.cmdToPoll = 'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" --statistics "%s" --start-time "%s" --end-time "%s" --period 60 --dimensions "Name=TableName,Value=%s"' % (self.namespace, self.metricName, self.statisticValueName, g_startStatTime, g_currentTime, self.tableName)
    def formatDataForCSV(self):
        #example expected formatting:
        #namespace,metric,value,date,graphiteNameSpace
        # When scoped to an Operation, the operation name replaces the
        # metric name in the CSV's metric column.
        date = getAWSTimeString()
        if self.operationName:
            return self.namespace + "," + self.operationName + "," + str(self.value) + "," + str(date) + "," + self.structuredOutputName + "\n"
        else:
            return self.namespace + "," + self.metricName + "," + str(self.value) + "," + str(date) + "," + self.structuredOutputName + "\n"
class EC2Stat(AWSStat):
    """AWSStat for a single EC2 instance (AWS/EC2 namespace).

    Adds the instance's friendly Name-tag value and its InstanceId; the
    CloudWatch poll is dimensioned on InstanceId.
    """
    friendlyEC2Name = ""
    instanceId = ""
    def __init__(self, structuredName, metricName, statisticValueName, namespace, friendlyName, instanceId, LoadBalancer=None):
        AWSStat.__init__(self, structuredName, metricName, statisticValueName, namespace, LoadBalancer)
        self.friendlyEC2Name = friendlyName
        self.instanceId = instanceId
    def generateCmdToPoll(self):
        # --period 300 (vs 60 elsewhere): EC2 basic monitoring only
        # publishes datapoints every 5 minutes.
        if self.namespace == AWSSERVERNAMESPACES['EC2']:
            self.cmdToPoll = 'aws cloudwatch get-metric-statistics --namespace %s --metric-name "%s" --statistics "%s" --start-time "%s" --end-time "%s" --period 300 --dimensions "Name=InstanceId,Value=%s"' % (self.namespace, self.metricName, self.statisticValueName, g_startStatTime, g_currentTime, self.instanceId)
    def setValue(self):
        # Poll CloudWatch for this instance and stash the statistic on
        # self.value.  Returns 0 on success, 1 on label mismatch, 2 when
        # the machine produced no datapoints (expected for some machines,
        # unlike the base class where 2 signals a broken command line).
        self.generateCmdToPoll()
        jsonDict = json.loads(subprocess.check_output(self.cmdToPoll))
        if jsonDict['Label'] == self.metricName:
            #grab the last point to send
            if len(jsonDict['Datapoints']) > 0:
                self.value = getLatestTimestampEntry(jsonDict['Datapoints'])['%s' % self.statisticValueName]
                return 0
            else:
                #Some machines will have no data
                return 2
        return 1
    def formatDataForCSV(self):
        #example expected formatting:
        #friendlyName,instanceId,metric,value,date,graphiteNameSpace
        # (dropped the unused `namespace` local the original computed)
        metric = self.metricName
        value = self.value
        date = getAWSTimeString()
        graphiteNameSpace = self.structuredOutputName
        return self.friendlyEC2Name + ',' + self.instanceId + "," + metric + "," + str(value) + "," + str(date) + "," + graphiteNameSpace + '\n'
class EC2SummedStatCall(object):
    """Fans one CloudWatch metric out across every active EC2 instance,
    grouped into the EC2MACHINENAMEBUCKETS machine buckets.

    Per bucket it either pushes one Graphite stat per instance
    (sendIndividualStats=True) or a single bucket-wide average.
    """
    buckets = {}
    averagedValues = {}
    graphiteBaseNamespace = ""
    timeSinceEpoch = 0
    AWSNamespace = ""
    AWSStatValueName = "" #Average,Max,Min, etc.
    MetricName = ""
    sendIndividualStats = True
    def __init__(self, graphiteNamespace, metricName, statisticValueName, namespace, sendIndividualStats=True):
        # int() for consistency with Stat.timeSinceEpoch (the original
        # stored a float here, giving float timestamps in outstrings)
        self.timeSinceEpoch = int(time.time())
        self.graphiteBaseNamespace = graphiteNamespace
        self.MetricName = metricName
        self.AWSStatValueName = statisticValueName
        self.AWSNamespace = namespace
        # BUGFIX: this parameter used to be silently ignored; every
        # instance fell back to the class-level default of True.
        self.sendIndividualStats = sendIndividualStats
        # BUGFIX: give each instance its OWN dicts.  The original only
        # rebound self.buckets; averagedValues was mutated on the shared
        # class-level dict, so instances clobbered each other's averages.
        self.buckets = {}
        self.averagedValues = {}
        for bucketName in EC2MACHINENAMEBUCKETS:
            self.buckets[EC2MACHINENAMEBUCKETS[bucketName]] = {'Calls':[],'GraphiteNamespace':self.graphiteBaseNamespace.replace('ec2', 'ec2.%s' % EC2MACHINENAMEBUCKETS[bucketName])}
            self.averagedValues[EC2MACHINENAMEBUCKETS[bucketName]] = 0
        self.createEC2StatCalls()
    def createEC2StatCalls(self):
        # Build one EC2Stat per active instance, filed under its bucket.
        # The Graphite name embeds bucket (and instance name when sending
        # individual stats) in place of the literal 'ec2' segment.
        for EC2Data in EC2VALIDINSTANCEIDS:
            if self.sendIndividualStats:
                #LEAVE newStr. Python passes everything as pointers and it's screwing up __init__ of Stat
                newStr = self.graphiteBaseNamespace.replace('ec2', 'ec2.%s.%s' % (EC2Data[2], EC2Data[0]))
                self.buckets[EC2Data[2]]['Calls'].append(EC2Stat(newStr, self.MetricName, self.AWSStatValueName, self.AWSNamespace, EC2Data[0], EC2Data[1]))
            else:
                newStr = self.graphiteBaseNamespace.replace('ec2', 'ec2.%s' % EC2Data[2])
                self.buckets[EC2Data[2]]['Calls'].append(EC2Stat(newStr, self.MetricName, self.AWSStatValueName, self.AWSNamespace, EC2Data[0], EC2Data[1]))
    def populateBucketCallValues(self):
        # Poll every instance in every bucket and compute per-bucket
        # averages over the instances that actually had data.
        for bucket in self.buckets:
            if self.buckets[bucket]['Calls']:
                avgValue = 0
                statBearingEC2 = 0
                for EC2Call in self.buckets[bucket]['Calls']:
                    #individual call to EC2 machine. Will need to be avg'd across all in order to get a value
                    res = EC2Call.setValue()
                    if res == 0:
                        avgValue += EC2Call.value
                        statBearingEC2 += 1
                    elif res == 1:
                        print('ERROR SETTINGS EC2 Value')
                    elif res == 2:
                        print("This EC2 didn't have any datapoints")
                # BUGFIX: the original divided unconditionally and raised
                # ZeroDivisionError when no instance in the bucket had
                # data; we now keep the 0 initialized in __init__.
                if statBearingEC2:
                    self.averagedValues[bucket] = avgValue / statBearingEC2
    def dumpCSVPrint(self):
        # Debug helper: dump every call's CSV line to stdout.
        for bucket in self.buckets:
            for ec2call in self.buckets[bucket]['Calls']:
                print('csv format: ' + ec2call.formatDataForCSV())
    def writeToCSV(self):
        # Append every call's CSV line to the shared module-level buffer.
        global EC2_CSV_STRING
        for bucket in self.buckets:
            for EC2Call in self.buckets[bucket]['Calls']:
                EC2_CSV_STRING += EC2Call.formatDataForCSV()
    def sendData(self, socket):
        # Poll everything, then push to Graphite over the given socket.
        self.populateBucketCallValues()
        for bucket in self.buckets:
            if self.sendIndividualStats:
                #Send Graphite a stat for every InstanceId
                for EC2Call in self.buckets[bucket]['Calls']:
                    outstring = EC2Call.formatData()
                    print(outstring)
                    socket.sendall(outstring)
            else:
                #Send Graphite a stat for every bucket of EC2 machines
                # BUGFIX: the original indexed bucket['GraphiteNamespace'],
                # but `bucket` is the dict KEY (a string) — that raised
                # TypeError.  Look the entry up in self.buckets instead.
                outstring = self.buckets[bucket]['GraphiteNamespace'] + PrintOutDelimeter + str(self.averagedValues[bucket]) + PrintOutDelimeter + str(self.timeSinceEpoch) + '\n'
                print(outstring)
                socket.sendall(outstring)
"""
//STAT CLASSES
"""
"""
Multi threaded starting function
"""
def processEC2SummedStat(threadName, EC2SummedObj, threadID):
    # Thread entry point (started via thread.start_new_thread in
    # __main__): polls and sends one EC2SummedStatCall, then flags its
    # slot in LIST_OF_THREADS_FINISHED so the main thread's wait loop
    # can complete.  threadID indexes that list.
    global SOCK
    print "%s: is starting processing.\n" % threadName
    if not DEBUG:
        EC2SummedObj.sendData(SOCK)
    # NOTE(review): writeToCSV appends to the shared EC2_CSV_STRING from
    # multiple threads; '+=' on a global is a read-modify-write, not
    # atomic, so rows could be lost under contention.  It also runs in
    # DEBUG mode, when sendData was skipped and the values were never
    # populated (CSV rows then contain None).
    EC2SummedObj.writeToCSV()
    print "%s: is finished processing.\n" % threadName
    LIST_OF_THREADS_FINISHED[threadID] = True
"""
//Multi threaded starting function
"""
"""
ENTRY POINT
"""
if __name__ == "__main__":
    #entry point
    #Get active EC2 Machines
    # Shell out to the aws-cli and collect every running instance whose
    # first Name tag matches one of the bucket regexes.
    ec2Instances = json.loads(
        subprocess.check_output("aws ec2 describe-instances")
    )
    # print json.dumps(ec2Instances['Reservations'][5], indent=4)
    for resv in ec2Instances['Reservations']:
        for inst in resv['Instances']:
            instanceTags = inst.get('Tags')
            if instanceTags and len(instanceTags) and instanceTags[0]['Value']:
                for regex in EC2MACHINENAMEBUCKETS:
                    #Looking at the first one always? IDK if this will be a problem with multiple sets of tags?
                    matchObj = re.match(regex, instanceTags[0]['Value'], flags=0)
                    if matchObj:
                        if inst['State']['Name'] == 'running':
                            #only want active servers
                            #Friendly ServerName : instanceID
                            EC2VALIDINSTANCEIDS.append((instanceTags[0]['Value'], inst['InstanceId'], EC2MACHINENAMEBUCKETS[regex]))
                        break
    #connect to servers
    # graphite web server
    if not DEBUG:
        SOCK = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            SOCK.connect((GRAPHITE_IP, GRAPHITE_PORT))
        except:
            print("error could not connect to graphite server")
            sys.exit(1)
    #internal sql db
    #try:
    #    sqlConnection = MySQLdb.connect(host=SQL_ADDRESS, user=SQL_USER, passwd=SQL_PASSWORD, db=SQL_DB)
    #except Exception as e:
    #    print("error could not connect to SQL database")
    #    print(e)
    #    sys.exit(1)
    #csv file
    fAWSname = getCSVFileName('AWS')
    awsFile = file(fAWSname, "w")
    fEC2name = getCSVFileName('EC2')
    ec2File = file(fEC2name, 'w')
    #sqlCursor = sqlConnection.cursor()
    """
    DO ALL THE WORK - Took out the while loop to put this into a scheduled task
    """
    #read file every time so script doesn't need to be restarted
    # Build the CloudWatch query window: from ~65 minutes ago to now.
    g_currentTime = time.gmtime(int(time.time()))
    dt = datetime.datetime.fromtimestamp(time.mktime(g_currentTime))
    #[THP] Why we have to subtract this hour? blame python
    # NOTE(review): mktime() interprets the UTC struct as LOCAL time, so
    # the extra hour presumably compensates for the host's UTC offset —
    # verify this still holds on the deployment machine/timezone.
    pastStructTime = (dt - datetime.timedelta(minutes=5) - datetime.timedelta(hours=1)).timetuple()
    # NOTE(review): month/day are not zero-padded here (%d), so these
    # strings are not strict ISO 8601 for single-digit months/days.
    g_currentTime = "%d-%d-%dT%02d:%02d:%02d" % (g_currentTime[0], g_currentTime[1], g_currentTime[2], g_currentTime[3], g_currentTime[4], g_currentTime[5])
    g_startStatTime = "%d-%d-%dT%02d:%02d:%02d" % (pastStructTime[0], pastStructTime[1], pastStructTime[2], pastStructTime[3], pastStructTime[4], pastStructTime[5])
    # print g_currentTime
    # print g_startStatTime
    #CREATE THE CALLS
    # EC2 stats get a worker thread each (they fan out to many instances);
    # everything else is collected into AWSStatRequests and polled serially.
    AWSStatRequests = []
    # EC2StatRequests = []
    statNames = getAWSStatListFromFile()
    for stat in statNames:
        if AWSSERVERNAMESPACES[stat[3]] == AWSSERVERNAMESPACES['EC2']:
            #Creating EC2Stat
            #TODO: If we want to dump into buckets, add False to the __init__ args
            # Just do everything here. Python likes to copy our dictionaries to point all instance's object to a single self.buckets
            obj = EC2SummedStatCall(stat[0], stat[1], stat[2], AWSSERVERNAMESPACES[stat[3]] )
            # append BEFORE starting the thread so threadID is valid
            LIST_OF_THREADS_FINISHED.append(False)
            thread.start_new_thread(processEC2SummedStat, ("Thread-%d" % THREAD_COUNTER, obj, THREAD_COUNTER))
            THREAD_COUNTER += 1
        else:
            #Creating AWSStat
            if stat[3] == 'ELB':
                #Make two jobs for loadbalancer specific jobs
                for lb in LOADBALANCERS:
                    AWSStatRequests.append(AWSStat(stat[0].replace('elb','elb.%s' % lb), stat[1], stat[2], AWSSERVERNAMESPACES[stat[3]], lb))
            elif stat[3] == 'CUSTOM':
                #Doesn't matter which loadbalancer we ask as they going to reference third party stat pool
                AWSStatRequests.append(AWSStat(stat[0], stat[1], stat[2], AWSSERVERNAMESPACES[stat[3]]))
            elif stat[3] == 'Dynamo':
                AWSStatRequests.append(DynamoStat(stat))
            # print stat[0]
            # print AWSStatRequests[-1].timeSinceEpoch
    # print len(AWSStatRequests)
    # print len(EC2StatRequests)
    #GET THAT DATA
    for AWSCall in AWSStatRequests:
        # setValue returns non-zero on failure (1 = label mismatch,
        # 2 = no datapoints); skip sending those.
        if AWSCall.setValue():
            print('ERROR WHEN SETTING THE AWS VALUE')
            continue
        #SEND THAT DATA
        outstring = AWSCall.formatData()
        #debug printing to console so we can check status
        #send to Graphite
        if not DEBUG:
            SOCK.sendall(outstring)
        print(outstring)
        #send the data to the sql server
        #dbstring = AWSCall.formatDataForSQL()
        #print(dbstring)
        #sqlConnection.execute(dbstring)
        #sqlConnection.commit()
        #send the data to a csv file
        csvstring = AWSCall.formatDataForCSV()
        AWS_CSV_STRING += (csvstring)
    #Wait for all threads to be done processing
    # Busy-wait until every worker flagged its LIST_OF_THREADS_FINISHED slot.
    while (True):
        bAllThreadsDone = True
        # NOTE(review): this loop variable shadows the `thread` module;
        # harmless here only because start_new_thread isn't used after
        # this point.
        for thread in LIST_OF_THREADS_FINISHED:
            if thread == False:
                bAllThreadsDone = False
                break
        if bAllThreadsDone:
            break
        print 'All threads arent done.'
        threadStatuses = ''
        for item in LIST_OF_THREADS_FINISHED:
            threadStatuses += str(item) + ' '
        print threadStatuses
        time.sleep(5)
    #Write to files now that all the threads are done
    awsFile.write(AWS_CSV_STRING)
    awsFile.close()
    if not DEBUG:
        sendFileToFTP(file(fAWSname, "r"),'AWS')
    ec2File.write(EC2_CSV_STRING)
    ec2File.close()
    if not DEBUG:
        sendFileToFTP(file(fEC2name, 'r'),'EC2')
    #close down
    print("Script terminating")
    if not DEBUG:
        SOCK.close()
    #os.remove(fAWSname)
    #os.remove(fEC2name)
"""
//ENTRY POINT
"""