gtav-src/tools_ng/script/coding/memory/compare_heaps.py

import sys
import argparse
import re

# Highest "dumpversion" metadata value this script is known to handle;
# read_entries() prints a warning when a dump declares a greater one.
SUPPORTED_DUMP_VERSION=4

# Globals
# Platform strings of the two compared dumps; appended to by alert_on_mismatch().
g_platform = []
# Build version strings of the compared dumps; appended to by
# alert_on_mismatch() and compare(), read by print_buckets() for column headers.
g_build_version = []

class Function:
	"""Plain record pairing a function name with a size value.

	Instances are created by compare_functions() and expose the `name` and
	`size` attributes read by name_comparator() and size_comparator().
	"""
	def __init__(self, name, size):
		self.name, self.size = name, size

class Entry:
	"""A call-tree node of a heap dump, or an aggregate of several nodes.

	Attributes:
		func: function signature text.
		incl_size: size including the allocations of child nodes.
		excl_size: size excluding child nodes (may be None until computed).
		count: allocation count.
		buckets: list of bucket names attached to the node.
		children: child Entry objects; always starts empty.
	"""
	def __init__(self, func, incl_size, excl_size, count, buckets):
		self.func = func
		self.buckets = buckets
		self.count = count
		self.incl_size = incl_size
		self.excl_size = excl_size
		self.children = []

	def _merged_buckets(self, op):
		# Sorted union of both bucket lists, without duplicates.
		return sorted( set( self.buckets + op.buckets ) )

	def add(self, op):
		"""Return a new child-less Entry holding the sums of self and op."""
		return Entry(
			self.func,
			self.incl_size + op.incl_size,
			self.excl_size + op.excl_size,
			self.count + op.count,
			self._merged_buckets( op ) )

	def sub(self, op):
		"""Return a new child-less Entry holding self minus op."""
		return Entry(
			self.func,
			self.incl_size - op.incl_size,
			self.excl_size - op.excl_size,
			self.count - op.count,
			self._merged_buckets( op ) )

	def __neg__(self):
		"""Return a new Entry with negated sizes and count; the bucket list is shared."""
		return Entry( self.func, -self.incl_size, -self.excl_size, -self.count, self.buckets )

def get_level(line):
	"""Return how many space characters `line` starts with.

	Only ' ' counts; a leading tab or any other character ends the run.
	"""
	return len(line) - len(line.lstrip(' '))

def strip_entry(prefix, entry):
	"""Collapse a templated signature in `entry.func` to `prefix` plus member name.

	When `prefix` occurs in `entry.func`, the name is replaced by `prefix`
	followed by the text from the '::' of the last '>::' up to (not
	including) the next '('. Without a '>::' the name becomes just `prefix`.
	Entries whose name does not contain `prefix` are left untouched.

	Args:
		prefix: text to look for and to use as the new name's start.
		entry: object with a writable `func` string attribute; modified in place.

	Returns:
		The same `entry` object.
	"""
	if prefix not in entry.func:
		return entry

	value = prefix
	start = entry.func.rfind('>::')
	if start >= 0:
		end = entry.func.find('(', start)
		if end < 0:
			# No parameter list follows the member name: take the rest of the
			# string instead of letting -1 chop off the final character.
			end = len(entry.func)
		value += entry.func[start + 1:end]

	entry.func = value
	return entry

def read_entries(filename, args, metadata, header):
	"""Parse one heap dump file into a tree of Entry objects.

	Lines starting with '#' are metadata of the form "# key: value". The
	stripped line is appended to `header` and the pair is stored in
	`metadata` under the lower-cased key. Every other non-blank line
	describes one call-tree node as '|'-separated fields: the function
	name (two fields when the line starts with '['), an allocation field
	whose first and third whitespace-separated tokens are the inclusive
	size and the count, and an optional comma-separated bucket list. The
	number of leading spaces is the node's depth in the tree.

	Args:
		filename: path of the dump file to read.
		args: parsed options; `noparams`, `storage_details` and `xboxhack` are read.
		metadata: dict updated in place with the dump's metadata.
		header: list that receives every metadata line, stripped.

	Returns:
		The last depth-0 Entry read, or None when the file has no entries.
	"""
	# Name prefixes whose surrounding frames are simplified by --xboxhack.
	xbox_candidates = ('atArray', 'rage::atArray', 'atMapMemory', 'rage::atMapMemory', 'atPtrCreator', 'rage::atPtrCreator')

	stack = []
	root = None
	xbox = False

	# The context manager closes the dump even when parsing raises.
	with open( filename, 'r' ) as dump:
		for line in dump:
			# Blank lines carry no entry; skip them instead of failing to unpack.
			if not line.strip():
				continue

			# Does this line contain meta information?
			if line.startswith('#'):
				header.append(line.strip())
				# Meta data is a key/value pair, so let's get them both.
				m = re.match(r"\s*(?P<key>[^\:]+):\s*(?P<value>.+)", line[1:])
				if m is None:
					# re.match returns None for lines without "key: value".
					print('Warning - unable to parse metadata ' + line[1:].strip())
				else:
					key = m.group('key').strip().lower()
					value = m.group('value').strip()

					metadata[key] = value
					if 'platform' in key and 'Xbox 360' in value:
						xbox = True

					if key == 'dumpversion' and int(value) > SUPPORTED_DUMP_VERSION:
						print("WARNING: Input used by this report was generated by a new version of memvisualize (using dumpversion " + value + "). Ensure this script is still compatible, then update SUPPORTED_DUMP_VERSION.")
				continue

			parts = [ part.strip() for part in line.split('|') ]
			buckets = []

			if line.strip().startswith('['):
				# The name itself spans the first two '|' fields.
				func = parts[0] + " | " + parts[1]
				alloc = parts[2]
				if len(parts) > 3:
					buckets = [ bucket.strip() for bucket in parts[3].split(',') ]
			else:
				func, alloc = parts[:2]
				if len(parts) > 2:
					buckets = [ bucket.strip() for bucket in parts[2].split(',') ]

			if args.noparams:
				func = re.sub( r'\(.*?\)', '()', func )
				func = re.sub( r'<.*?>', '<>', func )

			alloc_fields = alloc.split()
			incl_size, count = int( alloc_fields[0] ), int( alloc_fields[2] )
			entry = Entry( func, incl_size, None, count, buckets )

			level = get_level( line )
			if level == 0:
				root = entry
			else:
				# Drop frames at this depth or deeper; the new top is the parent.
				del stack[ level: ]
				stack[-1].children.append( entry )

			if args.storage_details:
				caller, match = None, None
				if func == 'atArray':
					caller = stack[-1].func
					match = re.match( r'rage::atArray<(.*)>::Construct', caller )
					match = match or re.match( r'rage::atArray<(.*)>::Resize', caller )
					match = match or re.match( r'rage::atArray<(.*)>::Reserve', caller )
				elif 'rage::fwBasePool::AllocStorage' in func:
					caller = stack[-3].func
					match = re.match( r'(.*)::InitPool', stack[-3].func )
					match = match or re.match( r'rage::fwAssetStore<(.*)>::FinalizeSize', caller )

				storagetype = (match and match.group(1)) or caller
				if storagetype:
					entry.func = '{} [{}]'.format( func, storagetype )

			stack.append( entry )

			# HACK: on Xbox 360 dumps, simplify the names of the (up to) four
			# innermost frames whenever the new entry is a container helper.
			if xbox and args.xboxhack:
				for name in xbox_candidates:
					if func.startswith(name):
						# strip_entry renames in place, so stack order is unchanged.
						for item in stack[-4:]:
							strip_entry(name, item)
	return root

def compute_exclusive_sizes(entry):
	"""Fill in `excl_size` for `entry` and every node below it.

	A node's exclusive size is its `incl_size` minus the `incl_size` of
	each direct child. Nodes are updated in place; nothing is returned.
	"""
	pending = [ entry ]
	while pending:
		node = pending.pop()
		remaining = node.incl_size
		for child in node.children:
			remaining -= child.incl_size
		node.excl_size = remaining
		pending.extend( node.children )

def get_func_key(entry):
	"""Return the aggregation key for `entry`.

	The key is the function name, followed by ' | ' and the bucket key
	from get_bucket_key() when the entry has buckets.
	"""
	bucket_key = get_bucket_key(entry)
	if bucket_key is not None:
		return ' | '.join( ( entry.func, bucket_key ) )
	return entry.func

def append_func_dict(entry, funcs):
	"""Accumulate `entry` and all of its descendants into `funcs`.

	`funcs` maps get_func_key() keys to Entry aggregates and is updated in
	place. The first node seen for a key is copied (without children);
	later nodes with the same key are merged in with Entry.add().
	"""
	# Explicit stack; children are pushed reversed so nodes are visited
	# parent first, then children in their stored order.
	pending = [ entry ]
	while pending:
		node = pending.pop()
		key = get_func_key( node )
		if key in funcs:
			funcs[ key ] = funcs[ key ].add( node )
		else:
			funcs[ key ] = Entry( node.func, node.incl_size, node.excl_size, node.count, node.buckets )
		pending.extend( reversed( node.children ) )

## custom sort
def comparator(x, y):
	"""Three-way compare of two entries: by size, then by lower-cased name.

	Sizes are read through the module-level `getsize` accessor, which the
	__main__ block assigns. Returns -1, 0 or 1.
	"""
	key_x = ( getsize(x), x.func.lower() )
	key_y = ( getsize(y), y.func.lower() )
	# Comparison operators instead of cmp(), which Python 3 no longer has.
	return (key_x > key_y) - (key_x < key_y)

def compare_func_dicts(funcs0, funcs1, getsize):
	"""Build the sorted list of per-function differences between two dumps.

	Args:
		funcs0: dict of key -> entry for the first (baseline) file set.
		funcs1: dict of key -> entry for the second file set.
		getsize: callable returning the size of an entry; used for sorting.

	Returns:
		A list with one entry per key found in either dict: `funcs1 - funcs0`
		for shared keys, the negated entry for keys only in `funcs0`, and the
		entry itself for keys only in `funcs1`. The list is sorted ascending
		by size, ties broken by lower-cased function name.
	"""
	summary = []
	for func in funcs0:
		if func in funcs1:
			summary.append( funcs1[func].sub( funcs0[func] ) )
		else:
			summary.append( -funcs0[func] )
	summary.extend( funcs1[func] for func in funcs1 if func not in funcs0 )

	# Sort with the accessor passed in, not a module global, and with a key
	# function, which works on both Python 2 and 3.
	summary.sort( key = lambda entry: ( getsize(entry), entry.func.lower() ) )

	return summary

def compare_functions(funcs0, funcs1, getsize):
	"""Return Function records for bucketed functions whose size changed.

	Only entries with at least one bucket are considered. Keys in `funcs0`
	yield `funcs1 - funcs0` (or the negated entry when absent from
	`funcs1`); keys only in `funcs1` yield the entry as is. Differences
	whose `getsize` is 0 are dropped. Results follow the iteration order of
	`funcs0`, then `funcs1`.
	"""
	deltas = []
	for func, before in funcs0.items():
		if len(before.buckets) == 0:
			continue
		if func in funcs1:
			deltas.append( funcs1[func].sub( before ) )
		else:
			deltas.append( -before )

	for func, after in funcs1.items():
		if len(after.buckets) > 0 and func not in funcs0:
			deltas.append( after )

	return [ Function( delta.func, getsize(delta) ) for delta in deltas if getsize(delta) != 0 ]

def get_bucket_key(entry):
	"""Return the entry's bucket names joined with ' | ', or None if it has none."""
	names = entry.buckets
	if len(names) == 0:
		return None
	return ' | '.join( names )

def compare_buckets(funcs0, funcs1, getsize):
	"""Return a dict of bucket key -> summed size difference between two dumps.

	Entries without buckets are ignored. Every bucket key seen in `funcs0`
	is present in the result, even with a total of 0. Bucket keys seen only
	in `funcs1` appear only when some new entry has a non-zero size.
	"""
	totals = {}

	for func, before in funcs0.items():
		bucket = get_bucket_key( before )
		if bucket is None:
			continue
		totals.setdefault( bucket, 0 )

		delta = funcs1[func].sub( before ) if func in funcs1 else -before
		size = getsize( delta )
		if size != 0:
			totals[bucket] += size

	for func, after in funcs1.items():
		if func in funcs0:
			continue
		size = getsize( after )
		if size == 0:
			continue
		bucket = get_bucket_key( after )
		if bucket is not None:
			totals[bucket] = totals.get( bucket, 0 ) + size

	return totals

def alert_on_mismatch(metadatas):
	"""Warn when the two dumps were produced under different conditions.

	`metadatas` holds the metadata dicts of the two compared file sets. For
	each of platform, configuration, package type and build version, a
	warning is printed when both dumps provide a non-empty value and the
	values differ. Platform and build version values are also appended to
	the g_platform and g_build_version globals whenever both dumps have the
	key.
	"""
	global g_platform
	global g_build_version

	# (metadata key, warning prefix, global list recording both values or None)
	checks = (
		( 'platform', "WARNING: Comparing different platforms: ", g_platform ),
		( 'configuration', "WARNING: Comparing different configurations: ", None ),
		( 'packagetype', "WARNING: Comparing different package types: ", None ),
		( 'buildversion', "WARNING: Comparing different build versions: ", g_build_version ),
	)

	first, second = metadatas[0], metadatas[1]
	for key, warning, record in checks:
		if key not in first or key not in second:
			continue

		value0, value1 = first[key], second[key]
		if record is not None:
			record.append( value0 )
			record.append( value1 )

		# An empty value means "unknown" and never triggers a warning.
		if value0 != "" and value1 != "" and value0 != value1:
			print(warning + value0 + " vs " + value1)

def name_comparator(x, y):
	"""Three-way compare of two objects by lower-cased `name`; returns -1, 0 or 1."""
	left, right = x.name.lower(), y.name.lower()
	# Comparison operators instead of cmp(), which Python 3 no longer has.
	return (left > right) - (left < right)

def size_comparator(x, y):
	"""Three-way compare of two objects by `size`; returns -1, 0 or 1."""
	# Comparison operators instead of cmp(), which Python 3 no longer has.
	return (x.size > y.size) - (x.size < y.size)

def get_funcs_by_bucket(summary):
	"""Group the non-zero entries of `summary` by bucket key.

	Returns a dict mapping get_bucket_key() results (None for entries
	without buckets) to lists of entries, in `summary` order. Sizes are
	read through the module-level `getsize` accessor.
	"""
	grouped = {}
	for entry in summary:
		if getsize(entry) == 0:
			continue
		grouped.setdefault( get_bucket_key(entry), [] ).append( entry )
	return grouped

def get_buckets(funcs):
	"""Return a dict of bucket key -> total size over all entries in `funcs`.

	Entries without buckets are skipped. Sizes are read through the
	module-level `getsize` accessor.
	"""
	totals = {}
	for entry in funcs.values():
		bucket = get_bucket_key( entry )
		if bucket is None:
			continue
		totals[bucket] = totals.get( bucket, 0 ) + getsize( entry )
	return totals

def get_bucket_total(buckets):
	"""Return the sum of all values of the `buckets` dict (0 when empty)."""
	return sum( buckets.values() )

def print_buckets(source, target, delta, summary):
	"""Print the bucket comparison table and the functions behind each bucket.

	Args:
		source: dict of bucket key -> size for the first file set.
		target: dict of bucket key -> size for the second file set.
		delta: dict of bucket key -> size difference; one row per key, sorted.
		summary: entries grouped per bucket with get_funcs_by_bucket() for
			the detail sections printed after the table.

	Column headers use g_build_version[0] and g_build_version[1]. Sizes are
	shown both as raw values and divided by 1024 with two decimals.
	"""
	global g_build_version

	rule = "-------------------------------------------------------------------------------------------------"

	def kb(size):
		return round(size / 1024.0, 2)

	print("")
	print(' '.join( (
		"{0:{width}}".format("[BUCKETS", width=24),
		"{0:>14}{1:>10}".format(g_build_version[0], "KB"),
		"{0:>15}{1:>10}".format(g_build_version[1], "KB"),
		"{0:>11}{1:>10}".format("DELTA", "KB]"),
	) ))
	print(rule)

	for bucket in sorted(delta):
		# Buckets missing from one side are shown with a size of 0.
		source_size = source.get(bucket)
		if source_size is None:
			source_size = 0
		target_size = target.get(bucket)
		if target_size is None:
			target_size = 0

		print(' '.join( (
			"{0:{width}}".format(bucket, width=24),
			'    {0:{width}}{1:{width}.2f}   '.format(source_size, kb(source_size), width=10),
			'  {0:{width}}{1:{width}.2f} '.format(target_size, kb(target_size), width=10),
			'{0:{width}}{1:{width}.2f}'.format(delta[bucket], kb(delta[bucket]), width=10),
		) ))

	print(rule)
	source_total = get_bucket_total(source)
	target_total = get_bucket_total(target)
	delta_total = get_bucket_total(delta)
	print(' '.join( (
		"{0:{width}}".format("TOTAL", width=24),
		'    {0:{width}}{1:{width}.2f}   '.format(source_total, kb(source_total), width=10),
		'  {0:{width}}{1:{width}.2f}'.format(target_total, kb(target_total), width=10),
		' {0:{width}}{1:{width}.2f}'.format(delta_total, kb(delta_total), width=10),
	) ))

	bucket_detail = get_funcs_by_bucket(summary)
	# Entries without any bucket are grouped under None and are not listed.
	for bucket in sorted( key for key in bucket_detail if key is not None ):
		sys.stdout.write("\n[" + bucket + "]\n")
		print(rule)
		for entry in bucket_detail[bucket]:
			print('{0:{width}}   {1}'.format(getsize(entry), entry.func, width=8))
def compare(fileset0, fileset1, getsize, args):
	"""Print the allocation differences between two sets of heap dumps.

	Every file of a set is parsed and aggregated into one dictionary per
	set. The metadata of the last file of each set is checked for
	mismatches. The bucket table is printed when `args.buckets` is set,
	followed by every function whose size changed and the total delta.

	Args:
		fileset0: list of dump paths forming the baseline.
		fileset1: list of dump paths compared against the baseline.
		getsize: callable returning the size of an entry.
		args: parsed options, forwarded to read_entries(); `buckets` is read here.
	"""
	func_dicts = []
	metadatas = []
	header = []
	for fileset in ( fileset0, fileset1 ):
		func_dict = {}
		# Start empty so an empty file set neither raises NameError nor
		# inherits the other set's metadata; otherwise the last file wins.
		metadata = {}
		for filename in fileset:
			metadata = {}
			entry_tree = read_entries( filename, args, metadata, header )
			if entry_tree is None:
				# The dump contains no entries; there is nothing to aggregate.
				continue
			compute_exclusive_sizes( entry_tree )
			append_func_dict( entry_tree, func_dict )
		metadatas.append( metadata )
		func_dicts.append( func_dict )

	alert_on_mismatch(metadatas)

	# Build Version
	# alert_on_mismatch() records both versions only when both dumps provide
	# one; fill in the column labels here otherwise, tolerating missing keys.
	global g_build_version
	if len(g_build_version) < 2:
		del g_build_version[:]
		for metadata in metadatas:
			g_build_version.append( metadata.get( "buildversion", "unknown" ) )

	# Compare
	summary = compare_func_dicts( func_dicts[0], func_dicts[1], getsize )

	# Buckets
	if args.buckets:
		source_buckets = get_buckets(func_dicts[0])
		target_buckets = get_buckets(func_dicts[1])
		delta_buckets = compare_buckets(func_dicts[0], func_dicts[1], getsize)
		print_buckets(source_buckets, target_buckets, delta_buckets, summary)

	print("\n[FUNCTION SUMMARY]")
	print("---------------------------------------------------------------------------------------")

	total_delta = 0
	for entry in summary:
		size = getsize(entry)
		if size != 0:
			print("{:10} {:10} {} - {}".format( size, entry.count, entry.func, ', '.join( entry.buckets ) ))
			total_delta += size
	print('Total delta: {0}'.format( total_delta ))


def report(fileset, getsize, args):
	"""Print the ten largest aggregated functions of a file set.

	All files are parsed and merged into one dictionary. The ten entries
	with the greatest `getsize` value are printed in ascending order,
	followed by the sum of the printed sizes.
	"""
	func_dict = {}
	metadata = {}
	header = []
	for filename in fileset:
		tree = read_entries( filename, args, metadata, header )
		compute_exclusive_sizes( tree )
		append_func_dict( tree, func_dict )

	# Ascending stable sort; the tail holds the biggest allocations.
	largest = sorted( func_dict.values(), key = getsize )[-10:]

	running_total = 0
	for entry in largest:
		print("{:10} {:10} {} - {}".format( getsize(entry), entry.count, entry.func, ', '.join( entry.buckets ) ))
		running_total += getsize(entry)
	print('Total delta: {0}'.format( running_total ))

def callstacks(function, fileset, getsize, args):
	"""Print every call stack that reaches a function matching `function`.

	For each dump in `fileset` the metadata header lines are printed first.
	Then, for every tree node whose name contains `function`, the stack is
	printed from that node up to the root, one frame per line with its size
	and buckets. The summed size of all matching nodes is printed last.
	"""

	def describe_buckets(frame):
		# Short bucket lists are spelled out, long ones are only counted.
		if len(frame.buckets) > 4:
			return '{} buckets'.format( len(frame.buckets) )
		return 'bucket(s) {}'.format( ', '.join(frame.buckets) )

	def visit(entry, stack):
		# Depth-first walk; `stack` holds the path from the root to `entry`.
		stack.append( entry )
		matched = 0

		if function in entry.func:
			matched += getsize( entry )
			for frame in stack[::-1]:
				print('{} | {} bytes in {}'.format( frame.func, getsize(frame), describe_buckets(frame) ))
			print('')

		for child in entry.children:
			matched += visit( child, stack )

		stack.pop()
		return matched

	grand_total = 0
	metadata = {}

	for filename in fileset:
		header = []
		tree = read_entries( filename, args, metadata, header )
		for item in header:
			print(item)
		print('')

		compute_exclusive_sizes( tree )
		grand_total += visit( tree, [] )

	print('Total: {}'.format( grand_total ))

def parse_args(argv):
	"""Parse and validate the command line.

	Args:
		argv: list of argument strings, without the program name.

	Returns:
		The argparse namespace. `exclusive` is forced to True when neither
		--exclusive nor --inclusive was given.

	Exits with status 1 after printing an error when the options are
	inconsistent: not exactly one action, both size modes, --noparams
	together with --storage-details, or --compare without a "/" separator.
	"""
	parser = argparse.ArgumentParser( description = 'Compares the heap memory allocations between two dumps of Memvisualize.' )
	parser.add_argument( '-r', '--report', nargs = '+', help = 'Reports the 10 functions with biggest heap allocation in the given file set.' )
	parser.add_argument( '-c', '--compare', nargs = '+', help = 'Prints heap allocation differences between two dump filesets (separate filesets with a single "/").' )
	parser.add_argument( '-a', '--callstacks', nargs = '+', help = 'Prints the callstacks ending with the given function in the given fileset.' )
	parser.add_argument( '-e', '--exclusive', action = 'store_true', help = 'Memory counters for each function are exclusive, i.e. they don\'t include the memory allocated by children functions. Default behaviour if neither of --exclusive and --inclusive is specified.' )
	parser.add_argument( '-i', '--inclusive', action = 'store_true', help = 'Memory counters for each function are inclusive, i.e. they do include the memory allocated by children functions.' )
	parser.add_argument( '-p', '--noparams', action = 'store_true', help = 'Remove function and template parameters from function signatures. Useful to make heap comparisons between PS3 and Xbox360, for example. Not compatible with --storage-details.' )
	parser.add_argument( '-s', '--storage-details', action = 'store_true', help = 'Include type details for storage allocations like atArray and fwPool. Not compatible with --noparams.' )
	parser.add_argument( '-b', '--buckets', action = 'store_true', help = 'Prints the bucket summary.' )
	parser.add_argument( '-x', '--xboxhack', action = 'store_true', help = 'Xbox callstack hack for atArray and atMapMemory.' )
	args = parser.parse_args( argv )

	num_actions = sum( 1 if action else 0 for action in (args.report, args.compare, args.callstacks) )
	if num_actions != 1:
		print('Error: you must specify exactly one between --report, --compare and --callstacks. Use -h/--help for usage help.')
		sys.exit(1)

	if args.exclusive and args.inclusive:
		print('Error: you must specify exactly one between --exclusive and --inclusive. Use -h/--help for usage help.')
		sys.exit(1)

	if args.noparams and args.storage_details:
		print('Error: --noparams and --storage-details are incompatible with each other. Use -h/--help for usage help.')
		sys.exit(1)

	# A plain membership test replaces the former bare except, which would
	# also have swallowed unrelated exceptions such as KeyboardInterrupt.
	if args.compare and '/' not in args.compare:
		print('Error: with -c/--compare you should provide two file sets separated by "/". Use -h/--help for usage help.')
		sys.exit(1)

	if not args.exclusive and not args.inclusive:
		args.exclusive = True

	return args

if __name__ == '__main__':
	# Script entry point: parse the options, pick the size accessor, then
	# run the single requested action.
	args = parse_args( sys.argv[1:] )

	# NOTE: `getsize` is bound at module level on purpose; comparator(),
	# get_funcs_by_bucket(), get_buckets() and print_buckets() read it as a global.
	getsize = (lambda x: x.excl_size) if args.exclusive else (lambda x: x.incl_size)

	if args.report:
		report( args.report, getsize, args )
	elif args.compare:
		# The first "/" separates the baseline file set from the other one.
		separator = args.compare.index( '/' )
		compare( args.compare[:separator], args.compare[separator + 1:], getsize, args )
	elif args.callstacks:
		# First value is the function name, the remaining ones are dump files.
		function, fileset = args.callstacks[0], args.callstacks[1:]
		callstacks( function, fileset, getsize, args )