"""Report and compare heap allocations recorded in Memvisualize dump files.

A dump is a text file in which:

* lines starting with ``#`` carry ``key: value`` metadata;
* every other non-blank line describes one call-tree node as
  ``func | alloc [| bucket, ...]``; when the line starts with ``[`` the first
  two ``|``-separated fields together form the function name.  The first and
  third whitespace-separated tokens of ``alloc`` are the inclusive size and
  the allocation count;
* the number of leading spaces gives the depth of the node in the tree.

The code runs unchanged on Python 2.7 and Python 3: every ``print`` call takes
exactly one argument and sorting uses ``key=`` functions.
"""
import sys
import argparse
import re

SUPPORTED_DUMP_VERSION = 4

# Globals
# Both lists hold the values of the two compared file sets (index 0 and 1).
g_platform = []
g_build_version = []

# Separator rules used by the textual reports.
_BUCKET_RULE = "-------------------------------------------------------------------------------------------------"
_SUMMARY_RULE = "---------------------------------------------------------------------------------------"

# Metadata lines look like "# key: value".
_METADATA_RE = re.compile(r"\s*(?P<key>[^\:]+):\s*(?P<value>.+)")

# Callers of 'atArray' whose template argument names the stored type.  They
# are tried in this order and the first match wins.
_ATARRAY_CALLER_PATTERNS = (
    r'rage::atArray<(.*)>::Construct',
    r'rage::atArray<(.*)>::Resize',
    r'rage::atArray<(.*)>::Reserve',
)

# Function-name prefixes that trigger the Xbox call-stack clean-up.
_XBOX_HACK_PREFIXES = (
    'atArray', 'rage::atArray',
    'atMapMemory', 'rage::atMapMemory',
    'atPtrCreator', 'rage::atPtrCreator',
)


def _excl_size(entry):
    """Return the exclusive size of *entry*."""
    return entry.excl_size


def _incl_size(entry):
    """Return the inclusive size of *entry*."""
    return entry.incl_size


# Module-wide size accessor.  main() rebinds it according to the
# --exclusive/--inclusive switches; helpers that are not handed an explicit
# accessor fall back to it.
getsize = _excl_size


def _pick_getsize(explicit):
    """Return *explicit* when given, else the module-wide ``getsize``."""
    if explicit is not None:
        return explicit
    return globals()['getsize']


def _cmp(left, right):
    """Three-way comparison returning -1, 0 or 1 (Python 2 ``cmp``)."""
    return (left > right) - (left < right)


def _kb(size):
    """Convert a size to kilobytes, rounded to two decimals."""
    return round(size / 1024.0, 2)


class Function:
    """A function name paired with a size."""

    def __init__(self, name, size):
        self.name = name
        self.size = size


class Entry:
    """One node of the allocation call tree, or an aggregate of such nodes."""

    def __init__(self, func, incl_size, excl_size, count, buckets):
        self.func = func            # function name (possibly decorated)
        self.incl_size = incl_size  # size including the children
        self.excl_size = excl_size  # size excluding the children; None until computed
        self.count = count          # allocation count
        self.children = []          # child Entry objects
        self.buckets = buckets      # list of bucket names

    def __repr__(self):
        return 'Entry({!r}, {!r}, {!r}, {!r}, {!r})'.format(
            self.func, self.incl_size, self.excl_size, self.count, self.buckets)

    def _merged_buckets(self, op):
        """Return the sorted union of this entry's buckets and *op*'s."""
        return sorted(set(self.buckets + op.buckets))

    def add(self, op):
        """Return a new childless Entry holding the sums of self and *op*."""
        return Entry(self.func,
                     self.incl_size + op.incl_size,
                     self.excl_size + op.excl_size,
                     self.count + op.count,
                     self._merged_buckets(op))

    def sub(self, op):
        """Return a new childless Entry holding self minus *op*."""
        return Entry(self.func,
                     self.incl_size - op.incl_size,
                     self.excl_size - op.excl_size,
                     self.count - op.count,
                     self._merged_buckets(op))

    def __neg__(self):
        """Return a new childless Entry with sizes and count negated."""
        return Entry(self.func, -self.incl_size, -self.excl_size,
                     -self.count, list(self.buckets))


def get_level(line):
    """Return the number of leading space characters of *line*."""
    return len(line) - len(line.lstrip(' '))


def strip_entry(prefix, entry):
    """Shorten ``entry.func`` in place and return *entry*.

    Entries whose name does not contain *prefix* are returned untouched.
    Otherwise the name becomes *prefix* followed by the text that starts right
    after the ``>`` of the last ``>::`` and stops before the next ``(``.
    """
    if prefix not in entry.func:
        return entry
    value = prefix
    start = entry.func.rfind('>::')
    if start >= 0:
        end = entry.func.find('(', start)
        if end < 0:
            # No parameter list: keep the whole tail instead of silently
            # dropping its last character (find() returned -1).
            end = len(entry.func)
        value += entry.func[start + 1:end]
    entry.func = value
    return entry


def _parse_metadata_line(line, metadata):
    """Store the ``key: value`` pair of a ``#`` line into *metadata*.

    Returns the lower-cased key and the value, or ``(None, None)`` after
    printing a warning when the line cannot be parsed.
    """
    m = _METADATA_RE.match(line[1:])
    if m is None:
        print('Warning - unable to parse metadata ' + line[1:])
        return None, None
    key = m.group('key').strip().lower()
    value = m.group('value').strip()
    metadata[key] = value
    if key == 'dumpversion':
        try:
            version = int(value)
        except ValueError:
            print('Warning - unable to parse metadata ' + line[1:])
        else:
            if version > SUPPORTED_DUMP_VERSION:
                print("WARNING: Input used by this report was generated by a new version of memvisualize (using dumpversion " + value + "). Ensure this script is still compatible, then update SUPPORTED_DUMP_VERSION.")
    return key, value


def _parse_entry_line(line, args):
    """Build a childless Entry from one non-metadata line.

    Raises ValueError when the line has too few fields or the size/count
    tokens are not integers.
    """
    parts = [part.strip() for part in line.split('|')]
    # Lines starting with '[' use their first two fields as the name.
    name_fields = 2 if line.strip().startswith('[') else 1
    if len(parts) <= name_fields:
        raise ValueError('missing allocation field')
    func = ' | '.join(parts[:name_fields])
    alloc = parts[name_fields]
    buckets = []
    if len(parts) > name_fields + 1:
        buckets = [bucket.strip() for bucket in parts[name_fields + 1].split(',')]
    if args.noparams:
        func = re.sub(r'\(.*?\)', '()', func)
        func = re.sub(r'<.*?>', '<>', func)
    tokens = alloc.split()
    try:
        incl_size, count = int(tokens[0]), int(tokens[2])
    except IndexError:
        raise ValueError('malformed allocation field ' + repr(alloc))
    return Entry(func, incl_size, None, count, buckets)


def _storage_type(func, ancestors):
    """Return the storage type to append to *func*, or None.

    *ancestors* is the non-empty list of entries from the root down to the
    parent of the entry being read.
    """
    caller, match = None, None
    if func == 'atArray':
        caller = ancestors[-1].func
        for pattern in _ATARRAY_CALLER_PATTERNS:
            match = re.match(pattern, caller)
            if match:
                break
    elif 'rage::fwBasePool::AllocStorage' in func:
        if len(ancestors) < 3:
            # Not enough frames above us to identify the pool owner.
            return None
        caller = ancestors[-3].func
        match = (re.match(r'(.*)::InitPool', caller)
                 or re.match(r'rage::fwAssetStore<(.*)>::FinalizeSize', caller))
    if match and match.group(1):
        return match.group(1)
    return caller


def _apply_xbox_hack(func, stack):
    """Shorten the names of the (up to) four innermost frames of *stack*.

    HACK: only applied when *func* starts with one of the container prefixes;
    each of those frames is passed through strip_entry() with that prefix.
    """
    for name in _XBOX_HACK_PREFIXES:
        if func.startswith(name):
            for frame in stack[-4:]:
                strip_entry(name, frame)


def read_entries(filename, args, metadata, header):
    """Parse a dump file into a tree of Entry objects.

    Args:
        filename: path of the dump to read.
        args: parsed options; ``noparams``, ``storage_details`` and
            ``xboxhack`` are read.
        metadata: dict updated with the lower-cased ``key: value`` pairs.
        header: list extended with every stripped metadata line.

    Returns:
        The entry of the last line found at level 0, or None when the file
        contains no such line.

    Raises:
        ValueError: on a malformed entry line, or on an indented line that
            has no parent.
    """
    stack = []
    root = None
    xbox = False
    with open(filename, 'r') as dump:
        for lineno, line in enumerate(dump, 1):
            if not line.strip():
                continue
            # Does this line contain meta information?
            if line.startswith('#'):
                header.append(line.strip())
                key, value = _parse_metadata_line(line, metadata)
                if key is not None and 'platform' in key and 'Xbox 360' in value:
                    xbox = True
                continue

            try:
                entry = _parse_entry_line(line, args)
            except ValueError as error:
                raise ValueError('{}:{}: {}'.format(filename, lineno, error))
            func = entry.func

            level = get_level(line)
            if level == 0:
                root = entry
                # A new root starts a fresh tree; stale frames must not
                # adopt its children.
                del stack[:]
            else:
                del stack[level:]
                if not stack:
                    raise ValueError('{}:{}: indented entry has no parent'.format(filename, lineno))
                stack[-1].children.append(entry)
                if args.storage_details:
                    storagetype = _storage_type(func, stack)
                    if storagetype:
                        entry.func = '{} [{}]'.format(func, storagetype)
            stack.append(entry)

            if xbox and args.xboxhack:
                _apply_xbox_hack(func, stack)
    return root


def compute_exclusive_sizes(entry):
    """Set ``excl_size`` on *entry* and on every entry below it.

    The exclusive size is the inclusive size minus the inclusive sizes of the
    direct children.
    """
    pending = [entry]
    while pending:
        node = pending.pop()
        node.excl_size = node.incl_size - sum(child.incl_size for child in node.children)
        pending.extend(node.children)


def get_bucket_key(entry):
    """Return the entry's buckets joined with ' | ', or None without buckets."""
    if not entry.buckets:
        return None
    return ' | '.join(entry.buckets)


def get_func_key(entry):
    """Return the aggregation key: the name plus the bucket key, if any."""
    bucket = get_bucket_key(entry)
    if bucket is None:
        return entry.func
    return entry.func + ' | ' + bucket


def append_func_dict(entry, funcs):
    """Accumulate *entry* and all its descendants into *funcs*.

    *funcs* maps get_func_key() values to aggregate Entry objects; nodes are
    visited parent first, children in order.
    """
    pending = [entry]
    while pending:
        node = pending.pop()
        func_key = get_func_key(node)
        if func_key in funcs:
            funcs[func_key] = funcs[func_key].add(node)
        else:
            funcs[func_key] = Entry(node.func, node.incl_size, node.excl_size,
                                    node.count, node.buckets)
        pending.extend(reversed(node.children))


## custom sort
def comparator(x, y, getsize=None):
    """Order entries by size, then by lower-cased function name.

    *getsize* defaults to the module-wide accessor.
    """
    size_of = _pick_getsize(getsize)
    if size_of(x) != size_of(y):
        return -1 if size_of(x) < size_of(y) else 1
    return _cmp(x.func.lower(), y.func.lower())


def compare_func_dicts(funcs0, funcs1, getsize):
    """Return the per-key differences ``funcs1 - funcs0`` as a sorted list.

    Keys only in *funcs0* yield negated entries, keys only in *funcs1* yield
    the entry itself.  The list is sorted by size, then by lower-cased name.
    """
    summary = []
    for func in funcs0:
        if func in funcs1:
            summary.append(funcs1[func].sub(funcs0[func]))
        else:
            summary.append(-funcs0[func])
    for func in funcs1:
        if func not in funcs0:
            summary.append(funcs1[func])
    summary.sort(key=lambda item: (getsize(item), item.func.lower()))
    return summary


def compare_functions(funcs0, funcs1, getsize):
    """Return Function records for bucketed entries whose size changed."""
    results = []

    def record(entry):
        if getsize(entry) != 0:
            results.append(Function(entry.func, getsize(entry)))

    for func in funcs0:
        if not funcs0[func].buckets:
            continue
        if func in funcs1:
            record(funcs1[func].sub(funcs0[func]))
        else:
            record(-funcs0[func])
    for func in funcs1:
        if funcs1[func].buckets and func not in funcs0:
            record(funcs1[func])
    return results


def compare_buckets(funcs0, funcs1, getsize):
    """Return a dict mapping bucket keys to their size delta.

    Every bucket present in *funcs0* gets a slot (possibly 0); buckets only
    present in *funcs1* get one only when they contribute a non-zero size.
    """
    results = {}
    for func in funcs0:
        bucket = get_bucket_key(funcs0[func])
        if bucket is None:
            continue
        results.setdefault(bucket, 0)
        if func in funcs1:
            entry = funcs1[func].sub(funcs0[func])
        else:
            entry = -funcs0[func]
        results[bucket] += getsize(entry)
    for func in funcs1:
        if func in funcs0:
            continue
        entry = funcs1[func]
        if getsize(entry) != 0:
            bucket = get_bucket_key(entry)
            if bucket is not None:
                results[bucket] = results.get(bucket, 0) + getsize(entry)
    return results


def _warn_if_different(metadatas, key, label):
    """Print a warning when both metadata sets define *key* differently."""
    if key not in metadatas[0] or key not in metadatas[1]:
        return
    first, second = metadatas[0][key], metadatas[1][key]
    if first != "" and second != "" and first != second:
        print("WARNING: Comparing different " + label + ": " + first + " vs " + second)


def alert_on_mismatch(metadatas):
    """Warn about metadata differences between ``metadatas[0]`` and ``[1]``.

    Also records both platforms in ``g_platform`` and both build versions in
    ``g_build_version`` when both sides provide them.
    """
    # Platform
    if 'platform' in metadatas[0] and 'platform' in metadatas[1]:
        # Replace rather than append so repeated calls never leave stale values.
        g_platform[:] = [metadatas[0]["platform"], metadatas[1]["platform"]]
    _warn_if_different(metadatas, 'platform', 'platforms')
    _warn_if_different(metadatas, 'configuration', 'configurations')
    _warn_if_different(metadatas, 'packagetype', 'package types')
    # Build Version
    if 'buildversion' in metadatas[0] and 'buildversion' in metadatas[1]:
        g_build_version[:] = [metadatas[0]["buildversion"], metadatas[1]["buildversion"]]
    _warn_if_different(metadatas, 'buildversion', 'build versions')


def name_comparator(x, y):
    """Three-way comparison of two Function records by lower-cased name."""
    return _cmp(x.name.lower(), y.name.lower())


def size_comparator(x, y):
    """Three-way comparison of two Function records by size."""
    return _cmp(x.size, y.size)


def get_funcs_by_bucket(summary, getsize=None):
    """Group the non-zero entries of *summary* by bucket key.

    Entries without buckets are grouped under the key None.
    """
    size_of = _pick_getsize(getsize)
    results = {}
    for entry in summary:
        if size_of(entry) != 0:
            results.setdefault(get_bucket_key(entry), []).append(entry)
    return results


def get_buckets(funcs, getsize=None):
    """Return a dict mapping each bucket key of *funcs* to its total size."""
    size_of = _pick_getsize(getsize)
    results = {}
    for func in funcs:
        entry = funcs[func]
        bucket = get_bucket_key(entry)
        if bucket is not None:
            results[bucket] = results.get(bucket, 0) + size_of(entry)
    return results


def get_bucket_total(buckets):
    """Return the sum of all sizes in a bucket dict."""
    return sum(buckets.values())


def print_buckets(source, target, delta, summary, getsize=None):
    """Print the bucket table followed by the per-bucket function details.

    Column titles come from the first two items of ``g_build_version``;
    'unknown' is shown for a missing one.
    """
    size_of = _pick_getsize(getsize)
    versions = (list(g_build_version) + ['unknown', 'unknown'])[:2]

    print("")
    print(' '.join([
        "{0:{width}}".format("[BUCKETS", width=24),
        "{0:>14}{1:>10}".format(versions[0], "KB"),
        "{0:>15}{1:>10}".format(versions[1], "KB"),
        "{0:>11}{1:>10}".format("DELTA", "KB]"),
    ]))
    print(_BUCKET_RULE)
    for bucket in sorted(delta):
        source_size = source.get(bucket, 0)
        target_size = target.get(bucket, 0)
        print(' '.join([
            "{0:{width}}".format(bucket, width=24),
            ' {0:{width}}{1:{width}.2f} '.format(source_size, _kb(source_size), width=10),
            ' {0:{width}}{1:{width}.2f} '.format(target_size, _kb(target_size), width=10),
            '{0:{width}}{1:{width}.2f}'.format(delta[bucket], _kb(delta[bucket]), width=10),
        ]))
    print(_BUCKET_RULE)
    source_total = get_bucket_total(source)
    target_total = get_bucket_total(target)
    delta_total = get_bucket_total(delta)
    print(' '.join([
        "{0:{width}}".format("TOTAL", width=24),
        ' {0:{width}}{1:{width}.2f} '.format(source_total, _kb(source_total), width=10),
        ' {0:{width}}{1:{width}.2f}'.format(target_total, _kb(target_total), width=10),
        ' {0:{width}}{1:{width}.2f}'.format(delta_total, _kb(delta_total), width=10),
    ]))

    bucket_detail = get_funcs_by_bucket(summary, size_of)
    # Entries without a bucket (key None) are not listed; dropping the key
    # before sorting also keeps the sort valid on Python 3.
    for bucket in sorted(key for key in bucket_detail if key is not None):
        sys.stdout.write("\n[" + bucket + "]\n")
        print(_BUCKET_RULE)
        for entry in bucket_detail[bucket]:
            print('{0:{width}} {1}'.format(size_of(entry), entry.func, width=8))


def _load_tree(filename, args, metadata, header):
    """Read *filename* and compute exclusive sizes.

    Returns the root entry, or None (after a warning) when the file holds no
    entry.
    """
    entry_tree = read_entries(filename, args, metadata, header)
    if entry_tree is None:
        print('Warning - no allocation entries found in ' + filename)
        return None
    compute_exclusive_sizes(entry_tree)
    return entry_tree


def compare(fileset0, fileset1, getsize, args):
    """Print the allocation differences between two file sets.

    The files of each set are aggregated together, and so is their metadata
    (a later file overrides the keys of an earlier one).
    """
    func_dicts = []
    metadatas = []
    header = []
    for fileset in (fileset0, fileset1):
        func_dict = {}
        metadata = {}
        for filename in fileset:
            entry_tree = _load_tree(filename, args, metadata, header)
            if entry_tree is not None:
                append_func_dict(entry_tree, func_dict)
        # One metadata dict per file set, so index 0/1 always means
        # "first set"/"second set" even with several files per set.
        metadatas.append(metadata)
        func_dicts.append(func_dict)

    alert_on_mismatch(metadatas)

    # Build Version
    g_build_version[:] = [metadata.get("buildversion", "unknown") for metadata in metadatas]

    # Compare
    summary = compare_func_dicts(func_dicts[0], func_dicts[1], getsize)

    # Buckets
    if args.buckets:
        source_buckets = get_buckets(func_dicts[0], getsize)
        target_buckets = get_buckets(func_dicts[1], getsize)
        delta_buckets = compare_buckets(func_dicts[0], func_dicts[1], getsize)
        print_buckets(source_buckets, target_buckets, delta_buckets, summary, getsize)

    print("\n[FUNCTION SUMMARY]")
    print(_SUMMARY_RULE)
    total_delta = 0
    for entry in summary:
        if getsize(entry) != 0:
            print("{:10} {:10} {} - {}".format(getsize(entry), entry.count, entry.func, ', '.join(entry.buckets)))
            total_delta += getsize(entry)
    print('Total delta: {0}'.format(total_delta))


def report(fileset, getsize, args):
    """Print the ten largest aggregated entries of *fileset*, smallest first."""
    func_dict = {}
    metadata = {}
    header = []
    for filename in fileset:
        entry_tree = _load_tree(filename, args, metadata, header)
        if entry_tree is not None:
            append_func_dict(entry_tree, func_dict)

    summary = sorted(func_dict.values(), key=getsize)[-10:]
    total_delta = 0
    for entry in summary:
        print("{:10} {:10} {} - {}".format(getsize(entry), entry.count, entry.func, ', '.join(entry.buckets)))
        total_delta += getsize(entry)
    print('Total delta: {0}'.format(total_delta))


def callstacks(function, fileset, getsize, args):
    """Print every call stack whose innermost frame name contains *function*.

    Each file's metadata header is printed first; the sizes of all matching
    entries are summed and printed at the end.
    """
    def visit(entry, stack):
        stack.append(entry)
        total = 0
        if function in entry.func:
            total += getsize(entry)
            for frame in reversed(stack):
                if len(frame.buckets) <= 4:
                    buckets = 'bucket(s) {}'.format(', '.join(frame.buckets))
                else:
                    buckets = '{} buckets'.format(len(frame.buckets))
                print('{} | {} bytes in {}'.format(frame.func, getsize(frame), buckets))
            print('')
        for child in entry.children:
            total += visit(child, stack)
        stack.pop()
        return total

    total = 0
    metadata = {}
    for filename in fileset:
        header = []
        entry_tree = read_entries(filename, args, metadata, header)
        for item in header:
            print(item)
        print('')
        if entry_tree is None:
            print('Warning - no allocation entries found in ' + filename)
            continue
        compute_exclusive_sizes(entry_tree)
        total += visit(entry_tree, [])
    print('Total: {}'.format(total))


def parse_args(argv):
    """Parse and validate the command line.

    Exits with status 1 when the options are inconsistent.  ``exclusive`` is
    switched on when neither --exclusive nor --inclusive is given.
    """
    parser = argparse.ArgumentParser(description='Compares the heap memory allocations between two dumps of Memvisualize.')
    parser.add_argument('-r', '--report', nargs='+', help='Reports the 10 functions with biggest heap allocation in the given file set.')
    parser.add_argument('-c', '--compare', nargs='+', help='Prints heap allocation differences between two dump filesets (separe filesets with a single "/").')
    parser.add_argument('-a', '--callstacks', nargs='+', help='Prints the callstacks ending with the given function in the given fileset.')
    parser.add_argument('-e', '--exclusive', action='store_true', help='Memory counters for each function are exclusive, i.e. they don\'t include the memory allocated by children functions. Default behaviour if neither of --exclusive and --inclusive is specified.')
    parser.add_argument('-i', '--inclusive', action='store_true', help='Memory counters for each function are inclusive, i.e. they do include the memory allocated by children functions.')
    parser.add_argument('-p', '--noparams', action='store_true', help='Remove function and template parameters from function signatures. Useful to make heap comparisons between PS3 and Xbox360, for example. Not compatible with --storage-details.')
    parser.add_argument('-s', '--storage-details', action='store_true', help='Include type details for storage allocations like atArray and fwPool. Not compatible with --noparams.')
    parser.add_argument('-b', '--buckets', action='store_true', help='Prints the bucket summary.')
    parser.add_argument('-x', '--xboxhack', action='store_true', help='Xbox callstack hack for atArray and atMapMemory.')
    args = parser.parse_args(argv)

    num_actions = sum(1 for action in (args.report, args.compare, args.callstacks) if action)
    if num_actions != 1:
        print('Error: you must specify exactly one between --report, --compare and --callstacks. Use -h/--help for usage help.')
        sys.exit(1)
    if args.exclusive and args.inclusive:
        print('Error: you must specify exactly one between --exclusive and --inclusive. Use -h/--help for usage help.')
        sys.exit(1)
    if args.noparams and args.storage_details:
        print('Error: --noparams and --storage-details and incompatible with each other. Use -h/--help for usage help.')
        sys.exit(1)
    if args.compare and '/' not in args.compare:
        print('Error: with -c/--compare you should provide two file sets separated by "/". Use -h/--help for usage help.')
        sys.exit(1)
    if not args.exclusive and not args.inclusive:
        args.exclusive = True
    return args


def main(argv=None):
    """Run the tool on *argv* (defaults to ``sys.argv[1:]``)."""
    global getsize
    args = parse_args(sys.argv[1:] if argv is None else argv)
    getsize = _excl_size if args.exclusive else _incl_size
    if args.report:
        report(args.report, getsize, args)
    elif args.compare:
        slash_idx = args.compare.index('/')
        fileset0 = args.compare[:slash_idx]
        fileset1 = args.compare[slash_idx + 1:]
        compare(fileset0, fileset1, getsize, args)
    elif args.callstacks:
        callstacks(args.callstacks[0], args.callstacks[1:], getsize, args)


if __name__ == '__main__':
    main()