"""Summarise which files cannot use in-place loading.

Usage: placestats.py <infile> <outfile>

The input is a ragebuilder log.  Every line containing the text
"can not use in-place loading" is copied to the output file, and a summary
of the affected file names, schemas and file extensions is printed to
stdout.  A matching line looks like this:

    Line 308: File memory:$03F63BF0,6004,0:packfile1:\\des_cropduster.ctyp,
    schema CMapTypes (0xd98bb561) can not use in-place loading
"""

import re
import sys
from collections import Counter

# Text that marks a log line as an in-place loading failure.
MARKER = 'can not use in-place loading'

# Placeholder recorded when a line is too short to contain a field.
MISSING = 'error'


def clean_filename(field):
    """Return the bare file name from the third comma-separated field."""
    name = field.replace('0:packfile1:', '')
    # Only a single leading backslash is dropped; the order of these three
    # steps is significant and mirrors the log format shown above.
    if name.startswith('\\'):
        name = name[1:]
    return name.replace('0:', '')


def clean_schema(field):
    """Return the schema name from the fourth comma-separated field.

    Everything from "(0x" onwards is discarded, then the "schema " label
    and all remaining spaces are removed.
    """
    schema = re.sub(r'\(0x.*', '', field)
    return schema.replace('schema ', '').replace(' ', '')


def parse_line(line):
    """Split one log line into ``(filename, schema)``.

    Either element is ``None`` when the line has too few comma-separated
    fields to contain it.  The line ending is stripped before splitting so
    that it can never leak into the last field.
    """
    fields = line.rstrip('\r\n').split(',')
    filename = clean_filename(fields[2]) if len(fields) > 2 else None
    schema = clean_schema(fields[3]) if len(fields) > 3 else None
    return filename, schema


def _report_malformed(linenum, line):
    """Print a diagnostic for a line that is missing an expected field."""
    stripped = line.rstrip('\r\n')
    print('Error here  %d %s %s' % (linenum, stripped, stripped.split(',')))


def collect_stats(lines):
    """Parse the matched log lines.

    Returns ``(names, schema_counts)``: ``names`` is the list of file names
    in input order (duplicates kept) and ``schema_counts`` is a ``Counter``
    of how many lines mention each schema.  A missing field is reported on
    stdout once per field and recorded as ``MISSING``.
    """
    names = []
    schema_counts = Counter()
    for linenum, line in enumerate(lines, 1):
        filename, schema = parse_line(line)
        if filename is None:
            _report_malformed(linenum, line)
            filename = MISSING
        if schema is None:
            _report_malformed(linenum, line)
            schema = MISSING
        names.append(filename)
        schema_counts[schema] += 1
    return names, schema_counts


def format_report(names, schema_counts):
    """Build the summary text printed at the end of a run.

    File names are de-duplicated and sorted.  Schema frequencies count
    every matched line, whereas file-type frequencies count each unique
    file name once, keyed by the text after the last '.' in the name.
    """
    unique_names = sorted(set(names))
    # Iterating the sorted list (rather than a set) keeps the order of
    # equally frequent file types deterministic.
    type_counts = Counter(name.rsplit('.', 1)[-1] for name in unique_names)

    out = ["%d files can't load in place:" % len(unique_names)]
    out.extend(unique_names)
    out.append('\ndue to %d schemas: %s'
               % (len(schema_counts), set(schema_counts)))
    out.append('\nfrequency of mismatched schemas:')
    out.extend('%s %d' % item for item in schema_counts.most_common())
    out.append("\n%d files can't load in place freq:" % len(unique_names))
    out.extend('%s %d' % item for item in type_counts.most_common())
    return '\n'.join(out)


def main(argv):
    """Run the tool; ``argv`` has the same layout as ``sys.argv``.

    Returns the process exit status: 2 on a usage error, 0 otherwise.
    """
    if len(argv) != 3:
        print('Usage: \nplacestats.py <infile> <outfile>')
        return 2
    infilename, outfilename = argv[1], argv[2]

    # Keep only the lines that report an in-place loading failure.
    with open(infilename) as fin:
        matched = [line for line in fin if MARKER in line]

    # 'w' creates the file when needed and truncates it, so lines left over
    # from an earlier, longer run can no longer pollute the statistics.
    with open(outfilename, 'w') as fout:
        fout.writelines(matched)

    names, schema_counts = collect_stats(matched)
    print(format_report(names, schema_counts))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))