############################################################################
#
# binStar: IDAPython library of commands to aid binary analysis
#
#
############################################################################

import re, json, os, cPickle
import time

import idaapi, idautils, idc
from pdbparse.symlookup import Lookup

#Module-wide state shared by the functions below.

#Not assigned anywhere in the visible part of this file
__cfg_dict__ = None
#Path of the PDB file; loadPdbFile() derives it from the input file when still None
__pdb_file__ = None
#The next three are filled in by runTests(). They are declared here so the
#names are always bound for the functions that read them (report() reads
#__report_file__, runSymbolRegex() reads __log__)
__work_dir__ = None
__report_file__ = None
__log__ = None
#Symbol lookup object; set by loadPdbFile() or loadPdbPickle()
goodLooks = None
#When True, loadPdbPickle() prints how long the load took
timing = False

#---------------------------------Strings----------------------------------------------------------------------------------------------------------------------------

def regexStringSearch(list, pattern):
    """
    Takes Perl-Regex and string list, searches for a match, returns results

    @param list: List of strings to search through. Items that are not plain
                 str must provide their address in an "ea" attribute
    @type list: list<str>
    @param pattern: Perl-style regex to search for
    @type pattern: string
    @rtype: list<(str,long)>
    @return: List of (matching string, address) tuples
    """
    addrs = []
    regex = re.compile(pattern)             #Compiles regex with supplied pattern
    pdbAddrs = None                         #Address list taken from goodLooks, fetched on first use
    for idx, string in enumerate(list):
        s = str(string)
        if regex.search(s) is None:         #No match anywhere in this string
            continue
        if not isinstance(string, str):
            addrs.append((s, string.ea))    #Item carries its own address
        else:
            #Hack. A plain string has no address of its own, so the entry at
            #the same index of the first location list in goodLooks is used
            if pdbAddrs is None:
                pdbAddrs = next(iter(goodLooks.locs.values()))
            addrs.append((s, pdbAddrs[idx]))
    print("Matches identified: %d" % len(addrs))
    return addrs

def strings():
    """
    Function to return list of all strings in target binary

    @rtype: list
    @return: List of the string items yielded by idautils.Strings for the
             target binary (regexStringSearch() reads each item's address
             from its "ea" attribute)
    """
    s = idautils.Strings(False)             #Inits Strings util
    #Grabs C and Unicode strings. The constants are reached through idautils
    #because this file only imports the module, not the Strings class itself
    s.setup(strtypes=idautils.Strings.STR_UNICODE | idautils.Strings.STR_C)
    strs = []
    for i, v in enumerate(s):               #Enumerates strings
        if v is None:                       #Item could not be retrieved
            print("Failed to retrieve string index %d" % i)
        else:
            strs.append(v)                  #Append to strings list
    return strs

def stringRE(regex):
    """
    Runs a regex over every string found in the binary

    @param regex: Perl-style regex
    @type regex: str
    @rtype: list<(str,long)>
    @return: list of string,address tuples
    """
    #Collect the binary's strings and hand them straight to the matcher
    return regexStringSearch(strings(), regex)

def pdbSymbol(regex):
    """
    Runs a regex over the symbol names taken from the PDB file

    @param regex: Perl-style regex
    @type regex: str
    @rtype: list<(str,long)>
    @return: list of string,address tuples, or None when pdbSymbolList()
             has no symbol list to offer
    """
    symbols = pdbSymbolList()
    if symbols is not None:
        return regexStringSearch(symbols, regex)
    return None

def runStringRegex(reg_list):
    """
    Runs every regex against the strings of the binary and reports the matches

    @param reg_list: Perl-style regexes to search for
    @type reg_list: list<str>
    @return: void
    """
    report("############## String Regex ##############\n\n")
    for pattern in reg_list:                #Not named "re": that would hide the re module
        addrs = stringRE(pattern)
        if not addrs:                       #Nothing matched, nothing to report
            continue
        report("Matches found in ASCII strings for: " + pattern + " at the following addresses:")
        for text, ea in addrs:
            report('\t' + text + " at Address: " + hex(ea))
        report("\n")
    report('\n\n')

def runSymbolRegex(reg_list):
    """
    Runs every regex against the PDB symbol names and reports the matches

    @param reg_list: Perl-style regexes to search for
    @type reg_list: list<str>
    @return: void
    """
    #The log is opened by runTests(); it may be missing or already closed
    #when this function is called on its own
    log = globals().get("__log__")
    report("############## Symbol Regex ##############\n\n")
    for pattern in reg_list:
        addrs = pdbSymbol(pattern)
        if addrs is None:                   #pdbSymbol() had no symbol list to search
            if log is not None and not log.closed:
                log.write("Symbol retrieval failed.")
            report("No symbols returned. PDB was probably not loaded.")
            continue
        if not addrs:                       #Nothing matched, nothing to report
            continue
        report("Matches found in symbols for: " + pattern + " at the following addresses:")
        for name, ea in addrs:
            report('\t' + name + " at Address: " + hex(ea))
        report('\n')
    report('\n\n')

#----------------------------Byte Pattern---------------------------------------------------------------------------------------------------------------------------------

def parseBytes(byteString, end=False):
    """
    Function to make dealing with bytes (logically) input agnostic
    e.g. I deal with the following input formats for byte strings:
        "0xA1,0x23,0xF7"
        "A2,45,F7"
        "0xA3 0x45 0xF7"
        "A4 45 F7"
        "A54557"
        "0xA60x450x57"
    Formats may be mixed ("A1 B2C3", "0xA1, 0x23"); empty tokens are ignored.

    @param byteString: String of hex bytes
    @type byteString: string
    @param end: Optional flag to determine endianess of input string. Default: Big Endian
    @type end: bool (Default=False)
    @rtype: (string, int)
    @return: Hex string formatted correctly for "FindBinary()" (space
             separated, two upper-case digits per byte) and the number of bytes
    @raise ValueError: if a token is not valid hex
    """
    values = []
    #Remove all prepended 0x / 0X
    cleaned = byteString.replace("0x", "").replace("0X", "")
    #Commas and whitespace are both delimiters; split() drops empty tokens
    for token in cleaned.replace(",", " ").split():
        #Every two hex digits form one byte, which also covers tokens given
        #without any delimiter (e.g. "A54557")
        values.extend(int(token[i:i + 2], 16) for i in range(0, len(token), 2))
    if end:
        values.reverse()                    #Reverse list for endianess
    bin_str = ' '.join("%02X" % v for v in values)
    return bin_str, len(values)

def byteSig(pattern, end=False):
    """
    Takes string of hex values, and searches binary for pattern

    @param pattern: String of hex bytes
    @type pattern: string
    @param end: Optional flag to determine endianess of input string. Default: Big Endian
    @type end: bool (Default=False)
    @rtype: list<long>
    @return: List of addresses matching signature. When nothing matches, the
             tuple (False, "Could not match[...]") is returned instead
    """
    bin_str, tlen = parseBytes(pattern, end)    #Gets formatted hex string and pattern length
    ret = []
    ea = idc.MinEA()                            #Minimum effective address

    #An empty pattern can never advance ea, so the search is skipped for it
    while tlen > 0:
        #Looks for the next occurrence of the byte string starting at ea
        ea = idc.FindBinary(ea, idc.SEARCH_DOWN, bin_str)
        if ea == idaapi.BADADDR:                #IDAPython's version of NULL
            break
        ret.append(ea)                          #Append to list of addresses
        idc.Message(".")
        ea += tlen                              #Move target address +length of string
    if not ret:                                 #If ret list is empty...
        return (False, "Could not match[%s]" % bin_str)
    idc.Message("\n")
    return ret

def runByteSigs(byte_sigs):
    """
    Searches the binary for every byte pattern and reports where each was found

    @param byte_sigs: Hex byte strings in any format parseBytes() accepts
    @type byte_sigs: list<str>
    @return: void
    """
    report("############## Byte Signatures ##############\n\n")
    for sig in byte_sigs:
        addrs = byteSig(sig)
        #byteSig() signals "no match" with a (False, message) tuple rather
        #than an empty list, so both forms have to be skipped here
        if not addrs or isinstance(addrs, tuple):
            continue
        report("Byte pattern: " + sig + " found at the following addresses:")
        for addr in addrs:
            report('\t' + str(addr))
        report("\n")
    report("\n\n")

#----------------------------------PDB------------------------------------------------------------------------------------------------------------------------

def loadPdbFile(base=0x140000000):
    """
    Loads pdb file into a lookup class object

    The PDB path is taken from the global __pdb_file__. When that is still
    None it is derived from the input file path: same directory and name,
    with the extension replaced by ".pdb".

    @param base: Specifies the base address of the binary. Default: 0x140000000
    @type base: long
    @return: void
    """
    global goodLooks
    global __pdb_file__
    if __pdb_file__ is None:
        #splitext also copes with names that have no extension at all
        __pdb_file__ = os.path.splitext(idc.GetInputFilePath())[0] + ".pdb"
    pdb = [(__pdb_file__, base)]            #Lookup takes a (pdb,baseAddress) tuple
    print("Loading PDB file. This may take some time...")
    goodLooks = Lookup(pdb)                 #Instantiate global Lookup object
    print("PDB file %s loaded" % __pdb_file__)

def pdbSymbolList():
    """
    Flattens the symbol names held by the global Lookup object into one list

    @rtype: list<str>
    @return: String list of the PDB symbol names, or None when no Lookup
             object has been loaded yet
    """
    if goodLooks is None:                   #Nothing loaded, nothing to list
        return None
    #goodLooks.names is a dictionary whose values are collections of names
    return [name for group in goodLooks.names.values() for name in group]

def loadPdbPickle(pickle_file):
    """
    Loads a pickled symbol lookup object into the global goodLooks

    NOTE: unpickling can run arbitrary code. Only load pickle files that
    come from a trusted source.

    @param pickle_file: File path to the pickle file
    @type pickle_file: string
    @return: void
    """
    global goodLooks
    with open(pickle_file, 'rb') as pkl_f:  #Closed again even if loading fails
        start = time.clock()
        goodLooks = cPickle.load(pkl_f)
        end = time.clock()
    if timing:
        print("PDB load took %f" % (end - start))

#---------------------------------Spot Check------------------------------------------------------------------------------------------------------------------------

def spotCheck(address, value):
    """
    Takes an address and a byte pattern, checks that the pattern is found at
    exactly that address

    @param address: Address to check, as a hex string
    @type address: string
    @param value: String of hex bytes in any format parseBytes() accepts
    @type value: string
    @rtype: bool
    @return: True if the first occurrence found from the address onwards
             starts at the address itself
    """
    addr = int(address, 16)
    bin_str, _ = parseBytes(value)                          #Parse input
    ea = idc.FindBinary(addr, idc.SEARCH_DOWN, bin_str)     #Search from the address onwards
    #Compare against the parsed integer; the string argument never equals an address
    return ea == addr

def runSpotChecks(spots):
    """
    Performs every configured spot check and writes the outcome to the report

    @param spots: Dictionaries with an "address" and a "value" entry
    @type spots: list<dict>
    @return: void
    """
    report("############## Spot Checks ##############\n\n")
    for entry in spots:
        where = entry["address"]
        expected = entry["value"]
        verdict = str(spotCheck(where, expected))
        report("Address '" + where + "' contains value: '" + expected + "'? " + verdict)
        report("\n")
    report("\n\n")

#---------------------------------Class Utils------------------------------------------------------------------------------------------------------------------------

def cfgParse(cfg_file):
    """
    Takes file path, and loads JSON data into config dictionary
    runTests() reads the following entries from the result:
        'cfg': Dictionary holding
            'work_dir': Directory to store working files and results report file
            'log_file': File path of the log file (may be empty)
        'target_binaries': List of dictionaries, one per target, holding
            'target', 'byte_patterns', 'spot_checks',
            'string_regex_list' and 'symbol_regex_list'

    @param cfg_file: File path to json formatted config file
    @type cfg_file: string
    @rtype: dict
    @return: Dictionary populated with data in config file
    @raise ValueError: if the file does not contain valid JSON
    """
    with open(cfg_file) as fp:              #Closed again even if parsing fails
        return json.load(fp)

def report(data):
    """
    Appends text to the report file named by the global __report_file__

    @param data: Text to write; a newline is added after it
    @type data: string
    @return: void
    """
    global __report_file__
    with open(__report_file__, "a") as fh:  #Closed again even if the write fails
        fh.write(data + '\n')

#---------------------------------Main------------------------------------------------------------------------------------------------------------------------

def runTests(config_file, idx):
    """
    Runs every check configured for one target binary and writes the report

    Sets the globals __work_dir__, __log__ and __report_file__. The report
    file is emptied first; the log file is appended to.

    @param config_file: File path to json formatted config file
    @type config_file: string
    @param idx: Index of the target within the config's "target_binaries" list
    @type idx: int
    @return: void
    """
    global __log__
    global __work_dir__
    global __report_file__
    import time

    start = time.clock()

    cfg_dict = cfgParse(config_file)

    __work_dir__ = os.path.expandvars(cfg_dict["cfg"]["work_dir"])
    log_file = os.path.expandvars(cfg_dict["cfg"]["log_file"])
    if log_file == "":
        #No log file configured: fall back to one inside the work directory
        log_file = os.path.join(__work_dir__, "idaStar.log")
    __log__ = open(log_file, 'a')

    try:
        __log__.write("\n\n------------")
        __log__.write(time.strftime("%d/%m/%y %H:%M"))
        __log__.write("------------\n\n")

        target_cfg = cfg_dict["target_binaries"][idx]
        target = target_cfg["target"]

        __report_file__ = os.path.join(__work_dir__, target + ".idaStar.report.txt")
        open(__report_file__, "w").close()      #Start from an empty report

        #load pickle
        pickle_file = os.path.join(__work_dir__, target + ".pkl")
        if os.path.isfile(pickle_file):
            loadPdbPickle(pickle_file)
            __log__.write("Loaded pickle\n")
        else:
            __log__.write("No pickle file. Ignoring symbol tests\n")

        #(config key, function to run, log line written afterwards)
        stages = (
            ("byte_patterns", runByteSigs, "Checked byte patterns\n"),
            ("spot_checks", runSpotChecks, "Ran spot checks\n"),
            ("string_regex_list", runStringRegex, "Ran string regex\n"),
            ("symbol_regex_list", runSymbolRegex, "Ran symbol regex\n"),
        )
        for key, runner, logLine in stages:
            entries = target_cfg.get(key)       #A missing key counts as "nothing to run"
            if entries:
                runner(entries)
                __log__.write(logLine)

        end = time.clock()
        __log__.write("Analysis took %f\n" % (end - start))
        __log__.write("\n\n\n\n")
    finally:
        __log__.close()                         #Do not leave the log open when a stage fails

#Script entry point. With script arguments present the tests are run and IDA
#is closed afterwards; without arguments only a load message is printed.
if __name__ == "__main__":
    if len(idc.ARGV) > 0:
        #Expected arguments: <script name> <config file> <target index>
        if len(idc.ARGV) == 3:
            cfg_file = idc.ARGV[1]
            idx = int(idc.ARGV[2])
            runTests(cfg_file, idx)
        else:
            with open('idaStar.log', 'a') as log:
                log.write("\n\n####\n\nIncorrect arguments\n\n####\n\n")
        idc.Exit(0)
    else:
        print("idaStar module has been loaded")