Files
2025-09-29 00:52:08 +02:00

389 lines
12 KiB
Python
Executable File

############################################################################
#
# binStar: IDAPython library of commands to aid binary analysis
#
#
############################################################################
import idaapi, idautils, idc
import re, json, os, cPickle
from pdbparse.symlookup import Lookup
# Module-level state shared by the helpers below.
__cfg_dict__ = None  # NOTE(review): not read or written by any function visible in this file
__pdb_file__ = None  # PDB path; loadPdbFile() derives it from the input binary while this is None
# NOTE(review): the three globals below are deliberately left undefined at
# import time; runTests() creates them. report() and runSymbolRegex() read
# them, so those helpers only work after runTests() has started.
# __work_dir__ = None
# __report_file__ = None
# __log__ = None
goodLooks = None  # symbol lookup object; set by loadPdbFile() or loadPdbPickle()
timing = False  # when True, loadPdbPickle() prints how long unpickling took
#---------------------------------Strings----------------------------------------------------------------------------------------------------------------------------
def regexStringSearch(list, pattern):
    """
    Searches a collection of strings for entries matching a regex.

    Every entry is converted with str() and tested with regex.search(), so
    the pattern may match anywhere inside the string.

    Entries that are not plain `str` objects must expose their address as an
    `.ea` attribute. Plain `str` entries are resolved by position against the
    first address table held by the global `goodLooks` object.

    @param list: Strings (or string items carrying an `.ea` attribute) to search
    @type list: list<str>
    @param pattern: Perl-style regex to search for
    @type pattern: string
    @rtype: list<(str,long)>
    @return: List of (string, address) tuples, one per matching entry
    """
    regex = re.compile(pattern)  # compile once, outside the loop
    sym_addrs = None  # address table, fetched lazily on the first str match
    addrs = []
    for idx, item in enumerate(list):
        text = str(item)
        if regex.search(text) is None:
            continue
        if not isinstance(item, str):
            addrs.append((text, item.ea))
            continue
        if sym_addrs is None:
            # HACK (kept from the original): the address is taken from the
            # first value of goodLooks.locs using the entry's position in
            # the input. NOTE(review): this presumably only lines up when
            # the input came from pdbSymbolList() with a single PDB loaded.
            # The builtin list() is shadowed by the parameter name, hence
            # next(iter(...)) to fetch the first value.
            sym_addrs = next(iter(goodLooks.locs.values()))
        addrs.append((text, sym_addrs[idx]))
    print("Matches identified: %d" % len(addrs))
    return addrs
def strings():
    """
    Collects the C and Unicode strings found in the target binary.

    The items are returned exactly as idautils.Strings yields them;
    regexStringSearch() in this file relies on str(item) and item.ea.

    @rtype: list
    @return: List of string items found in the target binary
    """
    # The class is reached through the imported module: a bare `Strings`
    # only exists when IDA has star-imported idautils into the namespace.
    finder = idautils.Strings(False)
    finder.setup(strtypes=idautils.Strings.STR_UNICODE | idautils.Strings.STR_C)
    found = []
    for i, item in enumerate(finder):
        if item is None:
            # The iterator produced no item for this index.
            print("Failed to retrieve string index %d" % i)
        else:
            found.append(item)
    return found
def stringRE(regex):
    """
    Runs a regex over every string collected from the binary.

    @param regex: Perl-style regex
    @type regex: str
    @rtype: list<(str,long)>
    @return: (string, address) tuples for the strings that match
    """
    return regexStringSearch(strings(), regex)
def pdbSymbol(regex):
    """
    Runs a regex over the symbol names taken from the loaded PDB data.

    @param regex: Perl-style regex
    @type regex: str
    @rtype: list<(str,long)> or None
    @return: (symbol, address) tuples for the symbols that match, or None
             when pdbSymbolList() has no symbols to offer
    """
    symbols = pdbSymbolList()
    if symbols is None:
        return None
    return regexStringSearch(symbols, regex)
def runStringRegex(reg_list):
    """
    Runs each regex against the binary's strings and writes the hits to the
    report file.

    Regexes without any match produce no report entry.

    @param reg_list: Perl-style regexes to search for
    @type reg_list: list<str>
    @return: void
    """
    report("############## String Regex ##############\n\n")
    # The loop variable is deliberately not called `re`, which would shadow
    # the regex module inside this function.
    for pattern in reg_list:
        matches = stringRE(pattern)
        if not matches:
            continue
        report("Matches found in ASCII strings for: " + pattern + " at the following addresses:")
        for text, ea in matches:
            report('\t' + text + " at Address: " + hex(ea))
        report("\n")
    report('\n\n')
def runSymbolRegex(reg_list):
    """
    Runs each regex against the PDB symbol names and writes the hits to the
    report file.

    When pdbSymbol() returns None (no symbol data loaded) the failure is
    noted in both the log and the report, and the next regex is tried.
    Regexes without any match produce no report entry.

    @param reg_list: Perl-style regexes to search for
    @type reg_list: list<str>
    @return: void
    """
    report("############## Symbol Regex ##############\n\n")
    # The original looped over the undefined name `sig_matches`; the
    # regexes to run are the ones passed in.
    for pattern in reg_list:
        matches = pdbSymbol(pattern)
        if matches is None:
            __log__.write("Symbol retrieval failed.")
            report("No symbols returned. PDB was probably not loaded.")
            continue
        if not matches:
            continue
        report("Matches found in symbols for: " + pattern + " at the following addresses:")
        for name, ea in matches:
            report('\t' + name + " at Address: " + hex(ea))
        report('\n')
    report('\n\n')
#----------------------------Byte Pattern---------------------------------------------------------------------------------------------------------------------------------
def parseBytes(byteString, end=False):
    """
    Normalises a hex byte string into the space separated form used for
    binary searches, regardless of how the input was delimited.

    Accepted input formats include:
    "0xA1,0x23,0xF7"
    "A2,45,F7"
    "0xA3 0x45 0xF7"
    "A4 45 F7"
    "A54557"
    "0xA60x450x57"
    Delimiters may be mixed (e.g. "A1, B2" or "A1B2 C3D4"). A token longer
    than two hex digits is cut into two-digit bytes from the left, so a
    trailing odd digit becomes its own byte ("A54" -> A5 04).

    @param byteString: String of hex bytes
    @type byteString: string
    @param end: When True the byte order is reversed. Default: False
    @type end: bool (Default=False)
    @rtype: (string, int)
    @return: Tuple of the space separated upper-case hex string and the
             number of bytes it contains
    @raise ValueError: if a token contains non-hex characters
    """
    # Drop every 0x/0X prefix, then treat commas like whitespace so that
    # empty tokens (as in "A1, B2") simply disappear in split().
    cleaned = byteString.replace("0x", "").replace("0X", "").replace(",", " ")
    values = []
    for token in cleaned.split():
        # Tokens of one or two digits yield a single byte; longer tokens
        # are appended pair by pair instead of replacing earlier bytes.
        for i in range(0, len(token), 2):
            values.append(int(token[i:i + 2], 16))
    if end:
        values.reverse()
    bin_str = ' '.join("%02X" % v for v in values)
    return bin_str, len(values)
def byteSig(pattern, end=False):
    """
    Takes a string of hex values and searches the binary for every
    occurrence of that byte pattern.

    The search restarts just past each hit, so occurrences overlapping a
    previous hit are not reported. A "." is written to the IDA output
    window for every hit.

    @param pattern: String of hex bytes (any format parseBytes() accepts)
    @type pattern: string
    @param end: When True the byte order is reversed before searching
    @type end: bool (Default=False)
    @rtype: list<long>, or (False, str) when nothing matched
    @return: List of addresses matching the signature. When there is no
             match the historical (False, "Could not match[...]") tuple is
             returned instead of an empty list.
    """
    bin_str, tlen = parseBytes(pattern, end)
    if not tlen:
        # An empty pattern has length 0, so the cursor below would never
        # advance; report it as a miss instead of searching.
        return (False, "Could not match[%s]" % bin_str)
    # The IDA helpers are reached through the imported idc module so the
    # function also works when this file is imported as a module.
    ea = idc.MinEA()
    ret = []
    while True:
        # Search downward starting at ea. NOTE(review): the original author
        # was unsure whether this scans forward or only tests ea itself,
        # but reported that it found every pattern tried.
        ea = idc.FindBinary(ea, idc.SEARCH_DOWN, bin_str)
        if ea == idaapi.BADADDR:
            break
        ret.append(ea)
        idc.Message(".")
        ea += tlen  # continue after the bytes just matched
    if not ret:
        return (False, "Could not match[%s]" % bin_str)
    idc.Message("\n")
    return ret
def runByteSigs(byte_sigs):
    """
    Searches the binary for each byte pattern and writes the hit addresses
    to the report file.

    Patterns without any match produce no report entry.

    @param byte_sigs: Hex byte patterns in any format byteSig() accepts
    @type byte_sigs: list<str>
    @return: void
    """
    report("############## Byte Signatures ##############\n\n")
    for sig in byte_sigs:
        addrs = byteSig(sig)
        # byteSig() signals "no match" with a (False, message) tuple rather
        # than an empty list; without this check the tuple's two members
        # would be reported as if they were addresses.
        missed = isinstance(addrs, tuple) and len(addrs) > 0 and addrs[0] is False
        if missed or not addrs:
            continue
        report("Byte pattern: " + sig + " found at the following addresses:")
        for addr in addrs:
            report('\t' + str(addr))
        report("\n")
    report("\n\n")
#----------------------------------PDB------------------------------------------------------------------------------------------------------------------------
def loadPdbFile(base=0x140000000):
    """
    Loads a PDB file into a Lookup object stored in the global `goodLooks`.

    When the global `__pdb_file__` is still None, the PDB is assumed to sit
    next to the input binary under the same name with a .pdb extension, and
    `__pdb_file__` is set to that path.

    @param base: Base address of the binary. Default: 0x140000000
    @type base: long
    @return: void
    """
    global goodLooks
    global __pdb_file__
    if __pdb_file__ is None:
        input_path = idc.GetInputFilePath()
        # splitext also handles names without any extension, where slicing
        # at rfind('.') would have discarded the whole file name.
        stem = os.path.splitext(os.path.basename(input_path))[0]
        __pdb_file__ = os.path.join(os.path.dirname(input_path), stem + ".pdb")
    print("Loading PDB file. This may take some time...")
    # Lookup takes a list of (pdb path, base address) tuples.
    goodLooks = Lookup([(__pdb_file__, base)])
    print("PDB file %s loaded" % __pdb_file__)
def pdbSymbolList():
    """
    Flattens the symbol names held by the global `goodLooks` object into a
    single list.

    @rtype: list<str> or None
    @return: Every name found in the values of goodLooks.names, in iteration
             order, or None when no symbol data has been loaded yet
    """
    if goodLooks is None:
        return None
    # Each value of goodLooks.names is itself a collection of names.
    return [name for names in goodLooks.names.values() for name in names]
def loadPdbPickle(pickle_file):
    """
    Restores the global `goodLooks` object from a pickle file.

    When the module-level `timing` flag is set, the time spent unpickling is
    printed.

    SECURITY: unpickling can execute arbitrary code. Only load pickle files
    that were produced by a trusted run of this tool chain.

    @param pickle_file: Path to the pickle file to load
    @type pickle_file: string
    @return: void
    """
    global goodLooks
    # `time` is not imported at module level (runTests() imports it only
    # into its own scope), so it has to be imported here.
    import time
    with open(pickle_file, 'rb') as pkl_f:
        start = time.clock()
        goodLooks = cPickle.load(pkl_f)
        end = time.clock()
    if timing:
        print("PDB load took %f" % (end - start))
#---------------------------------Spot Check------------------------------------------------------------------------------------------------------------------------
def spotCheck(address, value):
    """
    Checks whether the given bytes are present at the given address.

    The pattern is searched downward starting at the address; the check
    passes only when the first hit is the address itself.

    @param address: Address to check, as a hex string (e.g. "0x140001000")
                    or as a number
    @type address: string or long
    @param value: Expected bytes, in any format parseBytes() accepts
    @type value: string
    @rtype: bool
    @return: True if the bytes are found exactly at the address
    """
    try:
        addr = int(address, 16)
    except TypeError:
        # int() rejects an explicit base for non-string input, so the
        # address was already numeric.
        addr = int(address)
    bin_str, _ = parseBytes(value)
    ea = idc.FindBinary(addr, idc.SEARCH_DOWN, bin_str)
    # Compare against the parsed number: comparing against the original
    # string argument could never be True.
    return ea == addr
def runSpotChecks(spots):
    """
    Runs spotCheck() for every entry and writes one verdict line per entry
    to the report file.

    @param spots: Entries with an "address" and a "value" key, both strings
    @type spots: list<dict>
    @return: void
    """
    report("############## Spot Checks ##############\n\n")
    for entry in spots:
        where = entry["address"]
        expected = entry["value"]
        verdict = str(spotCheck(where, expected))
        report("Address '" + where + "' contains value: '" + expected + "'? " + verdict)
        report("\n")
    report("\n\n")
#---------------------------------Class Utils------------------------------------------------------------------------------------------------------------------------
def cfgParse(cfg_file):
    """
    Takes a file path and loads its JSON content into a config dictionary.

    runTests() in this file reads the following entries from the result:
    'cfg': dict with 'work_dir' and 'log_file'
    'target_binaries': list of per-target dicts with 'target',
                       'byte_patterns', 'spot_checks', 'string_regex_list'
                       and 'symbol_regex_list'

    @param cfg_file: File path to json formatted config file
    @type cfg_file: string
    @rtype: dict
    @return: Dictionary populated with data in config file
    @raise ValueError: if the file does not contain valid JSON
    """
    # The context manager closes the file even when parsing fails.
    with open(cfg_file) as fp:
        return json.load(fp)
def report(data):
    """
    Appends one entry, followed by a newline, to the report file.

    The target is the path stored in the global `__report_file__`, which
    runTests() sets; calling this before that global exists raises
    NameError.

    @param data: Text to append to the report
    @type data: string
    @return: void
    """
    global __report_file__
    # The file is reopened for every entry; the context manager makes sure
    # the handle is released even if the write fails.
    with open(__report_file__, "a") as fh:
        fh.write(data + '\n')
#---------------------------------Main------------------------------------------------------------------------------------------------------------------------
def runTests(config_file, idx):
    """
    Runs every configured check for one target binary and writes the
    results to that target's report file.

    Sets the globals `__work_dir__`, `__log__` and `__report_file__`, which
    report() and the run* helpers rely on. The report file is truncated at
    the start of each run; the log file is appended to.

    NOTE: `work_dir` is used as a plain string prefix for the log, report
    and pickle paths, so it is expected to end with a path separator.

    @param config_file: Path to the JSON config file (see cfgParse())
    @type config_file: string
    @param idx: Index of the entry in "target_binaries" to analyse
    @type idx: int
    @return: void
    """
    global __log__
    global __work_dir__
    global __report_file__
    import time
    start = time.clock()
    # Use the parameter: the original read a global `cfg_file`, which only
    # exists when this file runs as a script.
    cfg_dict = cfgParse(config_file)
    settings = cfg_dict["cfg"]
    __work_dir__ = os.path.expandvars(settings["work_dir"])
    log_file = os.path.expandvars(settings["log_file"])
    if log_file == "":
        log_file = __work_dir__ + "idaStar.log"
    __log__ = open(log_file, 'a')
    try:
        __log__.write("\n\n------------")
        __log__.write(time.strftime("%d/%m/%y %H:%M"))
        __log__.write("------------\n\n")

        # Honour the caller's index; the original reset it to 0 here.
        target_cfg = cfg_dict["target_binaries"][idx]
        __report_file__ = __work_dir__ + target_cfg["target"] + ".idaStar.report.txt"
        open(__report_file__, "w").close()  # start from an empty report

        # Symbol data is optional; it is only loaded when a pickle exists.
        pickle_file = __work_dir__ + target_cfg["target"] + ".pkl"
        if os.path.isfile(pickle_file):
            loadPdbPickle(pickle_file)
            __log__.write("Loaded pickle\n")
        else:
            __log__.write("No pickle file. Ignoring symbol tests")

        # Each stage runs only when its list is present and non-empty.
        stages = (
            ("byte_patterns", runByteSigs, "Checked byte patterns\n"),
            ("spot_checks", runSpotChecks, "Ran spot checks\n"),
            ("string_regex_list", runStringRegex, "Ran string regex\n"),
            ("symbol_regex_list", runSymbolRegex, "Ran symbol regex\n"),
        )
        for key, runner, note in stages:
            items = target_cfg.get(key, [])
            if items:
                runner(items)
                __log__.write(note)

        end = time.clock()
        __log__.write("Analysis took %f\n" % (end - start))
        __log__.write("\n\n\n\n")
    finally:
        # Release the log even when one of the stages raises.
        __log__.close()
# Script entry point. With script arguments (batch mode) the tests are run
# and IDA is closed afterwards; without arguments only a notice is printed.
if __name__ == "__main__":
    if len(idc.ARGV) > 0:
        # Expected arguments: <script> <config file> <target index>.
        # The comparison belongs outside len(); len(idc.ARGV == 3) raised
        # TypeError because it took the length of a bool.
        if len(idc.ARGV) == 3:
            cfg_file = idc.ARGV[1]
            idx = int(idc.ARGV[2])
            runTests(cfg_file, idx)
        else:
            log = open('idaStar.log', 'a')
            try:
                log.write("\n\n####\n\nIncorrect arguments\n\n####\n\n")
            finally:
                log.close()
        idc.Exit(0)
    else:
        print("idaStar module has been loaded")