############################################################################
#
# binStar: IDAPython library of commands to aid binary analysis
#
#
############################################################################

import json
import os
import re
import time

try:
    import cPickle
except ImportError:  # Python 3 has no separate cPickle module
    import pickle as cPickle

try:
    import idaapi
    import idautils
    import idc
except ImportError:
    # Outside IDA the pure helpers (parseBytes, cfgParse, ...) stay importable;
    # anything that actually talks to the database still requires IDA.
    idaapi = idautils = idc = None

try:
    from pdbparse.symlookup import Lookup
except ImportError:
    # pdbparse is only needed by loadPdbFile(); every other test can run without it.
    Lookup = None

__cfg_dict__ = None
__pdb_file__ = None
__work_dir__ = None
__report_file__ = None
__log__ = None
goodLooks = None
timing = False


def _log(message):
    """
    Writes a message to the run log, if one is currently open.

    @param message: Text to append to the log
    @type message: str
    """
    if __log__ is not None and not __log__.closed:
        __log__.write(message)


#---------------------------------Strings---------------------------------
def regexStringSearch(list, pattern):
    """
    Takes Perl-Regex and string list, searches for a match, returns results

    Items that are not plain ``str`` are expected to expose an ``ea``
    attribute, which is used as the address. Plain strings are treated as PDB
    symbol names and their address is taken from the loaded Lookup object at
    the same index.

    @param list: List of strings to search through
    @type list: list
    @param pattern: Perl-style regex to search for
    @type pattern: string
    @rtype: list
    @return: List of (string, address) tuples for the items matching the regex
    """
    regex = re.compile(pattern)
    pdb_locs = None
    addrs = []
    for idx, string in enumerate(list):
        s = str(string)
        if regex.search(s) is None:
            continue
        if not isinstance(string, str):
            addrs.append((s, string.ea))
        else:
            # Hack. Need to just return the index.
            # Only the first PDB's location table is consulted, so this is
            # only correct while a single PDB is loaded.
            if pdb_locs is None:
                pdb_locs = next(iter(goodLooks.locs.values()))
            addrs.append((s, pdb_locs[idx]))
    print("Matches identified: %d" % len(addrs))
    return addrs


def strings():
    """
    Function to return list of all strings in target binary

    @rtype: list
    @return: List of strings within target binary
    """
    s = idautils.Strings(False)
    # Grab both C and Unicode strings
    s.setup(strtypes=idautils.Strings.STR_UNICODE | idautils.Strings.STR_C)
    strs = []
    for i, v in enumerate(s):
        if v is None:
            print("Failed to retrieve string index %d" % i)
        else:
            strs.append(v)
    return strs


def stringRE(regex):
    """
    Searches binary for strings matching regex

    @param regex: Perl-style regex
    @type regex: str
    @rtype: list<(str,long)>
    @return: list of string,address tuples
    """
    return regexStringSearch(strings(), regex)


def pdbSymbol(regex):
    """
    Searches PDB file for symbols matching regex

    @param regex: Perl-style regex
    @type regex: str
    @rtype: list<(str,long)>
    @return: list of string,address tuples, or None when no PDB is loaded
    """
    symList = pdbSymbolList()
    if symList is None:
        return None
    return regexStringSearch(symList, regex)


def runStringRegex(reg_list):
    """
    Runs every regex in reg_list against the binary's strings and reports
    the matches.

    @param reg_list: Perl-style regexes to search for
    @type reg_list: list<str>
    """
    report("############## String Regex ##############\n\n")
    # Enumerate the strings once rather than once per regex.
    all_strings = strings()
    for pattern in reg_list:
        addrs = regexStringSearch(all_strings, pattern)
        if not addrs:
            continue
        report("Matches found in ASCII strings for: " + pattern + " at the following addresses:")
        for addr in addrs:
            report('\t' + addr[0] + " at Address: " + hex(addr[1]))
        report("\n")
    report('\n\n')


def runSymbolRegex(reg_list):
    """
    Runs every regex in reg_list against the loaded PDB symbols and reports
    the matches.

    @param reg_list: Perl-style regexes to search for
    @type reg_list: list<str>
    """
    report("############## Symbol Regex ##############\n\n")
    for pattern in reg_list:
        addrs = pdbSymbol(pattern)
        if addrs is None:
            _log("Symbol retrieval failed.\n")
            report("No symbols returned. PDB was probably not loaded.")
            continue
        if not addrs:
            continue
        report("Matches found in symbols for: " + pattern + " at the following addresses:")
        for addr in addrs:
            report('\t' + addr[0] + " at Address: " + hex(addr[1]))
        report('\n')
    report('\n\n')


#----------------------------Byte Pattern---------------------------------
def parseBytes(byteString, end=False):
    """
    Function to make dealing with bytes (logically) input agnostic
    e.g. I deal with the following input formats for byte strings:
        "0xA1,0x23,0xF7"
        "A2,45,F7"
        "0xA3 0x45 0xF7"
        "A4 45 F7"
        "A54557"
        "0xA60x450x57"

    Formats may be mixed in one string; empty tokens (e.g. from a trailing
    comma) are ignored.

    @param byteString: String of hex bytes
    @type byteString: string
    @param end: Optional flag to determine endianess of input string. Default: Big Endian
    @type end: bool (Default=False)
    @rtype: (string, int)
    @return: Hex string formatted correctly for "FindBinary()" and the number of bytes in it
    """
    cleaned = byteString.replace("0x", "").replace("0X", "")
    values = []
    for chunk in cleaned.split():          # split on any whitespace
        for token in chunk.split(','):     # then on commas
            if not token:
                continue
            # Tokens longer than two digits are undelimited runs ("A54557"):
            # consume them two hex digits at a time.
            values.extend(int(token[i:i + 2], 16) for i in range(0, len(token), 2))
    if end:
        values.reverse()                   # reverse byte order for endianess
    bin_str = ' '.join("%02X" % v for v in values)
    return bin_str, len(values)


def _findByteSig(bin_str, tlen):
    """
    Returns the list of addresses where bin_str occurs (empty when none).

    @param bin_str: Space separated hex string as produced by parseBytes()
    @type bin_str: string
    @param tlen: Number of bytes in the pattern
    @type tlen: int
    @rtype: list
    @return: List of matching addresses
    """
    matches = []
    if tlen == 0:
        # An empty pattern would never advance the search address.
        return matches
    ea = idc.MinEA()
    while True:
        ea = idc.FindBinary(ea, idc.SEARCH_DOWN, bin_str)
        if ea == idaapi.BADADDR:
            break
        matches.append(ea)
        idc.Message(".")
        ea += tlen                         # resume the search after this match
    if matches:
        idc.Message("\n")
    return matches


def byteSig(pattern, end=False):
    """
    Takes string of hex values, and searches binary for pattern

    @param pattern: String of hex bytes
    @type pattern: string
    @param end: Optional flag to determine endianess of input string. Default: Big Endian
    @type end: bool (Default=False)
    @rtype: list
    @return: List of addresses matching signature. When nothing matches, the
             tuple (False, "Could not match[...]") is returned instead.
    """
    bin_str, tlen = parseBytes(pattern, end)
    ret = _findByteSig(bin_str, tlen)
    if not ret:
        return (False, "Could not match[%s]" % bin_str)
    return ret


def runByteSigs(byte_sigs):
    """
    Searches the binary for every byte pattern and reports where each was found.

    @param byte_sigs: Hex byte strings in any format parseBytes() accepts
    @type byte_sigs: list<str>
    """
    report("############## Byte Signatures ##############\n\n")
    for sig in byte_sigs:
        bin_str, tlen = parseBytes(sig)
        # Use the list-returning helper: byteSig()'s no-match tuple must not
        # be reported as if it were a pair of addresses.
        addrs = _findByteSig(bin_str, tlen)
        if not addrs:
            continue
        report("Byte pattern: " + sig + " found at the following addresses:")
        for addr in addrs:
            report('\t' + str(addr))
        report("\n")
    report("\n\n")


#----------------------------------PDB------------------------------------
def loadPdbFile(base=0x140000000):
    """
    Loads pdb file into a lookup class object

    When no PDB path has been set, the path of the input file with its
    extension replaced by ".pdb" is used.

    @param base: Specifies the base address of the binary.
    @type base: long
    @return: void
    """
    global goodLooks
    global __pdb_file__
    if Lookup is None:
        raise ImportError("pdbparse is required to load PDB files")
    if __pdb_file__ is None:
        fp = idc.GetInputFilePath()
        __pdb_file__ = os.path.splitext(fp)[0] + ".pdb"
    pdb = [(__pdb_file__, base)]           # Lookup takes a list of (pdb, baseAddress) tuples
    print("Loading PDB file. This may take some time...")
    goodLooks = Lookup(pdb)
    print("PDB file %s loaded" % __pdb_file__)


def pdbSymbolList():
    """
    Extracts symbol list from Lookup class object.

    @rtype: list
    @return: String list of the PDB symbol names, or None when no PDB is loaded
    """
    if goodLooks is None:
        return None
    symbolList = []
    for names in goodLooks.names.values():
        symbolList.extend(names)
    return symbolList


def loadPdbPickle(pickle_file):
    """
    Restores a previously pickled Lookup object into the global lookup.

    @param pickle_file: Path to the pickle file
    @type pickle_file: str
    @return: void
    """
    global goodLooks
    start = time.time()
    # NOTE: unpickling executes arbitrary code; only load pickle files that
    # were produced locally and are trusted.
    with open(pickle_file, 'rb') as pkl_f:
        goodLooks = cPickle.load(pkl_f)
    end = time.time()
    if timing:
        print("PDB load took %f" % (end - start))


#---------------------------------Spot Check------------------------------
def spotCheck(address, value):
    """
    Checks whether the given bytes are found exactly at the given address.

    @param address: Address to check, as a hex string
    @type address: str
    @param value: Hex byte string in any format parseBytes() accepts
    @type value: str
    @rtype: bool
    @return: True when the first match searching down from address is at address itself
    """
    addr = int(address, 16)
    bin_str, _ = parseBytes(value)
    ea = idc.FindBinary(addr, idc.SEARCH_DOWN, bin_str)
    # Compare against the parsed integer, not the original hex string.
    return ea == addr


def runSpotChecks(spots):
    """
    Runs spotCheck() for every entry and reports the outcome.

    @param spots: Dictionaries with the string keys "address" and "value"
    @type spots: list<dict>
    """
    report("############## Spot Checks ##############\n\n")
    for spot in spots:
        match = spotCheck(spot["address"], spot["value"])
        report("Address '" + spot["address"] + "' contains value: '" + spot["value"] + "'? " + str(match))
        report("\n")
    report("\n\n")


#---------------------------------Class Utils-----------------------------
def cfgParse(cfg_file):
    """
    Takes file path, and loads JSON data into config dictionary

    runTests() reads the following entries from the result:
        'cfg': dictionary with 'work_dir' and 'log_file'
        'target_binaries': list of per-target dictionaries with 'target',
            'byte_patterns', 'spot_checks', 'string_regex_list' and
            'symbol_regex_list'

    @param cfg_file: File path to json formatted config file
    @type cfg_file: string
    @rtype: dict
    @return: Dictionary populated with data in config file
    """
    with open(cfg_file) as fp:
        return json.load(fp)


def report(data):
    """
    Appends a line to the report file, or prints it when no report file has
    been set up yet.

    @param data: Text to report; a newline is appended
    @type data: str
    """
    if __report_file__ is None:
        print(data)
        return
    with open(__report_file__, "a") as fh:
        fh.write(data + '\n')


#---------------------------------Main------------------------------------
def runTests(config_file, idx):
    """
    Runs every configured test for one target binary and writes the report.

    @param config_file: File path to json formatted config file
    @type config_file: str
    @param idx: Index of the target in the config's 'target_binaries' list
    @type idx: int
    @return: void
    """
    global __log__
    global __work_dir__
    global __report_file__
    start = time.time()
    cfg_dict = cfgParse(config_file)
    __work_dir__ = os.path.expandvars(cfg_dict["cfg"]["work_dir"])
    log_file = os.path.expandvars(cfg_dict["cfg"]["log_file"])
    if log_file == "":
        log_file = os.path.join(__work_dir__, "idaStar.log")
    __log__ = open(log_file, 'a')
    try:
        _log("\n\n------------")
        _log(time.strftime("%d/%m/%y %H:%M"))
        _log("------------\n\n")

        target_cfg = cfg_dict["target_binaries"][idx]
        __report_file__ = os.path.join(__work_dir__, target_cfg["target"] + ".idaStar.report.txt")
        # Start every run with an empty report file.
        open(__report_file__, "w").close()

        # load pickle
        pickle_file = os.path.join(__work_dir__, target_cfg["target"] + ".pkl")
        if os.path.isfile(pickle_file):
            loadPdbPickle(pickle_file)
            _log("Loaded pickle\n")
        else:
            _log("No pickle file. Ignoring symbol tests\n")

        # byte patterns
        byte_sigs = target_cfg.get("byte_patterns") or []
        if byte_sigs:
            runByteSigs(byte_sigs)
            _log("Checked byte patterns\n")

        # spot checks
        spots = target_cfg.get("spot_checks") or []
        if spots:
            runSpotChecks(spots)
            _log("Ran spot checks\n")

        # string regex
        str_reg = target_cfg.get("string_regex_list") or []
        if str_reg:
            runStringRegex(str_reg)
            _log("Ran string regex\n")

        # symbol regex
        sym_reg = target_cfg.get("symbol_regex_list") or []
        if sym_reg:
            runSymbolRegex(sym_reg)
            _log("Ran symbol regex\n")

        end = time.time()
        _log("Analysis took %f\n" % (end - start))
        _log("\n\n\n\n")
    finally:
        __log__.close()


def _main():
    """
    Script entry point: runs the tests named by idc.ARGV, then exits IDA.
    """
    if idc is None:
        print("idaStar must be run inside IDA")
        return
    if len(idc.ARGV) > 0:
        if len(idc.ARGV) == 3:
            runTests(idc.ARGV[1], int(idc.ARGV[2]))
        else:
            with open('idaStar.log', 'a') as log:
                log.write("\n\n####\n\nIncorrect arguments\n\n####\n\n")
        idc.Exit(0)
    else:
        print("idaStar module has been loaded")


if __name__ == "__main__":
    _main()