"""Diff two files (local or perforce) with optional line/string filtering.

Lines can be filtered with a whitelist/blacklist, and individual strings can
be ignored: they are removed before diffing and re-inserted into the output
afterwards so they never show up as differences.
"""
import argparse
import difflib
import re

import rsgdnd.p4 as p4
import rsgdnd.utils as utils

# Project-level monkeypatch kept from the original module.
re.unescape = utils.re_unescape

# These rely on text replacement after table creation.  difflib.HtmlDiff
# renders each side of a row as
#     <td class="diff_header" id="from0_12">12</td><td nowrap="nowrap">TEXT</td>
# group(1) is the line number, group(2) the cell text.  The second <td> is
# matched loosely so the patterns survive attribute changes on that cell.
from_re = re.compile(r'id="from[0-9]+_[0-9]+">([0-9]+)</td><td[^>]*>(.*?)</td>')
to_re = re.compile(r'id="to[0-9]+_[0-9]+">([0-9]+)</td><td[^>]*>(.*?)</td>')
tag_re = re.compile("(<.*?>)")

# Styles diff() actually implements.
_DIFF_STYLES = ("html", "ndiff", "unified", "context")

# Substrings that mark the first line of a failed perforce read.
_P4_ERRORS = ("no such file", "no file(s) at that revision")


def get_diff_args(**kwargs):
    """Build the command line parser for the diff tool.

    Args:
        **kwargs: forwarded unchanged to ``argparse.ArgumentParser``.

    Returns:
        The configured ``argparse.ArgumentParser`` (arguments are not parsed).
    """
    # Parse all the commandline options
    parser = argparse.ArgumentParser(
        description='Generate a diff of file A and B.', **kwargs)
    parser.add_argument("-a", '--file_a', metavar='a.txt', type=str,
                        required=True, help='file A to diff')
    parser.add_argument("-b", '--file_b', metavar='b.txt', type=str,
                        required=True, help='file B to diff')
    # "compressed" is kept so existing command lines still parse; "context"
    # is added because diff() implements it.
    parser.add_argument("-ds", '--diffstyle', metavar='ds', type=str,
                        default="html",
                        choices=["html", "compressed", "unified", "context",
                                 "ndiff"],
                        help='html, compressed, unified, context, ndiff')
    parser.add_argument("-ha", '--header_a', metavar='HA', type=str,
                        help='table header for A, defaults to --a input')
    parser.add_argument("-hb", '--header_b', metavar='HB', type=str,
                        help='table header for B, defaults to --b input')
    parser.add_argument("-bl", '--blacklist', metavar="BL", type=str,
                        nargs='*',
                        help='lines containing the given strings \'BL\' will be removed entirely from the diff. Not case sensitive.')
    parser.add_argument("-wl", '--whitelist', metavar="WL", type=str,
                        nargs='*',
                        help='lines containing the given strings \'WL\' will be the only lines in the diff. Not case sensitive.')
    parser.add_argument("-il", '--ignorelist', metavar="IL", type=str,
                        nargs='*',
                        help='the given strings \'IL\' will not be considered as a difference. Not case sensitive.')
    parser.add_argument("-p4", '--perforce', action="store_true",
                        help='should this be a perforce look up')
    parser.add_argument("-sa", '--stripafter', metavar="SA", type=str,
                        help='anything after a given string will be removed')
    return parser


def _compile_literals(strings):
    """Compile each string as a case-insensitive literal pattern, or None."""
    if not strings:
        return None
    return [re.compile(re.escape(s), re.IGNORECASE) for s in strings]


def _read_lines(path, perforce):
    """Return the lines of *path*, read via perforce or the local filesystem.

    Raises:
        ValueError: when the first line of the perforce output is one of the
            known "file not found" messages.
    """
    utils.log("reading: "+path)
    if perforce:
        lines = p4.print_file(path)
        # Guard against empty output before looking at the first line.
        if lines and any(err in lines[0] for err in _P4_ERRORS):
            raise ValueError(lines[0])
        return lines
    with open(path) as handle:
        return handle.readlines()


def _process_reps(line, cell_re, reps, rep_lut):
    """Re-insert ignored strings into one side of an HtmlDiff table row.

    Args:
        line: one line of the generated HTML table.
        cell_re: ``from_re`` or ``to_re``; locates the line number and text.
        reps: ``{1-based line number: [(rep_lut index, column), ...]}``.
        rep_lut: the removed strings, indexed by the first tuple element.

    Returns:
        *line* with the removed strings put back, or *line* unchanged when it
        holds no matching cell or nothing was removed from that source line.
    """
    match = cell_re.search(line)
    if match is None:
        return line
    line_reps = reps.get(int(match.group(1)))
    if not line_reps:
        return line

    cell_start = match.start(2)
    # (offset within the cell, length) of every markup tag difflib added.
    tags = [(t.start(), len(t.group(0)))
            for t in tag_re.finditer(match.group(2))]

    # NOTE: the &amp;/&lt;/&gt; escapes HtmlDiff emits are not compensated
    # for, so columns drift on lines containing those characters.
    inserted = 0
    for lut_index, column in line_reps:
        # Columns are relative to the unsanitised line; subtracting what has
        # been restored so far gives the column in the tag-free diffed text.
        offset = column - inserted
        # Walk the tags in order, shifting past each one at or before us.
        for tag_start, tag_len in tags:
            if offset < tag_start:
                break
            offset += tag_len
        text = rep_lut[lut_index]
        line = utils.str_insert(line, text, cell_start + offset + inserted)
        inserted += len(text)
    return line


def _restore_lines(lines, reps, rep_lut):
    """Put the ignored strings back into *lines* in place."""
    for line_no, line in enumerate(lines, start=1):
        for lut_index, column in reps.get(line_no, ()):
            line = utils.str_insert(line, rep_lut[lut_index], column)
        lines[line_no - 1] = line


def diff(file_a, file_b, diffstyle="html", header_a="", header_b="",
         blacklist=None, whitelist=None, ignorelist=None, perforce=False,
         stripafter=""):
    """Diff *file_a* against *file_b*.

    Args:
        file_a, file_b: paths (or perforce specs when *perforce* is true).
        diffstyle: one of "html", "ndiff", "unified" or "context".
        header_a, header_b: html table headers; default to the file names.
        blacklist: lines containing any of these strings are dropped.
        whitelist: only lines containing one of these strings are kept.
        ignorelist: strings removed before diffing and restored afterwards.
        perforce: read both files through ``p4.print_file``.
        stripafter: everything from this string onwards is cut from a line.

    Returns:
        ``(diff, lines_a, lines_b)``.  ``diff`` is the HTML table as a string
        for "html", otherwise an iterator over the diff lines.  ``lines_a``
        and ``lines_b`` are the filtered lines with ignored strings restored.

    Raises:
        ValueError: for an unsupported *diffstyle*, for ignorelist entries
            that contain each other, or for a failed perforce read.
    """
    # Fail fast: previously an unhandled style only surfaced as an
    # UnboundLocalError after all the work had been done.
    if diffstyle not in _DIFF_STYLES:
        raise ValueError("unsupported diffstyle '"+str(diffstyle)+"'")

    ignorelist = _compile_literals(ignorelist)
    blacklist = _compile_literals(blacklist)
    whitelist = _compile_literals(whitelist)

    if ignorelist:
        # Nested ignore strings would make the removed columns ambiguous.
        for i1 in ignorelist:
            for i2 in ignorelist:
                if i1 == i2:
                    continue
                if i1.findall(i2.pattern):
                    raise ValueError("ignorelist has strings that contain each other '"+i1.pattern+"' '"+i2.pattern+"'")

    lines_a = _read_lines(file_a, perforce)
    lines_b = _read_lines(file_b, perforce)

    if perforce:
        time_a = ""
        time_b = ""
    else:
        time_a = utils.file_mtime(file_a)
        time_b = utils.file_mtime(file_b)

    # remove unwanted lines and strings.
    utils.log("sanatising "+file_a)
    lines_a, reps_a, rep_lut_a = sanitise(lines_a, whitelist, blacklist,
                                          ignorelist, stripafter)
    utils.log("sanatising "+file_b)
    lines_b, reps_b, rep_lut_b = sanitise(lines_b, whitelist, blacklist,
                                          ignorelist, stripafter)

    header_a = header_a if header_a else file_a
    header_b = header_b if header_b else file_b

    # make the table
    utils.log("beginning "+diffstyle+" diff")
    if diffstyle == "html":
        differ = difflib.HtmlDiff()
        table = differ.make_table(lines_a, lines_b, header_a, header_b,
                                  True, 1)

        # insert all the text we removed earlier during the sanitisation pass.
        utils.log("html post processing")
        fixed_diff = []
        for line in table.splitlines():
            # the python lib maintainers should change this &nbsp; inclusion,
            # it's not helpful.  Undoing it also keeps cell columns aligned
            # with the source text.
            line = line.replace("&nbsp;", " ")
            line = _process_reps(line, from_re, reps_a, rep_lut_a)
            line = _process_reps(line, to_re, reps_b, rep_lut_b)
            fixed_diff.append(line)
        result = "\n".join(fixed_diff)
    else:
        # The difflib text differs are lazy generators.  They must be run
        # *before* the lines are restored below, otherwise they would diff
        # the restored text and the ignorelist would have no effect.
        if diffstyle == "ndiff":
            # TODO: This doesn't fix the sanitisation pass! Work out how to
            # do that, there appear to be no line nums.
            lazy = difflib.ndiff(lines_a, lines_b)
        elif diffstyle == "unified":
            # TODO: This doesn't fix the sanitisation pass! Should be able to
            # use line nums
            lazy = difflib.unified_diff(lines_a, lines_b, file_a, file_b,
                                        time_a, time_b, 1)
        else:
            # TODO: This doesn't fix the sanitisation pass! Should be able to
            # use line nums
            lazy = difflib.context_diff(lines_a, lines_b, file_a, file_b,
                                        time_a, time_b, 1)
        # Still handed back as an iterator, as before.
        result = iter(list(lazy))

    _restore_lines(lines_a, reps_a, rep_lut_a)
    _restore_lines(lines_b, reps_b, rep_lut_b)

    return result, lines_a, lines_b


def sanitise(lines, whitelist=None, blacklist=None, ignorelist=None,
             stripafter=None):
    """Filter *lines* and strip ignored strings from them.

    Args:
        lines: the input lines; not modified.
        whitelist: compiled patterns; only lines matching one are kept.
        blacklist: compiled patterns; lines matching any are dropped.
        ignorelist: compiled patterns whose matches are cut out of each line.
        stripafter: everything from this string onwards is cut from a line.

    Returns:
        ``(ret, reps, rep_lut)``: the cleaned lines; a dict mapping 1-based
        line numbers (within ``ret``) to ``(rep_lut index, column)`` tuples
        sorted by column; and the list of distinct removed strings.
    """
    reps = {}
    rep_lut = []
    lut_index = {}  # removed text -> position in rep_lut
    # Work on a copy so the caller's list is never edited in place.
    ret = list(lines)

    if whitelist:
        ret = [l for l in ret if any(w.search(l) for w in whitelist)]
    if blacklist:
        ret = [l for l in ret if not any(b.search(l) for b in blacklist)]

    if ignorelist:
        for row, line in enumerate(ret):
            matches = [m for pattern in ignorelist
                       for m in pattern.finditer(line)]
            if not matches:
                continue
            matches.sort(key=lambda m: m.start())
            line_reps = []
            for m in matches:
                text = m.group(0)
                index = lut_index.get(text)
                if index is None:
                    index = len(rep_lut)
                    rep_lut.append(text)
                    lut_index[text] = index
                line_reps.append((index, m.start()))
            reps[row+1] = line_reps
            for pattern in ignorelist:
                line = pattern.sub("", line)
            ret[row] = line

    if stripafter:
        # Every line ends in exactly one newline afterwards; blindly
        # appending "\n" doubled it on lines that lacked the marker.
        ret = [l.split(stripafter)[0].rstrip("\n")+"\n" for l in ret]

    return ret, reps, rep_lut