201 lines
8.3 KiB
Python
Executable File
201 lines
8.3 KiB
Python
Executable File
import difflib
|
|
import argparse
|
|
import re
|
|
import rsgdnd.p4 as p4
|
|
import rsgdnd.utils as utils
|
|
|
|
# Attach the project's ``utils.re_unescape`` helper to the standard ``re``
# module under the name ``re.unescape``. This modifies the shared ``re`` module
# object, so the attribute becomes visible to every module in the process.
# NOTE(review): presumably the inverse of ``re.escape`` - confirm in rsgdnd.utils.
re.unescape = utils.re_unescape
|
|
|
|
# These patterns rely on the text replacement done after table creation: they
# only match once ``<td nowrap="nowrap">`` has been rewritten to carry
# ``class="diff_line"``.
#
# Group 1 is the line number shown in the header cell, group 2 is the markup
# inside the following content cell. Raw strings are used so that no
# backslash sequence is interpreted by the Python string parser.
from_re = re.compile(r'id="from[0-9]+_[0-9]+">([0-9]+)</td><td nowrap="nowrap" class="diff_line">(.*?)</td>')
to_re = re.compile(r'id="to[0-9]+_[0-9]+">([0-9]+)</td><td nowrap="nowrap" class="diff_line">(.*?)</td>')

# Matches a single markup tag, non-greedily, e.g. ``<span class="diff_chg">``.
tag_re = re.compile(r"(<.*?>)")
|
|
|
|
def get_diff_args(**kwargs):
    """Build the command-line parser for the diff tool.

    The parser is returned without being run, so callers can extend it or
    use it as a parent parser before calling ``parse_args``.

    Args:
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``argparse.ArgumentParser``.

    Returns:
        argparse.ArgumentParser: The configured parser.
    """
    parser = argparse.ArgumentParser(description='Generate a diff of file A and B.', **kwargs)

    parser.add_argument("-a", '--file_a', metavar='a.txt', type=str, required=True,
                        help='file A to diff')

    parser.add_argument("-b", '--file_b', metavar='b.txt', type=str, required=True,
                        help='file B to diff')

    # "context" is implemented by diff() and therefore has to be selectable
    # here. "compressed" is kept so existing command lines keep parsing.
    parser.add_argument("-ds", '--diffstyle', metavar='ds', type=str, default="html",
                        choices=["html", "compressed", "unified", "context", "ndiff"],
                        help='html, compressed, unified, context, ndiff')

    parser.add_argument("-ha", '--header_a', metavar='HA', type=str,
                        help='table header for A, defaults to --file_a input')

    parser.add_argument("-hb", '--header_b', metavar='HB', type=str,
                        help='table header for B, defaults to --file_b input')

    parser.add_argument("-bl", '--blacklist', metavar="BL", type=str, nargs='*',
                        help='lines containing the given strings \'BL\' will be removed entirely from the diff. Not case sensitive.')

    parser.add_argument("-wl", '--whitelist', metavar="WL", type=str, nargs='*',
                        help='lines containing the given strings \'WL\' will be the only lines in the diff. Not case sensitive.')

    parser.add_argument("-il", '--ignorelist', metavar="IL", type=str, nargs='*',
                        help='the given strings \'IL\' will not be considered as a difference. Not case sensitive.')

    parser.add_argument("-p4", '--perforce', action="store_true",
                        help='should this be a perforce look up')

    parser.add_argument("-sa", '--stripafter', metavar="SA", type=str,
                        help='anything after a given string will be removed')

    return parser
|
|
|
|
|
|
def _insert_html_reps(line, cell_re, reps, rep_lut):
    """Re-insert ignored strings into one side of an HtmlDiff table row.

    Args:
        line: One line of table markup.
        cell_re: ``from_re`` or ``to_re``; group 1 is the line number and
            group 2 the markup of the content cell.
        reps: Mapping of 1-based line number to ``(rep_lut index, position)``
            tuples as produced by ``sanitise``.
        rep_lut: List of the removed strings.

    Returns:
        The line with the removed strings inserted back, or the line
        unchanged when it holds no matching cell or nothing was removed.
    """
    cell = cell_re.search(line)
    if not cell:
        return line

    line_number = int(cell.group(1))
    if line_number not in reps:
        return line

    cell_start = cell.start(2)
    tags = list(tag_re.finditer(cell.group(2)))

    for lut_index, text_pos in reps[line_number]:
        pos = cell_start + text_pos
        # Recorded positions count text characters only, while tag offsets are
        # measured in the markup. Every tag starting at or before the running
        # position therefore pushes the insertion point right by its length.
        for tag in tags:
            if pos >= cell_start + tag.start():
                pos += len(tag.group(0))
        line = utils.str_insert(line, rep_lut[lut_index], pos)
    return line


def _restore_lines(lines, reps, rep_lut):
    """Re-insert the strings removed by ``sanitise`` into ``lines`` in place."""
    for index in range(len(lines)):
        for lut_index, pos in reps.get(index + 1, ()):
            lines[index] = utils.str_insert(lines[index], rep_lut[lut_index], pos)


def diff(file_a, file_b,
         diffstyle="html", header_a="", header_b="",
         blacklist=None, whitelist=None, ignorelist=None,
         perforce=False, stripafter=""):
    """Diff two files after filtering their lines.

    Args:
        file_a: Path (or perforce file spec when ``perforce`` is set) of A.
        file_b: Path (or perforce file spec when ``perforce`` is set) of B.
        diffstyle: One of ``"html"``, ``"ndiff"``, ``"unified"``, ``"context"``.
        header_a: Table header for A in the html style; defaults to ``file_a``.
        header_b: Table header for B in the html style; defaults to ``file_b``.
        blacklist: Strings; lines containing any of them are dropped.
        whitelist: Strings; only lines containing any of them are kept.
        ignorelist: Strings that are removed before diffing so they never
            count as a difference.
        perforce: Read both files through ``p4.print_file`` instead of disk.
        stripafter: Text from this string to the end of a line is removed.

    All string matching is case-insensitive.

    Returns:
        tuple: ``(result, lines_a, lines_b)``. ``result`` is the table markup
        string for the html style and a list of diff lines for the other
        styles. ``lines_a`` and ``lines_b`` are the filtered lines with the
        ignored strings inserted back.

    Raises:
        ValueError: If ``diffstyle`` is unsupported, if one ignorelist string
            contains another, or if perforce reports a missing file.
    """
    supported = ("html", "ndiff", "unified", "context")
    if diffstyle not in supported:
        raise ValueError("unsupported diffstyle '"+str(diffstyle)+"', expected one of: "+", ".join(supported))

    if ignorelist:
        # A repeated entry would record, and later re-insert, every match twice.
        unique = []
        for text in ignorelist:
            if text not in unique:
                unique.append(text)
        ignorelist = unique

        # Compare the raw strings: the escaped regex source contains extra
        # backslashes and would hide containment for strings with
        # metacharacters.
        for inner in ignorelist:
            for outer in ignorelist:
                if inner != outer and inner.lower() in outer.lower():
                    raise ValueError("ignorelist has strings that contain each other '"+inner+"' '"+outer+"'")

    def compile_all(strings):
        if not strings:
            return None
        return [re.compile(re.escape(s), re.IGNORECASE) for s in strings]

    ignorelist = compile_all(ignorelist)
    blacklist = compile_all(blacklist)
    whitelist = compile_all(whitelist)

    def read_lines(path):
        utils.log("reading: "+path)
        if not perforce:
            with open(path) as handle:
                return handle.readlines()
        lines = p4.print_file(path)
        # Empty output is treated as an empty file instead of indexing into it.
        if lines and any(err in lines[0] for err in ("no such file", "no file(s) at that revision")):
            raise ValueError(lines[0])
        return lines

    lines_a = read_lines(file_a)
    lines_b = read_lines(file_b)

    if perforce:
        time_a = ""
        time_b = ""
    else:
        time_a = utils.file_mtime(file_a)
        time_b = utils.file_mtime(file_b)

    # remove unwanted lines and strings.
    utils.log("sanatising "+file_a)
    lines_a, reps_a, rep_lut_a = sanitise(lines_a, whitelist, blacklist, ignorelist, stripafter)

    utils.log("sanatising "+file_b)
    lines_b, reps_b, rep_lut_b = sanitise(lines_b, whitelist, blacklist, ignorelist, stripafter)

    header_a = header_a if header_a else file_a
    header_b = header_b if header_b else file_b

    utils.log("beginning "+diffstyle+" diff")
    if diffstyle == "html":
        table = difflib.HtmlDiff().make_table(lines_a, lines_b, header_a, header_b,
                                              context=True, numlines=1)
        # insert all the text we removed earlier during the sanitisation pass.
        utils.log("html post processing")
        fixed = []
        for line in table.splitlines():
            line = line.replace("<td nowrap=\"nowrap\">", "<td nowrap=\"nowrap\" class=\"diff_line\">")
            # HtmlDiff renders every space as the &nbsp; entity; turn them back
            # into plain spaces so recorded text positions line up.
            line = line.replace("&nbsp;", " ")
            line = _insert_html_reps(line, from_re, reps_a, rep_lut_a)
            line = _insert_html_reps(line, to_re, reps_b, rep_lut_b)
            fixed.append(line)
        result = "\n".join(fixed)
    else:
        # TODO: the ignored strings are not inserted back into these outputs.
        # The difflib functions below are lazy generators. They must be
        # consumed here, before the input lists are restored further down,
        # otherwise the comparison would run on the restored text and the
        # ignorelist would have no effect.
        if diffstyle == "ndiff":
            result = list(difflib.ndiff(lines_a, lines_b))
        elif diffstyle == "unified":
            result = list(difflib.unified_diff(lines_a, lines_b, file_a, file_b, time_a, time_b, 1))
        else:
            result = list(difflib.context_diff(lines_a, lines_b, file_a, file_b, time_a, time_b, 1))

    _restore_lines(lines_a, reps_a, rep_lut_a)
    _restore_lines(lines_b, reps_b, rep_lut_b)
    return result, lines_a, lines_b
|
|
|
|
def sanitise(lines, whitelist=None, blacklist=None, ignorelist=None, stripafter=None):
    """Filter lines and strip ignored text ahead of a diff.

    The steps run in this order: whitelist, blacklist, ignorelist removal,
    then ``stripafter``. The input list is never modified.

    Args:
        lines: List of text lines.
        whitelist: Compiled patterns; only lines matching any are kept.
        blacklist: Compiled patterns; lines matching any are dropped.
        ignorelist: Compiled patterns; every match is removed from its line
            and recorded so it can be inserted back later.
        stripafter: String; in lines that contain it, everything from its
            first occurrence onwards is replaced by a single newline.

    Returns:
        tuple: ``(lines, reps, rep_lut)``.
        ``lines`` is the new list of processed lines.
        ``reps`` maps a 1-based line number (in the returned list) to a list
        of ``(rep_lut index, start)`` tuples ordered by ``start``, where
        ``start`` is the match position before any text was removed.
        ``rep_lut`` is the list of distinct removed strings.
    """
    # Copy so the caller's list is left untouched.
    ret = list(lines)

    if whitelist:
        ret = [line for line in ret if any(w.search(line) for w in whitelist)]

    if blacklist:
        ret = [line for line in ret if not any(b.search(line) for b in blacklist)]

    reps = {}
    rep_lut = []
    lut_index = {}  # removed string -> its position in rep_lut

    if ignorelist:
        for number, line in enumerate(ret, 1):
            # sorted() is stable, so equal starts keep ignorelist order.
            matches = sorted(
                (m for pattern in ignorelist for m in pattern.finditer(line)),
                key=lambda m: m.start())
            if not matches:
                continue

            entries = []
            for m in matches:
                text = m.group(0)
                if text not in lut_index:
                    lut_index[text] = len(rep_lut)
                    rep_lut.append(text)
                entries.append((lut_index[text], m.start()))
            reps[number] = entries

            for pattern in ignorelist:
                line = pattern.sub("", line)
            ret[number - 1] = line

    if stripafter:
        for index, line in enumerate(ret):
            head, marker, _ = line.partition(stripafter)
            if not marker:
                # Nothing to strip; leave the line (and its newline) alone.
                continue
            ret[index] = head + "\n"

            entries = reps.get(index + 1)
            if not entries:
                continue
            # Keep only records that fall inside the text that survived. A
            # record's position in the stripped line is its original start
            # minus the length of the records removed before it.
            kept = []
            removed = 0
            for lut_pos, start in entries:
                if start - removed > len(head):
                    break
                kept.append((lut_pos, start))
                removed += len(rep_lut[lut_pos])
            if kept:
                reps[index + 1] = kept
            else:
                del reps[index + 1]

    return ret, reps, rep_lut
|