# gtav-src/tools_ng/script/gen9/rsgdnd/compare.py

import difflib
import argparse
import re
import rsgdnd.p4 as p4
import rsgdnd.utils as utils

# Monkey-patch: attach utils.re_unescape to the re module as re.unescape so it
# can be reached through `re` (presumably the inverse of re.escape -- confirm
# against utils.re_unescape).
re.unescape = utils.re_unescape

# Patterns used to post-process the table produced by difflib.HtmlDiff.
# They rely on diff() first rewriting each '<td nowrap="nowrap">' cell to carry
# class="diff_line" (text replacement after table creation).
#   group(1): the line number shown in the header cell
#   group(2): the (possibly tag-decorated) contents of the text cell
# Raw strings are used so that the escaped space ("\ ") reaches the regex engine
# untouched instead of being an invalid string escape sequence.
from_re = re.compile(r"""id="from[0-9]+_[0-9]+">([0-9]+)</td><td\ nowrap="nowrap" class="diff_line">(.*?)</td>""")
to_re = re.compile(r"""id="to[0-9]+_[0-9]+">([0-9]+)</td><td\ nowrap="nowrap" class="diff_line">(.*?)</td>""")
# Matches any single markup tag (non-greedy), e.g. '<span class="diff_add">'.
tag_re = re.compile(r"(<.*?>)")

def get_diff_args(**kwargs):
    """Build the command-line parser for the diff tool.

    Any keyword arguments are forwarded to ``argparse.ArgumentParser`` (for
    example ``parents=[...]`` or ``add_help=False``).

    Returns:
        The configured ``argparse.ArgumentParser``; the caller is responsible
        for calling ``parse_args`` on it.
    """
    parser = argparse.ArgumentParser(description='Generate a diff of file A and B.', **kwargs)

    parser.add_argument("-a", '--file_a', metavar='a.txt', type=str, required=True,
                        help='file A to diff')

    parser.add_argument("-b", '--file_b', metavar='b.txt', type=str, required=True,
                        help='file B to diff')

    # "context" is implemented by diff() and so must be selectable here too.
    # "compressed" is kept for backward compatibility with existing callers.
    parser.add_argument("-ds", '--diffstyle', metavar='ds', type=str, default="html",
                        choices=["html", "compressed", "unified", "context", "ndiff"],
                        help='html, compressed, unified, context, ndiff')

    parser.add_argument("-ha", '--header_a', metavar='HA', type=str,
                        help='table header for A, defaults to --file_a input')

    parser.add_argument("-hb", '--header_b', metavar='HB', type=str,
                        help='table header for B, defaults to --file_b input')

    parser.add_argument("-bl", '--blacklist', metavar="BL", type=str, nargs='*',
                        help="lines containing the given strings 'BL' will be removed entirely from the diff. Not case sensitive.")

    parser.add_argument("-wl", '--whitelist', metavar="WL", type=str, nargs='*',
                        help="lines containing the given strings 'WL' will be the only lines in the diff. Not case sensitive.")

    parser.add_argument("-il", '--ignorelist', metavar="IL", type=str, nargs='*',
                        help="the given strings 'IL' will not be considered as a difference. Not case sensitive.")

    parser.add_argument("-p4", '--perforce', action="store_true",
                        help='should this be a perforce look up')

    parser.add_argument("-sa", '--stripafter', metavar="SA", type=str,
                        help='anything after a given string will be removed')

    return parser


# Diff styles that diff() knows how to render.
_SUPPORTED_DIFFSTYLES = ("html", "ndiff", "unified", "context")


def _read_lines(path, perforce):
    """Return the lines of ``path``, read from Perforce or from local disk."""
    utils.log("reading: "+path)
    if perforce:
        lines = p4.print_file(path)
        # A failed lookup is reported in the first line of the output; the
        # emptiness guard avoids an IndexError when nothing comes back.
        if lines and any(err in lines[0] for err in ("no such file", "no file(s) at that revision")):
            raise ValueError(lines[0])
        return lines
    with open(path) as handle:
        return handle.readlines()


def _restore_html_reps(line, reg, reps, rep_lut):
    """Re-insert the ignored strings into one row of the HTML diff table.

    ``reg`` is ``from_re`` or ``to_re``; its first group is the line number
    and its second group is the text cell. Offsets recorded by sanitise() are
    shifted past every markup tag that starts at or before them.
    """
    match = reg.search(line)
    if match is None:
        return line
    line_no = int(match.group(1))
    if line_no not in reps:
        return line

    cell_start = match.start(2)
    tags = list(tag_re.finditer(match.group(2)))
    # NOTE(review): offsets are not adjusted for HTML entity escaping
    # (e.g. "&amp;") performed by difflib -- confirm whether that matters.
    for lut_index, offset in reps[line_no]:
        tag_shift = sum(len(tag.group(0)) for tag in tags if offset >= tag.start())
        line = utils.str_insert(line, rep_lut[lut_index], cell_start + offset + tag_shift)
    return line


def _restore_lines(lines, reps, rep_lut):
    """Return a copy of ``lines`` with the ignored strings put back."""
    restored = list(lines)
    for index in range(len(restored)):
        for lut_index, pos in reps.get(index+1, ()):
            restored[index] = utils.str_insert(restored[index], rep_lut[lut_index], pos)
    return restored


def diff(file_a, file_b,
            diffstyle="html", header_a="", header_b="",
            blacklist=None, whitelist=None, ignorelist=None,
            perforce=False, stripafter=""):
    """Diff ``file_a`` against ``file_b`` after sanitising both.

    Args:
        file_a, file_b: paths to compare (passed to p4.print_file when
            ``perforce`` is true, otherwise opened from disk).
        diffstyle: one of "html", "ndiff", "unified" or "context".
        header_a, header_b: column headers for the HTML table; default to the
            file paths.
        blacklist: literal strings; lines containing any are dropped.
        whitelist: literal strings; only lines containing any are kept.
        ignorelist: literal strings removed before diffing so they do not
            count as differences. All three lists match case-insensitively.
        perforce: read the files through p4 instead of the filesystem.
        stripafter: text from this marker onwards is removed from each line.

    Returns:
        ``(diff, lines_a, lines_b)``. ``diff`` is an HTML table string for
        "html" and a lazy generator of lines for the other styles.
        ``lines_a`` / ``lines_b`` are the sanitised lines with the ignored
        strings re-inserted.

    Raises:
        ValueError: for an unsupported ``diffstyle``, for ignore strings that
            contain one another, or when Perforce reports a missing file.
    """
    # Fail fast: previously an unknown style fell through every branch and
    # surfaced as an UnboundLocalError at the return statement.
    if diffstyle not in _SUPPORTED_DIFFSTYLES:
        raise ValueError("unsupported diffstyle '"+str(diffstyle)+"', expected one of "+", ".join(_SUPPORTED_DIFFSTYLES))

    ignorelist = [re.compile(re.escape(i), re.IGNORECASE) for i in ignorelist] if ignorelist else None
    blacklist = [re.compile(re.escape(i), re.IGNORECASE) for i in blacklist] if blacklist else None
    whitelist = [re.compile(re.escape(i), re.IGNORECASE) for i in whitelist] if whitelist else None

    if ignorelist:
        # Ignore strings must not contain each other, otherwise the recorded
        # offsets cannot be restored unambiguously.
        for i1 in ignorelist:
            for i2 in ignorelist:
                if i1 == i2:
                    continue
                if i1.search(i2.pattern):
                    raise ValueError("ignorelist has strings that contain each other '"+i1.pattern+"' '"+i2.pattern+"'")

    lines_a = _read_lines(file_a, perforce)
    lines_b = _read_lines(file_b, perforce)

    if perforce:
        time_a = ""
        time_b = ""
    else:
        time_a = utils.file_mtime(file_a)
        time_b = utils.file_mtime(file_b)

    # remove unwanted lines and strings.
    utils.log("sanatising "+file_a)
    lines_a, reps_a, rep_lut_a = sanitise(lines_a, whitelist, blacklist, ignorelist, stripafter)

    utils.log("sanatising "+file_b)
    lines_b, reps_b, rep_lut_b = sanitise(lines_b, whitelist, blacklist, ignorelist, stripafter)

    header_a = header_a if header_a else file_a
    header_b = header_b if header_b else file_b

    utils.log("beginning "+diffstyle+" diff")
    if diffstyle == "html":
        table = difflib.HtmlDiff().make_table(lines_a, lines_b, header_a, header_b, context=True, numlines=1)
        # insert all the text we removed earlier during the sanitisation pass.
        utils.log("html post processing")
        fixed_rows = []
        for row in table.splitlines():
            # Tag the text cells so from_re / to_re can locate them.
            row = row.replace("<td nowrap=\"nowrap\">", "<td nowrap=\"nowrap\" class=\"diff_line\">")
            # difflib emits &nbsp; for spaces, which would throw off the offsets.
            row = row.replace("&nbsp;", " ")
            row = _restore_html_reps(row, from_re, reps_a, rep_lut_a)
            row = _restore_html_reps(row, to_re, reps_b, rep_lut_b)
            fixed_rows.append(row)
        result = "\n".join(fixed_rows)
    elif diffstyle == "ndiff":
        # TODO: the ignored text is not re-inserted into this output; there
        # appear to be no line numbers to key on.
        result = difflib.ndiff(lines_a, lines_b)
    elif diffstyle == "unified":
        # TODO: the ignored text is not re-inserted into this output; should
        # be able to use the line numbers.
        result = difflib.unified_diff(lines_a, lines_b, file_a, file_b, time_a, time_b, 1)
    else:  # "context"
        # TODO: the ignored text is not re-inserted into this output; should
        # be able to use the line numbers.
        result = difflib.context_diff(lines_a, lines_b, file_a, file_b, time_a, time_b, 1)

    # The text-style results are lazy generators that still reference the
    # sanitised lists, so restore into NEW lists. Mutating in place would put
    # the ignored text back before the diff is actually computed.
    restored_a = _restore_lines(lines_a, reps_a, rep_lut_a)
    restored_b = _restore_lines(lines_b, reps_b, rep_lut_b)
    return result, restored_a, restored_b

def sanitise(lines, whitelist=None, blacklist=None, ignorelist=None, stripafter=None):
    """Filter ``lines`` and cut out ignored strings, recording how to restore them.

    Args:
        lines: list of text lines; it is not modified.
        whitelist: compiled patterns; when given, only lines matching at
            least one are kept.
        blacklist: compiled patterns; lines matching any are dropped.
        ignorelist: compiled patterns whose matches are removed from each
            remaining line.
        stripafter: marker string; each line is cut at its first occurrence.

    Returns:
        ``(ret, reps, rep_lut)`` where ``ret`` is the sanitised list of lines,
        ``rep_lut`` lists the distinct removed strings in first-seen order,
        and ``reps`` maps a 1-based line number in ``ret`` to a list of
        ``(rep_lut index, start offset in the line before removal)`` tuples
        sorted by offset.
    """
    reps = {}
    rep_lut = []
    lut_index = {}  # removed text -> position in rep_lut (O(1) lookup)
    # Work on a copy so the caller's list is never modified in place.
    ret = list(lines)

    if whitelist:
        ret = [l for l in ret if any(w.search(l) for w in whitelist)]

    if blacklist:
        ret = [l for l in ret if not any(b.search(l) for b in blacklist)]

    if ignorelist:
        for row, line in enumerate(ret):
            matches = [m for pattern in ignorelist for m in pattern.finditer(line)]
            if not matches:
                continue
            # Left-to-right order lets the strings be re-inserted at their
            # original offsets one after another.
            matches.sort(key=lambda m: m.start())
            line_reps = []
            for m in matches:
                text = m.group(0)
                if text not in lut_index:
                    lut_index[text] = len(rep_lut)
                    rep_lut.append(text)
                line_reps.append((lut_index[text], m.start()))
            reps[row+1] = line_reps
            for pattern in ignorelist:
                line = pattern.sub("", line)
            ret[row] = line

    if stripafter:
        stripped = []
        for l in ret:
            head = l.split(stripafter)[0]
            # Re-terminate only lines that lost their newline; lines without
            # the marker must not gain a second one.
            stripped.append(head if head.endswith("\n") else head+"\n")
        ret = stripped
    return ret, reps, rep_lut