gtav-src/tools_ng/script/coding/cinclude2dot.py


#### Portions of this code inspired by / based on
#### cinclude2dot.pl by Darxus\@ChaosReigns.com, francis\@flourish.org
#### GPLed code available from http://www.flourish.org/cinclude2dot/

import os, sys, optparse, re, math, glob
import networkx as nx
import sqlite3 as sql
from stat import *

G = nx.DiGraph()

opt = optparse.OptionParser(usage="Usage: %prog [options] inputfile.cpp outfile.dot")
opt.add_option("-I", dest="includePaths", action="append")
opt.add_option("--modules", dest="modules", action="store_true")
opt.add_option("--systemincludes", dest="systemIncludes", action="store_true")
opt.add_option("--nodeScore", dest="nodeScore", action="store", default=1.0)
opt.add_option("--edgeScore", dest="edgeScore", action="store", default=0.0)
opt.add_option("--branchMult", dest="branchMult", action="store", default=1.0)
opt.add_option("--branchPower", dest="branchPower", action="store", default=1.0)
opt.add_option("--scoreBySize", dest="scoreBySize", action="store_true", default=False)
opt.add_option("--unknownSizeScore", dest="unknownSizeScore", action="store", default=10.0)
opt.add_option("--updatedb", dest="updateDb", action="store_true", default=False)
opt.add_option("--nocpp", dest="noCpp", action="store_true", default=False)
opt.add_option("--findpathsto", dest="findPathsTo", action="store", default=None)

(options, outargs) = opt.parse_args(sys.argv[1:])

sqlcur = None
if options.updateDb:
    sqlconn = sql.connect(r"C:\cinclude.sql")
    sqlcur = sqlconn.cursor()

    # try to create the tables we need
    sqlcur.execute('CREATE TABLE edge_costs (root_file text, from_edge text, to_edge text, cost real)')
    sqlcur.execute('CREATE TABLE file_info (file_name text, total_edges integer, rage_includes integer, std_includes integer)')


nodeScore = float(options.nodeScore)
edgeScore = float(options.edgeScore)
branchMult = float(options.branchMult)
branchPower = float(options.branchPower)
scoreBySize = options.scoreBySize
unknownSizeScore = float(options.unknownSizeScore)
noCpp = options.noCpp

outfile = file(outargs[1], "w")


def add_edge(nodeFrom, nodeTo):
    nfl = nodeFrom
    nft = nodeTo
    if nfl == nft:
        return
    G.add_node(nfl, score=0)
    G.add_node(nft, score=0)
    G.add_edge(nft, nfl) # backwards link, for topo sort to work right


def include_search(inc, filename):
    dirname = os.path.dirname(filename)
    testFile = os.path.normpath(os.path.join(dirname, inc))
    if os.path.exists(testFile):
        return testFile

    # try user-specified include paths
    if options.includePaths:
        for dirname in options.includePaths:
            testFile = os.path.normpath(os.path.join(dirname, inc))
            if os.path.exists(testFile):
                return testFile

    return None

def file_display(filename, issystem):
    if issystem:
        return "<" + os.path.basename(filename) + ">"
    return os.path.basename(os.path.dirname(filename)) + "/" + os.path.basename(filename)

visitedFiles = {}
stdIncludes = {}
unvisitedFiles = []

fileList = [(os.path.abspath(x), False) for x in glob.glob(outargs[0])]
unvisitedFiles += fileList
if len(fileList) > 1:
    root = "<root>"
    for (f, issystem) in fileList:
        add_edge(root, file_display(f, issystem))
else:
    root = file_display(fileList[0][0], False)


while len(unvisitedFiles) > 0:
    files = unvisitedFiles[:]
    unvisitedFiles = []

    for (f,issystem) in files:
        visitedFiles[f] = 0
        try:
            stats = os.stat(f)
            G.add_node(file_display(f, issystem), filesize=float(stats[ST_SIZE]) / 1024.0)
            cFile = file(f)
        except:
            continue
        for line in cFile:
            m = re.match(r'^\s*#\s*include\s+<(\S+)>', line)
            if m:
                add_edge(file_display(f, issystem), "<" + m.group(1) + ">")
                if options.systemIncludes:
                    includefile = include_search(m.group(1), f)
                    if includefile and not visitedFiles.has_key(includefile):
                        unvisitedFiles.append((includefile, True))

                stdIncludes[f] = 0
            m = re.match(r'^\s*#\s*include\s+"(\S+)"', line)
            if m:
                includefile = include_search(m.group(1), f)
                if includefile:
                    if noCpp and includefile.endswith(".cpp"):
                        continue
                    targetName = file_display(includefile, issystem)
                    moduleName = targetName.split('/')[0]
                    add_edge(file_display(f, issystem), targetName)
                    if not visitedFiles.has_key(includefile):
                        unvisitedFiles.append((includefile, issystem))
                else:
                    print "couldn't open include file", m.group(1), "included from", f
                    add_edge(file_display(f, issystem), m.group(1))


print "Files:"
for i in visitedFiles.iterkeys():
    print i
print ""

from collections import defaultdict # new in Python 2.5
def circuits(G):
    """Find elementary circuits of a directed graph.

    An elementary circuit is a closed path where no node appears twice, except
    that the first and last node are the same. Two elementary circuits are
    distinct if they are not cyclic permutations of each other.

    Parameters
    ----------
    G : NetworkX Graph
       A directed graph.

    Returns
    -------
    A list of circuits, where each circuit is a list of nodes, with the first
    and last node being the same.

    Example:
    >>> G = nx.DiGraph([(0, 0), (0, 1), (0, 2), (1, 2), (2, 0), (2, 1), (2, 2)])
    >>> circuits(G)
    [[0, 0], [0, 1, 2, 0], [0, 2, 0], [1, 2, 1], [2, 2]]

    See Also
    --------
    cycles (for undirected graphs)

    References
    ----------

    The implementation follows pp. 79-80 of:
    .. [1] Finding all the elementary circuits of a directed graph.
       D.B. Johnson
       SIAM Journal on Computing 4, no. 1: 77-84.
       http://dx.doi.org/10.1137/0204007

    Jon Olav Vik, 2010-08-09
    """
    path = [] # stack of nodes (vertices) in current path
    blocked = defaultdict(bool) # vertex: blocked from search?
    B = defaultdict(list) # graph portions that yield no elementary circuit
    result = [] # list to accumulate the circuits found

    def unblock(thisnode):
        """Recursively unblock and remove nodes from B[thisnode]."""
        if blocked[thisnode]:
            blocked[thisnode] = False
            while B[thisnode]:
                unblock(B[thisnode].pop())

    def circuit(thisnode, startnode, component):
        closed = False # set to True if elementary path is closed
        path.append(thisnode)
        blocked[thisnode] = True
        for nextnode in component[thisnode]: # direct successors of thisnode
            if nextnode == startnode:
                result.append(path + [startnode])
                closed = True
            elif not blocked[nextnode]:
                if circuit(nextnode, startnode, component):
                    closed = True
        if closed:
            unblock(thisnode)
        else:
            for nextnode in component[thisnode]:
                if thisnode not in B[nextnode]: # TODO: use set for speedup?
                    B[nextnode].append(thisnode)
        path.pop() # remove thisnode from path
        return closed

    # Johnson's algorithm requires some ordering of the vertices
    for s in sorted(G):
        # Find the strong component K with least vertex (i.e. node)
        # in the subgraph induced by s and its following nodes.
        subgraph = G.subgraph(node for node in G if node >= s)
        strongcomp = nx.strongly_connected_components(subgraph)
        component = G.subgraph(min(strongcomp, key=min))
        if component:
            startnode = min(component.nodes())
            for node in component:
                blocked[node] = False
                B[node][:] = []
            circuit(startnode, startnode, component)
        else:
            break

    return result


if not nx.is_directed_acyclic_graph(G):
    # get rid of loops - somehow
    print "Not a DAG - removing cycles..."
    cycleList = circuits(G)
    print cycleList

    # go through each cycle, remove the edge with the longest distance from the root
    for cycle in cycleList:
        maxDist = -1
        maxNodeIndex = -1
        for node in range(len(cycle)-1):
            path = nx.shortest_path(G, cycle[node], root) # remember, edges are backwards
            dist = len(path)
            if dist > maxDist:
                maxDist = dist
                maxNodeIndex = node
        print "  Removed", cycle[maxNodeIndex], "->", cycle[maxNodeIndex+1]
        G.remove_edge(cycle[maxNodeIndex], cycle[maxNodeIndex+1])


# if necessary, trim out all nodes except the ones forming a path from the source to the findPathsTo node
if options.findPathsTo:
    done = False
    while not done:
        nodesToCull = [node for (node, deg) in G.in_degree_iter() if deg == 0 and options.findPathsTo not in node]
        print "Removing %d nodes" % (len(nodesToCull))
        if len(nodesToCull) > 0:
            G.remove_nodes_from(nodesToCull)
        else:
            done = True

# include cost heuristic
# the cost of a node is 1 plus the cost of all the incoming edges
# the cost of an edge is the cost of a node, divided by the number of outgoing edges from that node

# i.e. start with all leaf nodes at '1'
# divide the score on each of those nodes by the number of outgoing edges
# propogate that reduced score to each of the parent nodes
# add '1' to each of the parent nodes and repeat


ordered_nodes = nx.topological_sort(G)
for n in ordered_nodes:
    attrDict = G.node[n]
    score = nodeScore
    if scoreBySize:
        if attrDict.has_key('filesize'):
            score = G.node[n]['filesize']
        else:
            score = unknownSizeScore
    G.node[n]['score'] = score

for n in ordered_nodes:
    score = G.node[n]['score']
    outDeg = G.out_degree(n)
    if outDeg == 0:
        outDeg = 1
    parentScore = branchMult * (score / math.pow(outDeg, branchPower)) + edgeScore
    for (nodeFrom, nodeTo) in G.out_edges_iter(n):
        G[nodeFrom][nodeTo]['score'] = parentScore
        G.node[nodeTo]['score'] = parentScore + G.node[nodeTo]['score']

# now sort by score
edges = G.edges(data=True)
edges.sort(key=lambda x: -x[2]['score'])

topEdge = edges[0]
topScore = G[topEdge[0]][topEdge[1]]['score']

print "Top 100 Dependency Costs:"
for e in edges[:100]:
    print e[1], "->", e[0], ":", G[e[0]][e[1]]['score']


print ""
print "%d includes\n %d rage files\n %d std files" % (G.number_of_edges(), len(visitedFiles), len(stdIncludes))

outfile.write('''
digraph "source tree" {
    overlap=scale;
    size="8,10";
    ratio="fill";
    fontsize="16";
    fontname="Helvetica";
	clusterrank="local";
''')

filesPerModule = {}

if options.modules:
    for (node,data) in G.nodes_iter(data=True):
        modName = None
        if node[0] == '<':
            modName = "standard_lib"
        else:
            nodeComps = node.split("/")
            if len(nodeComps) > 1:
                modName =  nodeComps[0]

        if modName:
            if filesPerModule.has_key(modName):
                filesPerModule[modName].append(node)
            else:
                filesPerModule[modName] = [node]

    for (module, files) in filesPerModule.iteritems():
        outfile.write('''
    subgraph cluster%s {
''' % (module))
        for i in files:
            outfile.write('\t\t"%s"\n' % i)
        outfile.write('''
    }
    ''')


for (node, neighbor, data) in G.edges_iter(data=True):
    scoreProp = data['score'] / topScore
    outfile.write('''"%s" -> "%s" [label="%f" color="0.0 1.0 %f" penwidth=%f]\n''' % (neighbor, node, data['score'], scoreProp, 1.0 + 40.0 * scoreProp))

outfile.write('''}
''')