348 lines
12 KiB
Python
Executable File
348 lines
12 KiB
Python
Executable File
|
|
#### Portions of this code inspired by / based on
|
|
#### cinclude2dot.pl by Darxus\@ChaosReigns.com, francis\@flourish.org
|
|
#### GPLed code available from http://www.flourish.org/cinclude2dot/
|
|
|
|
import os, sys, optparse, re, math, glob
|
|
import networkx as nx
|
|
import sqlite3 as sql
|
|
from stat import *
|
|
|
|
G = nx.DiGraph()
|
|
|
|
opt = optparse.OptionParser(usage="Usage: %prog [options] inputfile.cpp outfile.dot")
|
|
opt.add_option("-I", dest="includePaths", action="append")
|
|
opt.add_option("--modules", dest="modules", action="store_true")
|
|
opt.add_option("--systemincludes", dest="systemIncludes", action="store_true")
|
|
opt.add_option("--nodeScore", dest="nodeScore", action="store", default=1.0)
|
|
opt.add_option("--edgeScore", dest="edgeScore", action="store", default=0.0)
|
|
opt.add_option("--branchMult", dest="branchMult", action="store", default=1.0)
|
|
opt.add_option("--branchPower", dest="branchPower", action="store", default=1.0)
|
|
opt.add_option("--scoreBySize", dest="scoreBySize", action="store_true", default=False)
|
|
opt.add_option("--unknownSizeScore", dest="unknownSizeScore", action="store", default=10.0)
|
|
opt.add_option("--updatedb", dest="updateDb", action="store_true", default=False)
|
|
opt.add_option("--nocpp", dest="noCpp", action="store_true", default=False)
|
|
opt.add_option("--findpathsto", dest="findPathsTo", action="store", default=None)
|
|
|
|
(options, outargs) = opt.parse_args(sys.argv[1:])
|
|
|
|
sqlcur = None
|
|
if options.updateDb:
|
|
sqlconn = sql.connect(r"C:\cinclude.sql")
|
|
sqlcur = sqlconn.cursor()
|
|
|
|
# try to create the tables we need
|
|
sqlcur.execute('CREATE TABLE edge_costs (root_file text, from_edge text, to_edge text, cost real)')
|
|
sqlcur.execute('CREATE TABLE file_info (file_name text, total_edges integer, rage_includes integer, std_includes integer)')
|
|
|
|
|
|
nodeScore = float(options.nodeScore)
|
|
edgeScore = float(options.edgeScore)
|
|
branchMult = float(options.branchMult)
|
|
branchPower = float(options.branchPower)
|
|
scoreBySize = options.scoreBySize
|
|
unknownSizeScore = float(options.unknownSizeScore)
|
|
noCpp = options.noCpp
|
|
|
|
outfile = file(outargs[1], "w")
|
|
|
|
|
|
def add_edge(nodeFrom, nodeTo):
|
|
nfl = nodeFrom
|
|
nft = nodeTo
|
|
if nfl == nft:
|
|
return
|
|
G.add_node(nfl, score=0)
|
|
G.add_node(nft, score=0)
|
|
G.add_edge(nft, nfl) # backwards link, for topo sort to work right
|
|
|
|
|
|
def include_search(inc, filename):
|
|
dirname = os.path.dirname(filename)
|
|
testFile = os.path.normpath(os.path.join(dirname, inc))
|
|
if os.path.exists(testFile):
|
|
return testFile
|
|
|
|
# try user-specified include paths
|
|
if options.includePaths:
|
|
for dirname in options.includePaths:
|
|
testFile = os.path.normpath(os.path.join(dirname, inc))
|
|
if os.path.exists(testFile):
|
|
return testFile
|
|
|
|
return None
|
|
|
|
def file_display(filename, issystem):
|
|
if issystem:
|
|
return "<" + os.path.basename(filename) + ">"
|
|
return os.path.basename(os.path.dirname(filename)) + "/" + os.path.basename(filename)
|
|
|
|
visitedFiles = {}
|
|
stdIncludes = {}
|
|
unvisitedFiles = []
|
|
|
|
fileList = [(os.path.abspath(x), False) for x in glob.glob(outargs[0])]
|
|
unvisitedFiles += fileList
|
|
if len(fileList) > 1:
|
|
root = "<root>"
|
|
for (f, issystem) in fileList:
|
|
add_edge(root, file_display(f, issystem))
|
|
else:
|
|
root = file_display(fileList[0][0], False)
|
|
|
|
|
|
while len(unvisitedFiles) > 0:
|
|
files = unvisitedFiles[:]
|
|
unvisitedFiles = []
|
|
|
|
for (f,issystem) in files:
|
|
visitedFiles[f] = 0
|
|
try:
|
|
stats = os.stat(f)
|
|
G.add_node(file_display(f, issystem), filesize=float(stats[ST_SIZE]) / 1024.0)
|
|
cFile = file(f)
|
|
except:
|
|
continue
|
|
for line in cFile:
|
|
m = re.match(r'^\s*#\s*include\s+<(\S+)>', line)
|
|
if m:
|
|
add_edge(file_display(f, issystem), "<" + m.group(1) + ">")
|
|
if options.systemIncludes:
|
|
includefile = include_search(m.group(1), f)
|
|
if includefile and not visitedFiles.has_key(includefile):
|
|
unvisitedFiles.append((includefile, True))
|
|
|
|
stdIncludes[f] = 0
|
|
m = re.match(r'^\s*#\s*include\s+"(\S+)"', line)
|
|
if m:
|
|
includefile = include_search(m.group(1), f)
|
|
if includefile:
|
|
if noCpp and includefile.endswith(".cpp"):
|
|
continue
|
|
targetName = file_display(includefile, issystem)
|
|
moduleName = targetName.split('/')[0]
|
|
add_edge(file_display(f, issystem), targetName)
|
|
if not visitedFiles.has_key(includefile):
|
|
unvisitedFiles.append((includefile, issystem))
|
|
else:
|
|
print "couldn't open include file", m.group(1), "included from", f
|
|
add_edge(file_display(f, issystem), m.group(1))
|
|
|
|
|
|
print "Files:"
|
|
for i in visitedFiles.iterkeys():
|
|
print i
|
|
print ""
|
|
|
|
from collections import defaultdict # new in Python 2.5
|
|
def circuits(G):
|
|
"""Find elementary circuits of a directed graph.
|
|
|
|
An elementary circuit is a closed path where no node appears twice, except
|
|
that the first and last node are the same. Two elementary circuits are
|
|
distinct if they are not cyclic permutations of each other.
|
|
|
|
Parameters
|
|
----------
|
|
G : NetworkX Graph
|
|
A directed graph.
|
|
|
|
Returns
|
|
-------
|
|
A list of circuits, where each circuit is a list of nodes, with the first
|
|
and last node being the same.
|
|
|
|
Example:
|
|
>>> G = nx.DiGraph([(0, 0), (0, 1), (0, 2), (1, 2), (2, 0), (2, 1), (2, 2)])
|
|
>>> circuits(G)
|
|
[[0, 0], [0, 1, 2, 0], [0, 2, 0], [1, 2, 1], [2, 2]]
|
|
|
|
See Also
|
|
--------
|
|
cycles (for undirected graphs)
|
|
|
|
References
|
|
----------
|
|
|
|
The implementation follows pp. 79-80 of:
|
|
.. [1] Finding all the elementary circuits of a directed graph.
|
|
D.B. Johnson
|
|
SIAM Journal on Computing 4, no. 1: 77-84.
|
|
http://dx.doi.org/10.1137/0204007
|
|
|
|
Jon Olav Vik, 2010-08-09
|
|
"""
|
|
path = [] # stack of nodes (vertices) in current path
|
|
blocked = defaultdict(bool) # vertex: blocked from search?
|
|
B = defaultdict(list) # graph portions that yield no elementary circuit
|
|
result = [] # list to accumulate the circuits found
|
|
|
|
def unblock(thisnode):
|
|
"""Recursively unblock and remove nodes from B[thisnode]."""
|
|
if blocked[thisnode]:
|
|
blocked[thisnode] = False
|
|
while B[thisnode]:
|
|
unblock(B[thisnode].pop())
|
|
|
|
def circuit(thisnode, startnode, component):
|
|
closed = False # set to True if elementary path is closed
|
|
path.append(thisnode)
|
|
blocked[thisnode] = True
|
|
for nextnode in component[thisnode]: # direct successors of thisnode
|
|
if nextnode == startnode:
|
|
result.append(path + [startnode])
|
|
closed = True
|
|
elif not blocked[nextnode]:
|
|
if circuit(nextnode, startnode, component):
|
|
closed = True
|
|
if closed:
|
|
unblock(thisnode)
|
|
else:
|
|
for nextnode in component[thisnode]:
|
|
if thisnode not in B[nextnode]: # TODO: use set for speedup?
|
|
B[nextnode].append(thisnode)
|
|
path.pop() # remove thisnode from path
|
|
return closed
|
|
|
|
# Johnson's algorithm requires some ordering of the vertices
|
|
for s in sorted(G):
|
|
# Find the strong component K with least vertex (i.e. node)
|
|
# in the subgraph induced by s and its following nodes.
|
|
subgraph = G.subgraph(node for node in G if node >= s)
|
|
strongcomp = nx.strongly_connected_components(subgraph)
|
|
component = G.subgraph(min(strongcomp, key=min))
|
|
if component:
|
|
startnode = min(component.nodes())
|
|
for node in component:
|
|
blocked[node] = False
|
|
B[node][:] = []
|
|
circuit(startnode, startnode, component)
|
|
else:
|
|
break
|
|
|
|
return result
|
|
|
|
|
|
if not nx.is_directed_acyclic_graph(G):
|
|
# get rid of loops - somehow
|
|
print "Not a DAG - removing cycles..."
|
|
cycleList = circuits(G)
|
|
print cycleList
|
|
|
|
# go through each cycle, remove the edge with the longest distance from the root
|
|
for cycle in cycleList:
|
|
maxDist = -1
|
|
maxNodeIndex = -1
|
|
for node in range(len(cycle)-1):
|
|
path = nx.shortest_path(G, cycle[node], root) # remember, edges are backwards
|
|
dist = len(path)
|
|
if dist > maxDist:
|
|
maxDist = dist
|
|
maxNodeIndex = node
|
|
print " Removed", cycle[maxNodeIndex], "->", cycle[maxNodeIndex+1]
|
|
G.remove_edge(cycle[maxNodeIndex], cycle[maxNodeIndex+1])
|
|
|
|
|
|
# if necessary, trim out all nodes except the ones forming a path from the source to the findPathsTo node
|
|
if options.findPathsTo:
|
|
done = False
|
|
while not done:
|
|
nodesToCull = [node for (node, deg) in G.in_degree_iter() if deg == 0 and options.findPathsTo not in node]
|
|
print "Removing %d nodes" % (len(nodesToCull))
|
|
if len(nodesToCull) > 0:
|
|
G.remove_nodes_from(nodesToCull)
|
|
else:
|
|
done = True
|
|
|
|
# include cost heuristic
|
|
# the cost of a node is 1 plus the cost of all the incoming edges
|
|
# the cost of an edge is the cost of a node, divided by the number of outgoing edges from that node
|
|
|
|
# i.e. start with all leaf nodes at '1'
|
|
# divide the score on each of those nodes by the number of outgoing edges
|
|
# propogate that reduced score to each of the parent nodes
|
|
# add '1' to each of the parent nodes and repeat
|
|
|
|
|
|
ordered_nodes = nx.topological_sort(G)
|
|
for n in ordered_nodes:
|
|
attrDict = G.node[n]
|
|
score = nodeScore
|
|
if scoreBySize:
|
|
if attrDict.has_key('filesize'):
|
|
score = G.node[n]['filesize']
|
|
else:
|
|
score = unknownSizeScore
|
|
G.node[n]['score'] = score
|
|
|
|
for n in ordered_nodes:
|
|
score = G.node[n]['score']
|
|
outDeg = G.out_degree(n)
|
|
if outDeg == 0:
|
|
outDeg = 1
|
|
parentScore = branchMult * (score / math.pow(outDeg, branchPower)) + edgeScore
|
|
for (nodeFrom, nodeTo) in G.out_edges_iter(n):
|
|
G[nodeFrom][nodeTo]['score'] = parentScore
|
|
G.node[nodeTo]['score'] = parentScore + G.node[nodeTo]['score']
|
|
|
|
# now sort by score
|
|
edges = G.edges(data=True)
|
|
edges.sort(key=lambda x: -x[2]['score'])
|
|
|
|
topEdge = edges[0]
|
|
topScore = G[topEdge[0]][topEdge[1]]['score']
|
|
|
|
print "Top 100 Dependency Costs:"
|
|
for e in edges[:100]:
|
|
print e[1], "->", e[0], ":", G[e[0]][e[1]]['score']
|
|
|
|
|
|
print ""
|
|
print "%d includes\n %d rage files\n %d std files" % (G.number_of_edges(), len(visitedFiles), len(stdIncludes))
|
|
|
|
outfile.write('''
|
|
digraph "source tree" {
|
|
overlap=scale;
|
|
size="8,10";
|
|
ratio="fill";
|
|
fontsize="16";
|
|
fontname="Helvetica";
|
|
clusterrank="local";
|
|
''')
|
|
|
|
filesPerModule = {}
|
|
|
|
if options.modules:
|
|
for (node,data) in G.nodes_iter(data=True):
|
|
modName = None
|
|
if node[0] == '<':
|
|
modName = "standard_lib"
|
|
else:
|
|
nodeComps = node.split("/")
|
|
if len(nodeComps) > 1:
|
|
modName = nodeComps[0]
|
|
|
|
if modName:
|
|
if filesPerModule.has_key(modName):
|
|
filesPerModule[modName].append(node)
|
|
else:
|
|
filesPerModule[modName] = [node]
|
|
|
|
for (module, files) in filesPerModule.iteritems():
|
|
outfile.write('''
|
|
subgraph cluster%s {
|
|
''' % (module))
|
|
for i in files:
|
|
outfile.write('\t\t"%s"\n' % i)
|
|
outfile.write('''
|
|
}
|
|
''')
|
|
|
|
|
|
for (node, neighbor, data) in G.edges_iter(data=True):
|
|
scoreProp = data['score'] / topScore
|
|
outfile.write('''"%s" -> "%s" [label="%f" color="0.0 1.0 %f" penwidth=%f]\n''' % (neighbor, node, data['score'], scoreProp, 1.0 + 40.0 * scoreProp))
|
|
|
|
outfile.write('''}
|
|
''')
|