#### Portions of this code inspired by / based on #### cinclude2dot.pl by Darxus\@ChaosReigns.com, francis\@flourish.org #### GPLed code available from http://www.flourish.org/cinclude2dot/ import os, sys, optparse, re, math, glob import networkx as nx import sqlite3 as sql from stat import * G = nx.DiGraph() opt = optparse.OptionParser(usage="Usage: %prog [options] inputfile.cpp outfile.dot") opt.add_option("-I", dest="includePaths", action="append") opt.add_option("--modules", dest="modules", action="store_true") opt.add_option("--systemincludes", dest="systemIncludes", action="store_true") opt.add_option("--nodeScore", dest="nodeScore", action="store", default=1.0) opt.add_option("--edgeScore", dest="edgeScore", action="store", default=0.0) opt.add_option("--branchMult", dest="branchMult", action="store", default=1.0) opt.add_option("--branchPower", dest="branchPower", action="store", default=1.0) opt.add_option("--scoreBySize", dest="scoreBySize", action="store_true", default=False) opt.add_option("--unknownSizeScore", dest="unknownSizeScore", action="store", default=10.0) opt.add_option("--updatedb", dest="updateDb", action="store_true", default=False) opt.add_option("--nocpp", dest="noCpp", action="store_true", default=False) opt.add_option("--findpathsto", dest="findPathsTo", action="store", default=None) (options, outargs) = opt.parse_args(sys.argv[1:]) sqlcur = None if options.updateDb: sqlconn = sql.connect(r"C:\cinclude.sql") sqlcur = sqlconn.cursor() # try to create the tables we need sqlcur.execute('CREATE TABLE edge_costs (root_file text, from_edge text, to_edge text, cost real)') sqlcur.execute('CREATE TABLE file_info (file_name text, total_edges integer, rage_includes integer, std_includes integer)') nodeScore = float(options.nodeScore) edgeScore = float(options.edgeScore) branchMult = float(options.branchMult) branchPower = float(options.branchPower) scoreBySize = options.scoreBySize unknownSizeScore = float(options.unknownSizeScore) noCpp = options.noCpp outfile = file(outargs[1], "w") def add_edge(nodeFrom, nodeTo): nfl = nodeFrom nft = nodeTo if nfl == nft: return G.add_node(nfl, score=0) G.add_node(nft, score=0) G.add_edge(nft, nfl) # backwards link, for topo sort to work right def include_search(inc, filename): dirname = os.path.dirname(filename) testFile = os.path.normpath(os.path.join(dirname, inc)) if os.path.exists(testFile): return testFile # try user-specified include paths if options.includePaths: for dirname in options.includePaths: testFile = os.path.normpath(os.path.join(dirname, inc)) if os.path.exists(testFile): return testFile return None def file_display(filename, issystem): if issystem: return "<" + os.path.basename(filename) + ">" return os.path.basename(os.path.dirname(filename)) + "/" + os.path.basename(filename) visitedFiles = {} stdIncludes = {} unvisitedFiles = [] fileList = [(os.path.abspath(x), False) for x in glob.glob(outargs[0])] unvisitedFiles += fileList if len(fileList) > 1: root = "" for (f, issystem) in fileList: add_edge(root, file_display(f, issystem)) else: root = file_display(fileList[0][0], False) while len(unvisitedFiles) > 0: files = unvisitedFiles[:] unvisitedFiles = [] for (f,issystem) in files: visitedFiles[f] = 0 try: stats = os.stat(f) G.add_node(file_display(f, issystem), filesize=float(stats[ST_SIZE]) / 1024.0) cFile = file(f) except: continue for line in cFile: m = re.match(r'^\s*#\s*include\s+<(\S+)>', line) if m: add_edge(file_display(f, issystem), "<" + m.group(1) + ">") if options.systemIncludes: includefile = include_search(m.group(1), f) if includefile and not visitedFiles.has_key(includefile): unvisitedFiles.append((includefile, True)) stdIncludes[f] = 0 m = re.match(r'^\s*#\s*include\s+"(\S+)"', line) if m: includefile = include_search(m.group(1), f) if includefile: if noCpp and includefile.endswith(".cpp"): continue targetName = file_display(includefile, issystem) moduleName = targetName.split('/')[0] add_edge(file_display(f, issystem), targetName) if not visitedFiles.has_key(includefile): unvisitedFiles.append((includefile, issystem)) else: print "couldn't open include file", m.group(1), "included from", f add_edge(file_display(f, issystem), m.group(1)) print "Files:" for i in visitedFiles.iterkeys(): print i print "" from collections import defaultdict # new in Python 2.5 def circuits(G): """Find elementary circuits of a directed graph. An elementary circuit is a closed path where no node appears twice, except that the first and last node are the same. Two elementary circuits are distinct if they are not cyclic permutations of each other. Parameters ---------- G : NetworkX Graph A directed graph. Returns ------- A list of circuits, where each circuit is a list of nodes, with the first and last node being the same. Example: >>> G = nx.DiGraph([(0, 0), (0, 1), (0, 2), (1, 2), (2, 0), (2, 1), (2, 2)]) >>> circuits(G) [[0, 0], [0, 1, 2, 0], [0, 2, 0], [1, 2, 1], [2, 2]] See Also -------- cycles (for undirected graphs) References ---------- The implementation follows pp. 79-80 of: .. [1] Finding all the elementary circuits of a directed graph. D.B. Johnson SIAM Journal on Computing 4, no. 1: 77-84. http://dx.doi.org/10.1137/0204007 Jon Olav Vik, 2010-08-09 """ path = [] # stack of nodes (vertices) in current path blocked = defaultdict(bool) # vertex: blocked from search? B = defaultdict(list) # graph portions that yield no elementary circuit result = [] # list to accumulate the circuits found def unblock(thisnode): """Recursively unblock and remove nodes from B[thisnode].""" if blocked[thisnode]: blocked[thisnode] = False while B[thisnode]: unblock(B[thisnode].pop()) def circuit(thisnode, startnode, component): closed = False # set to True if elementary path is closed path.append(thisnode) blocked[thisnode] = True for nextnode in component[thisnode]: # direct successors of thisnode if nextnode == startnode: result.append(path + [startnode]) closed = True elif not blocked[nextnode]: if circuit(nextnode, startnode, component): closed = True if closed: unblock(thisnode) else: for nextnode in component[thisnode]: if thisnode not in B[nextnode]: # TODO: use set for speedup? B[nextnode].append(thisnode) path.pop() # remove thisnode from path return closed # Johnson's algorithm requires some ordering of the vertices for s in sorted(G): # Find the strong component K with least vertex (i.e. node) # in the subgraph induced by s and its following nodes. subgraph = G.subgraph(node for node in G if node >= s) strongcomp = nx.strongly_connected_components(subgraph) component = G.subgraph(min(strongcomp, key=min)) if component: startnode = min(component.nodes()) for node in component: blocked[node] = False B[node][:] = [] circuit(startnode, startnode, component) else: break return result if not nx.is_directed_acyclic_graph(G): # get rid of loops - somehow print "Not a DAG - removing cycles..." cycleList = circuits(G) print cycleList # go through each cycle, remove the edge with the longest distance from the root for cycle in cycleList: maxDist = -1 maxNodeIndex = -1 for node in range(len(cycle)-1): path = nx.shortest_path(G, cycle[node], root) # remember, edges are backwards dist = len(path) if dist > maxDist: maxDist = dist maxNodeIndex = node print " Removed", cycle[maxNodeIndex], "->", cycle[maxNodeIndex+1] G.remove_edge(cycle[maxNodeIndex], cycle[maxNodeIndex+1]) # if necessary, trim out all nodes except the ones forming a path from the source to the findPathsTo node if options.findPathsTo: done = False while not done: nodesToCull = [node for (node, deg) in G.in_degree_iter() if deg == 0 and options.findPathsTo not in node] print "Removing %d nodes" % (len(nodesToCull)) if len(nodesToCull) > 0: G.remove_nodes_from(nodesToCull) else: done = True # include cost heuristic # the cost of a node is 1 plus the cost of all the incoming edges # the cost of an edge is the cost of a node, divided by the number of outgoing edges from that node # i.e. start with all leaf nodes at '1' # divide the score on each of those nodes by the number of outgoing edges # propogate that reduced score to each of the parent nodes # add '1' to each of the parent nodes and repeat ordered_nodes = nx.topological_sort(G) for n in ordered_nodes: attrDict = G.node[n] score = nodeScore if scoreBySize: if attrDict.has_key('filesize'): score = G.node[n]['filesize'] else: score = unknownSizeScore G.node[n]['score'] = score for n in ordered_nodes: score = G.node[n]['score'] outDeg = G.out_degree(n) if outDeg == 0: outDeg = 1 parentScore = branchMult * (score / math.pow(outDeg, branchPower)) + edgeScore for (nodeFrom, nodeTo) in G.out_edges_iter(n): G[nodeFrom][nodeTo]['score'] = parentScore G.node[nodeTo]['score'] = parentScore + G.node[nodeTo]['score'] # now sort by score edges = G.edges(data=True) edges.sort(key=lambda x: -x[2]['score']) topEdge = edges[0] topScore = G[topEdge[0]][topEdge[1]]['score'] print "Top 100 Dependency Costs:" for e in edges[:100]: print e[1], "->", e[0], ":", G[e[0]][e[1]]['score'] print "" print "%d includes\n %d rage files\n %d std files" % (G.number_of_edges(), len(visitedFiles), len(stdIncludes)) outfile.write(''' digraph "source tree" { overlap=scale; size="8,10"; ratio="fill"; fontsize="16"; fontname="Helvetica"; clusterrank="local"; ''') filesPerModule = {} if options.modules: for (node,data) in G.nodes_iter(data=True): modName = None if node[0] == '<': modName = "standard_lib" else: nodeComps = node.split("/") if len(nodeComps) > 1: modName = nodeComps[0] if modName: if filesPerModule.has_key(modName): filesPerModule[modName].append(node) else: filesPerModule[modName] = [node] for (module, files) in filesPerModule.iteritems(): outfile.write(''' subgraph cluster%s { ''' % (module)) for i in files: outfile.write('\t\t"%s"\n' % i) outfile.write(''' } ''') for (node, neighbor, data) in G.edges_iter(data=True): scoreProp = data['score'] / topScore outfile.write('''"%s" -> "%s" [label="%f" color="0.0 1.0 %f" penwidth=%f]\n''' % (neighbor, node, data['score'], scoreProp, 1.0 + 40.0 * scoreProp)) outfile.write('''} ''')