blob: fe9b7780c390f83255a10f94b7b0b05daa1ddd51 [file] [log] [blame]
Jacob Biesingerc4083582013-07-08 13:55:59 -07001#!/usr/bin/env python
2"""
3Convert a graph to graphviz format and run `dot` on it.
4
Jake Biesinger4bb565e2013-07-10 14:12:24 -07005Kmer sequences are included
Jacob Biesingerc4083582013-07-08 13:55:59 -07006"""
7
8__author__ = "Jacob Biesinger"
9__copyright__ = "Copyright 2009-2013, The Regents of the University of California"
10__license__ = "Apache"
11
12
13import sys
14import os
Jacob Biesingerc4083582013-07-08 13:55:59 -070015import re
16import string
Jake Biesinger4bb565e2013-07-10 14:12:24 -070017import argparse
Jacob Biesingerc4083582013-07-08 13:55:59 -070018
19import pydot
20
21
# Tokenizer for one tab-separated column: picks out "<digits>,<digits>" pairs
# first, and otherwise runs of word characters.
element_re = re.compile(r"\d+,\d+|\w+")

# Colour used for each edge type; the FF and RR colours also serve as the
# background of the k-mer rows in node labels.
# Earlier palette: FF='black', FR='red', RF='blue', RR='gray'
edge_colors = {
    'FF': '#DD1E2F',
    'FR': '#EBB035',
    'RF': '#06A2CB',
    'RR': '#218559',
}
25
Jake Biesinger4bb565e2013-07-10 14:12:24 -070026
# ``str.maketrans`` only exists on Python 3; Python 2 offers the same helper
# as ``string.maketrans``.  Use whichever this interpreter provides.
_maketrans = getattr(str, 'maketrans', None) or string.maketrans


def reverse_complement(kmer, _table=_maketrans('ACGT', 'TGCA')):
    """Return the reverse complement of the DNA sequence `kmer`.

    Upper-case ``A``/``C``/``G``/``T`` are replaced by their complements and
    the sequence is reversed.  Any other character is kept as it is.

    `_table` is the translation table, built once when the function is
    defined; callers are not expected to supply it.
    """
    # The ``translate`` method works on Python 2 and 3 alike, unlike the
    # module-level ``string.translate`` function that Python 3 removed.
    return kmer.translate(_table)[::-1]
29
Jake Biesinger4bb565e2013-07-10 14:12:24 -070030
def add_legend(graph):
    """Attach a legend cluster that shows the colour of every edge type.

    For each entry of `edge_colors`, taken in sorted key order, two
    point-shaped nodes with empty labels are created and joined by an edge
    that is labelled with the edge type and drawn in its colour.

    `graph` is modified in place and is also returned.
    """
    legend_box = pydot.Subgraph('cluster_legend', splines='line', rankdir='LR',
                                label='legend', rank='min')
    for index, kind in enumerate(sorted(edge_colors)):
        tail_name = 'legend_0_%d' % index
        head_name = 'legend_1_%d' % index
        for endpoint in (tail_name, head_name):
            legend_box.add_node(pydot.Node(endpoint, label='', shape='point'))
        legend_box.add_edge(pydot.Edge(tail_name, head_name, label=kind,
                                       color=edge_colors[kind]))
    graph.add_subgraph(legend_box)
    return graph
39
Jake Biesinger4bb565e2013-07-10 14:12:24 -070040
def graph_from_file(filename, legend=True, kmers=True, flag=True):
    """Build a pydot digraph from a tab-separated graph dump.

    Every non-blank line of `filename` must hold seven tab-separated columns:
    the node id, the FF, FR, RF and RR edge lists, the k-mer and a flag.
    Each column is tokenized with `element_re`.  Nodes whose ids share the
    part before the first comma are placed in one ``cluster_<id>`` subgraph,
    and each edge is coloured by its type using `edge_colors`.

    Args:
        filename: path of the text file to read.  Its base name, with dots
            replaced by underscores, becomes the graph name.
        legend: when true, add the edge-colour legend (see `add_legend`).
        kmers: when true, show the k-mer and its reverse complement in each
            node label.
        flag: when true, append ``--<flag>`` to the node id in each node
            label (only for lines that carry a flag).

    Returns:
        The populated ``pydot.Dot`` graph.

    Raises:
        ValueError: a non-blank line does not have exactly seven columns.
        IndexError: the node id column of a line contains no token.
    """
    graph_name = os.path.split(filename)[1].replace('.', '_')
    graph = pydot.Dot(graph_name, graph_type='digraph', rankdir='LR', splines='ortho', weight='2')
    if legend:
        add_legend(graph)

    kmer_row = '<TR><TD BGCOLOR="%s">%s</TD></TR>'
    label_template = r'''<<FONT POINT-SIZE="10"><TABLE ALIGN="CENTER" BORDER="0" CELLBORDER="0" CELLSPACING="0">
            <TR><TD>{nodeid}{flag}</TD></TR>
            {FF_kmer}
            {RR_kmer}
            </TABLE></FONT>>'''

    # annoyingly, order matters. add nodes before any edges or else properties aren't set right
    nodes = {}
    edges = []
    # ``with`` guarantees the file is closed, even when a malformed line raises.
    with open(filename) as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank lines
            # Drop only the line terminator: a full strip() would also eat a
            # trailing tab and thereby lose an empty flag column.
            columns = line.rstrip('\r\n').split('\t')
            nodeid, ff, fr, rf, rr, kmer, node_flag = [element_re.findall(col) for col in columns]
            nodeid = nodeid[0]
            # The k-mer and flag columns may be empty; treat that as "absent".
            kmer = kmer[0] if kmer else ''
            node_flag = node_flag[0] if node_flag else ''
            readid = nodeid.split(',')[0]

            # Keep the per-line flag in its own name so the `flag` switch
            # passed by the caller is not overwritten and actually applies.
            flag_text = '--%s' % node_flag if (flag and node_flag) else ''
            if kmers and kmer:
                ff_row = kmer_row % (edge_colors['FF'], kmer)
                rr_row = kmer_row % (edge_colors['RR'], reverse_complement(kmer))
            else:
                ff_row = rr_row = ''
            node_label = label_template.format(nodeid=nodeid, flag=flag_text,
                                               FF_kmer=ff_row, RR_kmer=rr_row)

            node = pydot.Node(nodeid, rank=readid, group=readid, label=node_label)
            nodes.setdefault(readid, []).append(node)
            for edgename, edgelist in [('FF', ff), ('FR', fr), ('RF', rf), ('RR', rr)]:
                for e in edgelist:
                    edges.append(pydot.Edge(nodeid, e, color=edge_colors[edgename]))

    # One cluster per read id, holding all of that read's nodes.
    for readid, subnodes in nodes.items():
        subg = pydot.Subgraph('cluster_' + readid, fillcolor='lightgray')
        for node in subnodes:
            subg.add_node(node)
        graph.add_subgraph(subg)

    for e in edges:
        graph.add_edge(e)

    return graph
78
def recursive_plot(topdir, suffix='.txt', **kwargs):
    """Recursively plot any files under `topdir` whose path ends in `suffix`.

    Each matching regular file is converted with `graph_from_file`, which
    receives `kwargs`, and written as ``<file>.png`` next to the input.

    A file that cannot be converted is skipped, with a note on stderr, so
    that one unrelated file does not abort the whole walk.  Errors raised
    while writing the PNG are not caught.
    """
    for root, _dirnames, filenames in os.walk(topdir):
        for filename in filenames:
            path = os.path.join(root, filename)
            if not (os.path.isfile(path) and path.endswith(suffix)):
                continue
            try:
                graph = graph_from_file(path, **kwargs)
            except Exception as err:
                # Best effort: the tree may contain files with the right
                # suffix that are not graph dumps.  Say so instead of hiding it.
                sys.stderr.write('skipping %s: %s\n' % (path, err))
                continue
            # Parenthesized single-argument form prints the same text on
            # Python 2 and Python 3.
            print('plotting %s' % path)
            graph.write_png(path + '.png')
Jake Biesinger4bb565e2013-07-10 14:12:24 -070093
94
def get_parser():
    """Create the command-line parser for this script.

    The parser accepts three boolean switches (``--no-legend``,
    ``--no-kmers``, ``--no-flag``), any number of positional graph files
    (stored as ``txt_graphs``) and ``--directory``/``-d`` with one or more
    directories (stored as ``directory``, empty list by default).
    """
    cli = argparse.ArgumentParser()
    for switch in ('--no-legend', '--no-kmers', '--no-flag'):
        cli.add_argument(switch, action='store_true')

    cli.add_argument('txt_graphs', nargs='*')
    recurse_help = 'Recurse here and plot all graphs that are found.'
    cli.add_argument('--directory', '-d', nargs='+', default=[], help=recurse_help)
    return cli
105
Jacob Biesingerc4083582013-07-08 13:55:59 -0700106
def main(args):
    """Plot the graphs requested on the command line.

    `args` is the argument list without the program name (for example
    ``sys.argv[1:]``); it is parsed with the parser from `get_parser`.

    Each positional file is converted with `graph_from_file` and written to
    ``<file>.png``.  Each ``--directory`` entry is passed to `recursive_plot`.
    The ``--no-legend``, ``--no-kmers`` and ``--no-flag`` switches are
    forwarded, negated, as the `legend`, `kmers` and `flag` keyword arguments.
    """
    # Keep the parsed namespace under its own name rather than rebinding `args`.
    options = get_parser().parse_args(args)
    kwargs = dict(legend=not options.no_legend, kmers=not options.no_kmers,
                  flag=not options.no_flag)
    for filename in options.txt_graphs:
        graph = graph_from_file(filename, **kwargs)
        # Parenthesized single-argument form prints the same text on
        # Python 2 and Python 3.
        print('plotting %s' % filename)
        graph.write_png(filename + '.png')

    for d in options.directory:
        recursive_plot(d, **kwargs)
Jacob Biesingerc4083582013-07-08 13:55:59 -0700119
# Script entry point: pass the command-line arguments, minus the program
# name, to main().
if __name__ == '__main__':
    main(sys.argv[1:])