blob: 39ca66000a5ce458be4e11c64d390e65b997dcb3 [file] [log] [blame]
Jacob Biesingerc4083582013-07-08 13:55:59 -07001#!/usr/bin/env python
2"""
3Convert a graph to graphviz format and run `dot` on it.
4
Jake Biesinger4bb565e2013-07-10 14:12:24 -07005Kmer sequences are included
Jacob Biesingerc4083582013-07-08 13:55:59 -07006"""
7
8__author__ = "Jacob Biesinger"
9__copyright__ = "Copyright 2009-2013, The Regents of the University of California"
10__license__ = "Apache"
11
12
13import sys
14import os
Jacob Biesingerc4083582013-07-08 13:55:59 -070015import re
16import string
Jake Biesinger4bb565e2013-07-10 14:12:24 -070017import argparse
Jacob Biesingerc4083582013-07-08 13:55:59 -070018
19import pydot
20
21
# One token of a tab-separated column: either a "<digits>,<digits>" node id
# or a plain run of word characters (a kmer, a flag, ...).
element_re = re.compile(r"\d+,\d+|\w+")

# Color used to draw each edge type (and the matching kmer row of a node).
# Earlier named-color palette: FF=black, FR=red, RF=blue, RR=gray.
edge_colors = {
    'FF': '#DD1E2F',
    'FR': '#EBB035',
    'RF': '#06A2CB',
    'RR': '#218559',
}
25
Jake Biesinger4bb565e2013-07-10 14:12:24 -070026
# `string.maketrans` / `string.translate` exist only on Python 2; on Python 3
# the equivalents live on `str`.  Resolve them once so the function below
# works unchanged on both.
try:
    _maketrans = string.maketrans
    _translate = string.translate
except AttributeError:
    _maketrans = str.maketrans
    _translate = str.translate


def reverse_complement(kmer, _table=_maketrans('ACGT', 'TGCA')):
    """Return the reverse complement of the DNA string `kmer`.

    A<->T and C<->G are swapped and the result is reversed.  Any other
    character (lowercase bases, ``N``, ...) is kept as is.

    `_table` is the precomputed translation table; callers are not expected
    to pass it.
    """
    return _translate(kmer, _table)[::-1]
29
Jake Biesinger4bb565e2013-07-10 14:12:24 -070030
def add_legend(graph):
    """Attach a legend cluster showing the color of every edge type.

    For each entry of `edge_colors`, taken in sorted order, the legend gets
    two unlabeled point nodes joined by an edge that is labeled with the edge
    type and drawn in its color.  The legend subgraph is added to `graph`,
    which is then returned.
    """
    legend = pydot.Subgraph('cluster_legend', splines='line', rankdir='LR', label='legend', rank='min')
    for index, edgetype in enumerate(sorted(edge_colors)):
        tail = 'legend_0_%d' % index
        head = 'legend_1_%d' % index
        for endpoint in (tail, head):
            legend.add_node(pydot.Node(endpoint, label='', shape='point'))
        legend.add_edge(pydot.Edge(tail, head, label=edgetype, color=edge_colors[edgetype]))
    graph.add_subgraph(legend)
    return graph
39
Jake Biesinger4bb565e2013-07-10 14:12:24 -070040
def graph_from_file(filename, legend=True, kmers=True, flag=True):
    """Build a `pydot.Dot` digraph from a tab-separated graph dump.

    Every line of `filename` must hold seven tab-separated columns::

        nodeid  FF  FR  RF  RR  kmer  flag

    Each column is tokenized with `element_re`.  The four edge columns list
    the ids of the nodes this node points to through an edge of that type;
    each edge is drawn in the matching `edge_colors` entry.  The part of
    `nodeid` before the first comma is used as the read id: nodes sharing a
    read id are placed in the same ``cluster_<readid>`` subgraph.

    Args:
        filename: Path of the graph dump.  Its base name (dots replaced by
            underscores) becomes the graph name.
        legend: When true, add the edge-color legend (see `add_legend`).
        kmers: When true, each node label also shows the kmer and its
            reverse complement.
        flag: When true, the node's flag column is appended to the node id
            in the label as ``--<flag>``.

    Returns:
        The populated `pydot.Dot` graph.

    Raises:
        ValueError: If a line does not have exactly seven columns.
        IndexError: If the node id or kmer column contains no token.
    """
    graph_name = os.path.split(filename)[1].replace('.', '_')
    graph = pydot.Dot(graph_name, graph_type='digraph', rankdir='LR', splines='ortho', weight='2')
    if legend:
        add_legend(graph)

    kmer_row = '<TR><TD BGCOLOR="%s">%s</TD></TR>'

    # annoyingly, order matters. add nodes before any edges or else properties aren't set right
    nodes = {}
    edges = []
    with open(filename) as infile:
        for line in infile:
            # The per-line flag gets its own name so that it no longer
            # clobbers the `flag` parameter.
            nodeid, ff, fr, rf, rr, kmer, node_flag = map(
                element_re.findall, line.strip().split('\t'))
            nodeid, kmer = nodeid[0], kmer[0]
            node_flag = node_flag[0] if node_flag else ''
            readid = nodeid.split(',')[0]

            flag_suffix = '--%s' % node_flag if flag and node_flag else ''
            FF_kmer = kmer_row % (edge_colors['FF'], kmer) if kmers else ''
            RR_kmer = kmer_row % (edge_colors['RR'], reverse_complement(kmer)) if kmers else ''
            node_label = r'''<<FONT POINT-SIZE="10"><TABLE ALIGN="CENTER" BORDER="0" CELLBORDER="0" CELLSPACING="0">
                <TR><TD>{nodeid}{flag}</TD></TR>
                {FF_kmer}
                {RR_kmer}
                </TABLE></FONT>>'''.format(nodeid=nodeid, flag=flag_suffix,
                                           FF_kmer=FF_kmer, RR_kmer=RR_kmer)
            node = pydot.Node(nodeid, rank=readid, group=readid, label=node_label)
            nodes.setdefault(readid, []).append(node)
            for edgename, edgelist in [('FF', ff), ('FR', fr), ('RF', rf), ('RR', rr)]:
                for e in edgelist:
                    edges.append(pydot.Edge(nodeid, e, color=edge_colors[edgename]))

    # One cluster per read id, holding all of that read's nodes.
    for readid, subnodes in nodes.items():
        subg = pydot.Subgraph('cluster_' + readid, fillcolor='lightgray')
        for node in subnodes:
            subg.add_node(node)
        graph.add_subgraph(subg)

    for e in edges:
        graph.add_edge(e)

    return graph
78
def recursive_plot(topdir, suffix='.txt', **kwargs):
    """Recursively plot every file under `topdir` whose path ends with `suffix`.

    Each rendered graph is written next to its input as
    ``<input>.<out_type>``.

    Args:
        topdir: Directory that is walked with `os.walk`.
        suffix: Only files whose path ends with this string are plotted.
        **kwargs: ``out_type`` (default ``'svg'``) is the format handed to
            ``graph.write``; every other keyword is forwarded to
            `graph_from_file`.
    """
    # Pop rather than get: `graph_from_file` does not accept `out_type`, so
    # leaving it in `kwargs` would make every single call fail.
    out_type = kwargs.pop('out_type', 'svg')
    for root, _dirnames, filenames in os.walk(topdir):
        for filename in filenames:
            path = os.path.join(root, filename)
            if not os.path.isfile(path) or not path.endswith(suffix):
                continue
            try:
                graph = graph_from_file(path, **kwargs)
            except Exception as err:
                # Best effort: a matching file need not be a graph dump.
                # Say so and move on instead of hiding the failure.
                sys.stderr.write('skipping %s: %s\n' % (path, err))
                continue
            target = path + '.' + out_type
            print('plotting ' + target)
            graph.write(target, format=out_type)


def get_parser():
    """Return the `argparse.ArgumentParser` describing the command line."""
    parser = argparse.ArgumentParser(
        description='Convert graph dumps to graphviz format and render them.')
    parser.add_argument('--no-legend', action='store_true',
                        help='Do not add the edge-color legend.')
    parser.add_argument('--no-kmers', action='store_true',
                        help='Do not show kmer sequences in the node labels.')
    parser.add_argument('--no-flag', action='store_true',
                        help='Do not show node flags in the node labels.')

    parser.add_argument('txt_graphs', nargs='*',
                        help='Graph files to plot.')
    parser.add_argument('--directory', '-d', help='Recurse here and plot all '
                        'graphs that are found.', action='append', default=[])
    parser.add_argument('--out-type', type=str, default='svg',
                        help='Output format passed to graph.write (default: svg).')
    return parser


def main(args):
    """Plot the graphs named on the command line.

    Args:
        args: Command-line arguments without the program name.

    Every file in ``txt_graphs`` is rendered to ``<file>.<out_type>``; every
    ``--directory`` is walked with `recursive_plot` using the same options.
    """
    parser = get_parser()
    args = parser.parse_args(args)
    kwargs = dict(legend=not args.no_legend, kmers=not args.no_kmers,
                  flag=not args.no_flag)
    for filename in args.txt_graphs:
        graph = graph_from_file(filename, **kwargs)
        # Same "<input>.<out_type>" naming as recursive_plot.
        target = filename + '.' + args.out_type
        print('plotting ' + target)
        graph.write(target, format=args.out_type)

    for d in args.directory:
        recursive_plot(d, out_type=args.out_type, **kwargs)
Jacob Biesingerc4083582013-07-08 13:55:59 -0700121
if __name__ == '__main__':
    # Run as a script: hand main() everything after the program name.
    cli_args = sys.argv[1:]
    main(cli_args)