Jacob Biesinger | c408358 | 2013-07-08 13:55:59 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | """ |
| 3 | Convert a graph to graphviz format and run `dot` on it. |
| 4 | |
Jake Biesinger | 4bb565e | 2013-07-10 14:12:24 -0700 | [diff] [blame] | 5 | Kmer sequences are included |
Jacob Biesinger | c408358 | 2013-07-08 13:55:59 -0700 | [diff] [blame] | 6 | """ |
| 7 | |
| 8 | __author__ = "Jacob Biesinger" |
| 9 | __copyright__ = "Copyright 2009-2013, The Regents of the University of California" |
| 10 | __license__ = "Apache" |
| 11 | |
| 12 | |
| 13 | import sys |
| 14 | import os |
Jacob Biesinger | c408358 | 2013-07-08 13:55:59 -0700 | [diff] [blame] | 15 | import re |
| 16 | import string |
Jake Biesinger | 4bb565e | 2013-07-10 14:12:24 -0700 | [diff] [blame] | 17 | import argparse |
Jacob Biesinger | c408358 | 2013-07-08 13:55:59 -0700 | [diff] [blame] | 18 | |
| 19 | import pydot |
| 20 | |
| 21 | |
# Pulls the individual tokens out of one tab-separated column: either a
# "<digits>,<digits>" pair or a bare run of word characters.
element_re = re.compile(r"\d+,\d+|\w+")

# Color used to draw each edge type (keyed by the two-letter edge type name);
# the same colors are reused as backgrounds for the kmer rows in node labels.
edge_colors = {
    'FF': '#DD1E2F',
    'FR': '#EBB035',
    'RF': '#06A2CB',
    'RR': '#218559',
}
| 25 | |
Jake Biesinger | 4bb565e | 2013-07-10 14:12:24 -0700 | [diff] [blame] | 26 | |
# `string.maketrans` only exists on Python 2; Python 3 moved it to
# `str.maketrans`.  Pick whichever is available so the module imports on both.
_maketrans = getattr(string, 'maketrans', None) or str.maketrans


def reverse_complement(kmer, _table=_maketrans('ACGT', 'TGCA')):
    """Return the reverse complement of the DNA string `kmer`.

    Upper-case A/C/G/T are swapped with their complements and the result is
    reversed; any other character is passed through unchanged.

    `_table` is the translation table, built once at definition time; callers
    are not expected to supply it.
    """
    return kmer.translate(_table)[::-1]
| 29 | |
Jake Biesinger | 4bb565e | 2013-07-10 14:12:24 -0700 | [diff] [blame] | 30 | |
def add_legend(graph):
    """Add a 'legend' cluster to `graph` showing the color of each edge type.

    For every entry of `edge_colors` (in sorted order) the legend gets two
    unlabeled point nodes joined by an edge that is labeled with the edge type
    and drawn in its color.  The graph is modified in place and also returned.
    """
    legend = pydot.Subgraph('cluster_legend', splines='line', rankdir='LR', label='legend', rank='min')
    # Keys are unique, so sorting the keys gives the same order as sorting the items.
    for index, edgetype in enumerate(sorted(edge_colors)):
        tail_name = 'legend_0_%d' % index
        head_name = 'legend_1_%d' % index
        for point_name in (tail_name, head_name):
            legend.add_node(pydot.Node(point_name, label='', shape='point'))
        legend.add_edge(pydot.Edge(tail_name, head_name, label=edgetype, color=edge_colors[edgetype]))
    graph.add_subgraph(legend)
    return graph
| 39 | |
Jake Biesinger | 4bb565e | 2013-07-10 14:12:24 -0700 | [diff] [blame] | 40 | |
def graph_from_file(filename, legend=True, kmers=True, flag=True):
    """Build a pydot digraph from a tab-separated text graph file.

    Every non-blank line of `filename` must contain seven tab-separated
    columns, in this order: node id, FF edges, FR edges, RF edges, RR edges,
    kmer, flag.  Each column is tokenized with `element_re`.  The part of the
    node id before the first comma is treated as the read id; nodes sharing a
    read id are drawn together in one cluster.

    Arguments:
        filename: path of the text graph to read.
        legend: if true, add the edge-color legend via `add_legend`.
        kmers: if true, include the kmer and its reverse complement as
            colored rows in each node label.
        flag: if true, append the node's flag (when it has one) to the node
            id shown in the label.

    Returns the populated `pydot.Dot` graph.

    Raises ValueError if a line does not have exactly seven columns and
    IndexError if the node id or kmer column contains no token.
    """
    graph_name = os.path.split(filename)[1].replace('.', '_')
    graph = pydot.Dot(graph_name, graph_type='digraph', rankdir='LR', splines='ortho', weight='2')
    if legend:
        add_legend(graph)

    show_flag = flag  # keep the option distinct from the per-node flag column

    # annoyingly, order matters. add nodes before any edges or else properties aren't set right
    nodes = {}
    edges = []
    with open(filename) as infile:
        for line in infile:
            line = line.strip()
            if not line:
                # tolerate blank / trailing empty lines
                continue
            nodeid, ff, fr, rf, rr, kmer, node_flag = map(element_re.findall, line.split('\t'))
            nodeid, kmer = nodeid[0], kmer[0]
            # The flag column may hold no token at all; only index it when it does.
            node_flag = node_flag[0] if node_flag else ''
            readid = nodeid.split(',')[0]

            flag_suffix = '--%s' % node_flag if (show_flag and node_flag) else ''
            if kmers:
                kmer_row = '<TR><TD BGCOLOR="%s">%s</TD></TR>'
                ff_row = kmer_row % (edge_colors['FF'], kmer)
                rr_row = kmer_row % (edge_colors['RR'], reverse_complement(kmer))
            else:
                ff_row = rr_row = ''

            # Graphviz HTML-like label: the outer <...> marks the label as HTML.
            node_label = '\n'.join([
                '<<FONT POINT-SIZE="10"><TABLE ALIGN="CENTER" BORDER="0" CELLBORDER="0" CELLSPACING="0">',
                '<TR><TD>%s%s</TD></TR>' % (nodeid, flag_suffix),
                ff_row,
                rr_row,
                '</TABLE></FONT>>',
            ])
            node = pydot.Node(nodeid, rank=readid, group=readid, label=node_label)
            nodes.setdefault(readid, []).append(node)

            for edgename, edgelist in [('FF', ff), ('FR', fr), ('RF', rf), ('RR', rr)]:
                for target in edgelist:
                    edges.append(pydot.Edge(nodeid, target, color=edge_colors[edgename]))

    # One cluster per read id, holding all of that read's nodes.
    for readid, subnodes in nodes.items():
        subg = pydot.Subgraph('cluster_' + readid, fillcolor='lightgray')
        for node in subnodes:
            subg.add_node(node)
        graph.add_subgraph(subg)

    for edge in edges:
        graph.add_edge(edge)

    return graph
| 78 | |
def _write_graph(graph, filename, out_type):
    """Announce and write `graph` to `<filename>.<out_type>`."""
    out_path = filename + '.' + out_type
    # Single parenthesized argument so this prints identically on Python 2 and 3.
    print('plotting ' + out_path)
    graph.write(out_path, format=out_type)


def recursive_plot(topdir, suffix='.txt', **kwargs):
    """Recursively plot any files under `topdir` whose name ends with `suffix`.

    Keyword arguments:
        out_type: output format / file extension (default 'svg').  It is
            consumed here and NOT forwarded to `graph_from_file`.
        anything else: passed straight through to `graph_from_file`.

    Files that cannot be parsed as a graph are skipped (best effort) and
    reported on stderr; errors while writing the output still propagate.
    """
    # pop, not get: graph_from_file does not accept an `out_type` argument.
    out_type = kwargs.pop('out_type', 'svg')
    for root, _dirnames, filenames in os.walk(topdir):
        for filename in filenames:
            path = os.path.join(root, filename)
            if not os.path.isfile(path) or not path.endswith(suffix):
                continue
            try:
                graph = graph_from_file(path, **kwargs)
            except Exception as err:
                # Not every matching file is a graph; keep going, but say so.
                sys.stderr.write('skipping %s: %s\n' % (path, err))
                continue
            _write_graph(graph, path, out_type)


def get_parser():
    """Return the command-line argument parser for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--no-legend', action='store_true',
                        help='Do not draw the edge-color legend.')
    parser.add_argument('--no-kmers', action='store_true',
                        help='Do not show kmer sequences in node labels.')
    parser.add_argument('--no-flag', action='store_true',
                        help='Do not show node flags in node labels.')

    parser.add_argument('txt_graphs', nargs='*')
    parser.add_argument('--directory', '-d', help='Recurse here and plot all '
                        'graphs that are found.', action='append', default=[])
    parser.add_argument('--out-type', type=str, default='svg')
    return parser


def main(args):
    """Plot every graph named in `args` (a list of command-line arguments).

    Positional files are plotted directly; each `--directory` is searched
    recursively.  Output is written next to each input as
    `<input>.<out-type>`.
    """
    options = get_parser().parse_args(args)
    kwargs = dict(legend=not options.no_legend, kmers=not options.no_kmers,
                  flag=not options.no_flag)
    for filename in options.txt_graphs:
        graph = graph_from_file(filename, **kwargs)
        _write_graph(graph, filename, options.out_type)

    for d in options.directory:
        recursive_plot(d, out_type=options.out_type, **kwargs)
Jacob Biesinger | c408358 | 2013-07-08 13:55:59 -0700 | [diff] [blame] | 121 | |
# Script entry point: hand the command-line arguments (minus the program
# name) to main() when run directly rather than imported.
if __name__ == '__main__':
    main(sys.argv[1:])