Add python script for visualizing graphs
diff --git a/genomix/genomix-hadoop/src/test/python/convert_graphviz.py b/genomix/genomix-hadoop/src/test/python/convert_graphviz.py
new file mode 100644
index 0000000..abd38ce
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/python/convert_graphviz.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+"""
+Convert a graph to graphviz format and run `dot` on it.
+
+Kmer sequences are included in the node labels by default.
+"""
+
+__author__ = "Jacob Biesinger"
+__copyright__ = "Copyright 2009-2013, The Regents of the University of California"
+__license__ = "Apache"
+
+
+import sys
+import os
+import glob
+import re
+import string
+
+import pydot
+
+
+# Tokenizer applied to each tab-separated column: matches either a
+# "<digits>,<digits>" pair or a run of word characters.
+element_re = re.compile(r"\d+,\d+|\w+")
+
+# Color used to draw each edge type.
+# Alternative named-color palette: FF=black, FR=red, RF=blue, RR=gray.
+edge_colors = {
+    'FF': '#DD1E2F',
+    'FR': '#EBB035',
+    'RF': '#06A2CB',
+    'RR': '#218559',
+}
+
+# string.maketrans exists only on Python 2 and str.maketrans only on Python 3;
+# pick whichever one this interpreter provides.
+_maketrans = getattr(str, 'maketrans', None) or string.maketrans
+
+
+def reverse_complement(kmer, _table=_maketrans('ACGT', 'TGCA')):
+    """Return the reverse complement of the DNA string `kmer`.
+
+    Only the uppercase bases A, C, G and T are complemented; any other
+    character is left unchanged (but is still reversed with the rest).
+
+    `_table` is the precomputed translation table and is not meant to be
+    passed by callers.
+    """
+    # Calling the method on the string works on both Python 2 and 3, unlike
+    # the module-level string.translate, which was removed in Python 3.
+    return kmer.translate(_table)[::-1]
+
+def add_legend(graph):
+    """Attach a legend cluster to `graph` and return the same graph.
+
+    The legend holds one sample edge per entry of `edge_colors`, in sorted
+    edge-type order, drawn between two unlabeled point nodes and labeled
+    with the edge type in that type's color.
+    """
+    legend = pydot.Subgraph('cluster_legend', splines='line', rankdir='LR', label='legend', rank='min')
+    for index, edgetype in enumerate(sorted(edge_colors)):
+        tail_name = 'legend_0_%d' % index
+        head_name = 'legend_1_%d' % index
+        for endpoint in (tail_name, head_name):
+            legend.add_node(pydot.Node(endpoint, label='', shape='point'))
+        sample_edge = pydot.Edge(tail_name, head_name, label=edgetype, color=edge_colors[edgetype])
+        legend.add_edge(sample_edge)
+    graph.add_subgraph(legend)
+    return graph
+
+def graph_from_file(filename, legend=True, kmers=True, flag=True):
+    """Build a pydot digraph from a tab-separated graph dump.
+
+    Each non-blank line holds seven tab-separated columns: the node id, the
+    FF, FR, RF and RR edge lists, the kmer, and a flag. The flag column may
+    be empty or missing. Nodes whose ids share the same text before the
+    first comma are placed together in one cluster.
+
+    Args:
+        filename: path of the file to read. Its base name, with dots
+            replaced by underscores, becomes the graph name.
+        legend: when true, add the edge-color legend to the graph.
+        kmers: when true, show the kmer and its reverse complement as
+            colored rows in every node label.
+        flag: when true, append "--<flag>" to the node id in the label of
+            every node whose line carries a flag.
+
+    Returns:
+        The populated `pydot.Dot` graph.
+
+    Raises:
+        ValueError: a non-blank line has the wrong number of columns.
+        IndexError: the node id or kmer column contains no token.
+    """
+    graph_name = os.path.split(filename)[1].replace('.', '_')
+    graph = pydot.Dot(graph_name, graph_type='digraph', rankdir='LR', splines='ortho', weight='2')
+    if legend:
+        add_legend(graph)
+
+    kmer_row = '<TR><TD BGCOLOR="%s">%s</TD></TR>'
+    label_template = r'''<<FONT POINT-SIZE="10"><TABLE ALIGN="CENTER" BORDER="0" CELLBORDER="0" CELLSPACING="0">
+        {FF_kmer}
+        <TR><TD>{nodeid}{flag}</TD></TR>
+        {RR_kmer}
+        </TABLE></FONT>>'''
+
+    # annoyingly, order matters. add nodes before any edges or else properties aren't set right
+    nodes = {}
+    edges = []
+    with open(filename) as infile:
+        for line in infile:
+            columns = line.strip().split('\t')
+            if columns == ['']:
+                # blank line: nothing to parse
+                continue
+            if len(columns) == 6:
+                # strip() removed the trailing tab of an empty flag column
+                columns.append('')
+            nodeid, ff, fr, rf, rr, kmer, node_flag = [element_re.findall(col) for col in columns]
+            nodeid, kmer = nodeid[0], kmer[0]
+            node_flag = node_flag[0] if node_flag else ''
+            readid = nodeid.split(',')[0]
+
+            # The per-line flag lives in its own variable so the `flag`
+            # argument keeps its meaning for the whole file.
+            flag_suffix = '--%s' % node_flag if (flag and node_flag) else ''
+            if kmers:
+                ff_row = kmer_row % (edge_colors['FF'], kmer)
+                rr_row = kmer_row % (edge_colors['RR'], reverse_complement(kmer))
+            else:
+                ff_row = rr_row = ''
+            node_label = label_template.format(
+                FF_kmer=ff_row, nodeid=nodeid, flag=flag_suffix, RR_kmer=rr_row)
+
+            node = pydot.Node(nodeid, rank=readid, group=readid, label=node_label)
+            nodes.setdefault(readid, []).append(node)
+            for edgename, edgelist in [('FF', ff), ('FR', fr), ('RF', rf), ('RR', rr)]:
+                for e in edgelist:
+                    edges.append(pydot.Edge(nodeid, e, color=edge_colors[edgename]))
+
+    for readid, subnodes in nodes.items():
+        subg = pydot.Subgraph('cluster_' + readid, fillcolor='lightgray')
+        for node in subnodes:
+            subg.add_node(node)
+        graph.add_subgraph(subg)
+
+    for e in edges:
+        graph.add_edge(e)
+
+    return graph
+
+def recursive_plot(topdir, suffix='.txt'):
+    """Recursively plot every file under `topdir` whose name ends with `suffix`.
+
+    Each matching file is parsed with `graph_from_file` and rendered to a
+    PNG written next to it as "<path>.png". Any error raised while parsing
+    or rendering propagates to the caller.
+    """
+    for root, _dirnames, filenames in os.walk(topdir):
+        for filename in filenames:
+            # os.walk yields bare names, so build the full path before
+            # testing or opening the file.
+            path = os.path.join(root, filename)
+            if not (filename.endswith(suffix) and os.path.isfile(path)):
+                continue
+            graph = graph_from_file(path)
+            graph.write_png(path + '.png')
+
+def main(args):
+    """Render every graph file named in `args` to a PNG.
+
+    Each path is parsed with `graph_from_file` and the image is written to
+    "<path>.png". Any error raised while parsing or rendering propagates to
+    the caller, so processing stops at the first failing file.
+    """
+    for f in args:
+        graph = graph_from_file(f)
+        graph.write_png(f + '.png')
+
+# Script entry point: every command-line argument is treated as a graph file.
+if __name__ == '__main__':
+    cli_paths = sys.argv[1:]
+    main(cli_paths)