Use different initial and subsequent mappers in H3
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3.java
index 66dac41..ddb2a64 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3.java
@@ -8,6 +8,7 @@
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobClient;
@@ -22,6 +23,7 @@
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
+import edu.uci.ics.genomix.hadoop.pmcommon.MessageWritableNodeWithFlag;
 import edu.uci.ics.genomix.type.NodeWritable;
 import edu.uci.ics.genomix.type.PositionWritable;
 
@@ -52,20 +54,18 @@
     }
 
     /*
-     * Mapper class: Partition the graph according  pseudoheads send themselves to their 
-     * successors, and all others map themselves.
+     * Common functionality for the two mapper types needed.  See javadoc for MergePathsH3MapperSubsequent.
      */
-    private static class MergePathsH3Mapper extends MapReduceBase implements
-            Mapper<PositionWritable, MessageWritableH3, PositionWritable, MessageWritableH3> {
+    private static class MergePathsH3MapperBase extends MapReduceBase {
 
-        private static long randSeed;
-        private Random randGenerator;
-        private float probBeingRandomHead;
+        protected static long randSeed;
+        protected Random randGenerator;
+        protected float probBeingRandomHead;
 
-        private int KMER_SIZE;
-        private PositionWritable outputKey;
-        private MessageWritableH3 outputValue;
-        private NodeWritable curNode;
+        protected int KMER_SIZE;
+        protected PositionWritable outputKey;
+        protected MessageWritableNodeWithFlag outputValue;
+        protected NodeWritable curNode;
 
         public void configure(JobConf conf) {
             randSeed = conf.getLong("randomSeed", 0);
@@ -73,27 +73,35 @@
             probBeingRandomHead = conf.getFloat("probBeingRandomHead", 0.5f);
 
             KMER_SIZE = conf.getInt("sizeKmer", 0);
-            outputValue = new MessageWritableH3(KMER_SIZE);
+            outputValue = new MessageWritableNodeWithFlag(KMER_SIZE);
             outputKey = new PositionWritable();
             curNode = new NodeWritable(KMER_SIZE);
         }
 
-        private boolean isNodeRandomHead(PositionWritable nodeID) {
+        protected boolean isNodeRandomHead(PositionWritable nodeID) {
             // "deterministically random", based on node id
             randGenerator.setSeed(randSeed ^ nodeID.hashCode());
             return randGenerator.nextFloat() < probBeingRandomHead;
         }
+    }
 
+    /*
+     * Mapper class: Partition the graph using random pseudoheads.
+     * Heads send themselves to their successors, and all others map themselves.
+     */
+    private static class MergePathsH3MapperSubsequent extends MergePathsH3MapperBase implements
+            Mapper<PositionWritable, MessageWritableNodeWithFlag, PositionWritable, MessageWritableNodeWithFlag> {
         @Override
-        public void map(PositionWritable key, MessageWritableH3 value,
-                OutputCollector<PositionWritable, MessageWritableH3> output, Reporter reporter) throws IOException {
+        public void map(PositionWritable key, MessageWritableNodeWithFlag value,
+                OutputCollector<PositionWritable, MessageWritableNodeWithFlag> output, Reporter reporter)
+                throws IOException {
             curNode = value.getNode();
             // Map all path vertices; tail nodes are sent to their predecessors
             if (curNode.isPathNode()) {
                 boolean isHead = (value.getFlag() & MessageFlag.IS_HEAD) == MessageFlag.IS_HEAD;
                 if (isHead || isNodeRandomHead(curNode.getNodeID())) {
                     // head nodes send themselves to their successor
-                    outputKey = curNode.getOutgoingList().getPosition(0); // TODO: does this need to be a .set call?
+                    outputKey.set(curNode.getOutgoingList().getPosition(0));
                     outputValue.set((byte) (MessageFlag.FROM_PREDECESSOR | MessageFlag.IS_HEAD), curNode);
                     output.collect(outputKey, outputValue);
                 } else {
@@ -106,28 +114,55 @@
     }
 
     /*
+     * Mapper used for the first iteration.  See javadoc for MergePathsH3MapperSubsequent.
+     */
+    private static class MergePathsH3MapperInitial extends MergePathsH3MapperBase implements
+            Mapper<NodeWritable, NullWritable, PositionWritable, MessageWritableNodeWithFlag> {
+        @Override
+        public void map(NodeWritable key, NullWritable value,
+                OutputCollector<PositionWritable, MessageWritableNodeWithFlag> output, Reporter reporter)
+                throws IOException {
+            curNode = key;
+            // Map all path vertices; tail nodes are sent to their predecessors
+            if (curNode.isPathNode()) {
+                if (isNodeRandomHead(curNode.getNodeID())) {
+                    // head nodes send themselves to their successor
+                    outputKey.set(curNode.getOutgoingList().getPosition(0));
+                    outputValue.set((byte) (MessageFlag.FROM_PREDECESSOR | MessageFlag.IS_HEAD), curNode);
+                    output.collect(outputKey, outputValue);
+                } else {
+                    // tail nodes map themselves
+                    outputValue.set(MessageFlag.FROM_SELF, curNode);
+                    output.collect(key.getNodeID(), outputValue);
+                }
+            }
+        }
+    }
+
+    /*
      * Reducer class: merge nodes that co-occur; for singletons, remap the original nodes 
      */
     private static class MergePathsH3Reducer extends MapReduceBase implements
-            Reducer<PositionWritable, MessageWritableH3, PositionWritable, MessageWritableH3> {
+            Reducer<PositionWritable, MessageWritableNodeWithFlag, PositionWritable, MessageWritableNodeWithFlag> {
 
         private int KMER_SIZE;
-        private MessageWritableH3 inputValue;
-        private MessageWritableH3 outputValue;
+        private MessageWritableNodeWithFlag inputValue;
+        private MessageWritableNodeWithFlag outputValue;
         private NodeWritable headNode;
         private NodeWritable tailNode;
         private int count;
 
         public void configure(JobConf conf) {
             KMER_SIZE = conf.getInt("sizeKmer", 0);
-            outputValue = new MessageWritableH3(KMER_SIZE);
+            outputValue = new MessageWritableNodeWithFlag(KMER_SIZE);
             headNode = new NodeWritable(KMER_SIZE);
             tailNode = new NodeWritable(KMER_SIZE);
         }
 
         @Override
-        public void reduce(PositionWritable key, Iterator<MessageWritableH3> values,
-                OutputCollector<PositionWritable, MessageWritableH3> output, Reporter reporter) throws IOException {
+        public void reduce(PositionWritable key, Iterator<MessageWritableNodeWithFlag> values,
+                OutputCollector<PositionWritable, MessageWritableNodeWithFlag> output, Reporter reporter)
+                throws IOException {
 
             inputValue = values.next();
             if (!values.hasNext()) {
@@ -142,20 +177,25 @@
             } else {
                 // multiple inputs => a merge will take place. Aggregate both, then collect the merged path
                 count = 0;
-                do {
+                while (true) { // process values; break when no more
+                    count++;
                     if ((inputValue.getFlag() & MessageFlag.FROM_PREDECESSOR) == MessageFlag.FROM_PREDECESSOR) {
                         headNode.set(inputValue.getNode());
                     } else {
                         tailNode.set(inputValue.getNode());
                     }
-                    count++;
-                } while (values.hasNext());
+                    if (!values.hasNext()) {
+                        break;
+                    } else {
+                        inputValue = values.next();
+                    }
+                }
                 if (count != 2) {
                     throw new IOException("Expected two nodes in MergePathsH3 reduce; saw " + String.valueOf(count));
                 }
-                // merge the head and tail as saved output
-                tailNode.mergePreviousWithinOneRead(headNode);
-                outputValue.set(inputValue.getFlag(), tailNode);
+                // merge the head and tail as saved output, this merged node is now a head 
+                headNode.mergeNext(tailNode, KMER_SIZE);
+                outputValue.set(MessageFlag.IS_HEAD, headNode);
                 output.collect(key, outputValue);
             }
         }
@@ -171,17 +211,21 @@
 
         FileInputFormat.addInputPath(conf, new Path(inputPath));
         FileOutputFormat.setOutputPath(conf, new Path(outputPath));
-        
 
-        //TODO: verify input format
         conf.setInputFormat(SequenceFileInputFormat.class);
 
         conf.setMapOutputKeyClass(PositionWritable.class);
-        conf.setMapOutputValueClass(MessageWritableH3.class);
+        conf.setMapOutputValueClass(MessageWritableNodeWithFlag.class);
         conf.setOutputKeyClass(PositionWritable.class);
-        conf.setOutputValueClass(MessageWritableH3.class);
+        conf.setOutputValueClass(MessageWritableNodeWithFlag.class);
 
-        conf.setMapperClass(MergePathsH3Mapper.class);
+        // on the first iteration, we have to transform from a node-oriented graph 
+        // to a Position-oriented graph
+        if (conf.getInt("iMerge", 1) == 1) {
+            conf.setMapperClass(MergePathsH3MapperInitial.class);
+        } else {
+            conf.setMapperClass(MergePathsH3MapperSubsequent.class);
+        }
         conf.setReducerClass(MergePathsH3Reducer.class);
 
         FileSystem.get(conf).delete(new Path(outputPath), true);
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java
index 3637a71..4982439 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java
@@ -58,6 +58,7 @@
 
         String tmpOutputPath = "NO_JOBS_DONE";
         for (int iMerge = 1; iMerge <= mergeRound; iMerge++) {
+            baseConf.setInt("iMerge", iMerge);
             MergePathsH3 merger = new MergePathsH3();
             tmpOutputPath = inputPath + ".mergepathsH3." + String.valueOf(iMerge);
             merger.run(prevOutput, tmpOutputPath, baseConf);
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MessageWritableH3.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MessageWritableNodeWithFlag.java
similarity index 80%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MessageWritableH3.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MessageWritableNodeWithFlag.java
index 84c1049..5a3076c 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MessageWritableH3.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MessageWritableNodeWithFlag.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h3;
+package edu.uci.ics.genomix.hadoop.pmcommon;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -9,21 +9,21 @@
 
 import edu.uci.ics.genomix.type.NodeWritable;
 
-public class MessageWritableH3 extends BinaryComparable implements WritableComparable<BinaryComparable> {
+public class MessageWritableNodeWithFlag extends BinaryComparable implements WritableComparable<BinaryComparable> {
     private byte flag;
     private NodeWritable node;
 
-    public MessageWritableH3(int k) {
+    public MessageWritableNodeWithFlag(int k) {
         this.flag = 0;
         this.node = new NodeWritable(k);
     }
 
-    public MessageWritableH3(byte flag, int kmerSize) {
+    public MessageWritableNodeWithFlag(byte flag, int kmerSize) {
         this.flag = flag;
         this.node = new NodeWritable(kmerSize);
     }
 
-    public void set(MessageWritableH3 right) {
+    public void set(MessageWritableNodeWithFlag right) {
         set(right.getFlag(), right.getNode());
     }