Merge branch 'nanzhang/genomix' into genomix/fullstack_genomix

Conflicts:
	genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
	genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 5c2a212..c0b00a7 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -43,14 +43,15 @@
         incomingList.set(incoming);
     }
 
-    public void setKmer(KmerBytesWritable kmer) {
-        this.kmer = kmer;
-    }
 
     public void setOutgoingList(PositionListWritable outgoing) {
         outgoingList.set(outgoing);
     }
 
+    public void setKmer(KmerBytesWritable right) {
+        this.kmer.set(right);
+    }
+    
     public void reset(int kmerSize) {
         nodeID.set(0, (byte) 0);
         incomingList.reset();
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
index 132b464..f77c844 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
@@ -126,6 +126,19 @@
         }
     }
 
+    public static class FirstComparator extends WritableComparator {
+        public FirstComparator() {
+            super(PositionWritable.class);
+        }
+
+        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+            int thisValue = Marshal.getInt(b1, s1);
+            int thatValue = Marshal.getInt(b2, s2);
+            // three-way compare avoids int-subtraction overflow on large read IDs
+            return (thisValue < thatValue) ? -1 : ((thisValue == thatValue) ? 0 : 1);
+        }
+    }
+    
     static { // register this comparator
         WritableComparator.define(PositionWritable.class, new Comparator());
     }
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingMapper.java
deleted file mode 100644
index 591e3c7..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingMapper.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
-
-import java.io.IOException;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.PositionListWritable;
-
-@SuppressWarnings("deprecation")
-public class DeepGraphBuildingMapper extends MapReduceBase implements
-        Mapper<KmerBytesWritable, PositionListWritable, IntWritable, LineBasedmappingWritable> {
-    IntWritable  numLine = new IntWritable();
-    LineBasedmappingWritable lineBasedWriter = new LineBasedmappingWritable();
-    @Override
-    public void map(KmerBytesWritable key, PositionListWritable value, OutputCollector<IntWritable, LineBasedmappingWritable> output,
-            Reporter reporter) throws IOException {
-    }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingReducer.java
deleted file mode 100644
index 4b971a7..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingReducer.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.NodeWritable;
-
-@SuppressWarnings("deprecation")
-public class DeepGraphBuildingReducer extends MapReduceBase implements
-        Reducer<IntWritable, LineBasedmappingWritable, NodeWritable, NullWritable> {
-
-/*    public ArrayList<LineBasedmappingWritable> lineElementsSet = new ArrayList<LineBasedmappingWritable>();
-    public Position outputVerID = new Position();
-    public VertexAdjacentWritable outputAdjacentList = new VertexAdjacentWritable();
-    public PositionList srcVtexAdjList = new PositionList();
-    public PositionList desVtexAdjList = new PositionList();
-    public VertexIDListWritable srcAdjListWritable = new VertexIDListWritable();
-    public VKmerBytesWritable desKmer = new VKmerBytesWritable(1);
-    public VKmerBytesWritableFactory kmerFactory = new VKmerBytesWritableFactory(1);
-    public VKmerBytesWritable srcKmer = new VKmerBytesWritable(1);*/
-    @Override
-    public void reduce(IntWritable key, Iterator<LineBasedmappingWritable> values,
-            OutputCollector<NodeWritable, NullWritable> output, Reporter reporter) throws IOException {
-/*        while (values.hasNext()) {
-            lineElementsSet.add(values.next());
-        }
-        int[] orderLineTable = new int[lineElementsSet.size()];
-        for (int i = 0; i < lineElementsSet.size(); i++) {
-            int posInInvertedIndex = lineElementsSet.get(i).getPosInInvertedIndex();
-            orderLineTable[lineElementsSet.get(i).getAdjVertexList().get().getPosinReadListElement(posInInvertedIndex)] = i;
-        }
-        //the first node in this read
-        int posInInvertedIndex = lineElementsSet.get(orderLineTable[0]).getPosInInvertedIndex();
-        outputVerID.set(
-                lineElementsSet.get(orderLineTable[0]).getAdjVertexList().get().getReadListElement(posInInvertedIndex),
-                (byte) 0);
-        desVtexAdjList.set(lineElementsSet.get(orderLineTable[1]).getAdjVertexList().get());
-        for (int i = 0; i < desVtexAdjList.getUsedSize(); i++) {
-            if (desVtexAdjList.getPosinReadListElement(i) == (byte) 0) {
-                srcVtexAdjList.addELementToList(desVtexAdjList.getReadListElement(i), (byte) 0);
-            }
-        }
-        srcVtexAdjList.addELementToList(key.get(), (byte) 1);
-        outputVerID.set(
-                lineElementsSet.get(orderLineTable[0]).getAdjVertexList().get().getReadListElement(posInInvertedIndex),
-                (byte) 0);
-        srcAdjListWritable.set(srcVtexAdjList);
-        outputAdjacentList.set(srcAdjListWritable, lineElementsSet.get(orderLineTable[0]).getVkmer());
-        output.collect(outputVerID, outputAdjacentList);
-        //srcVtexAdjList reset!!!!
-
-        for (int i = 1; i < lineElementsSet.size(); i++) {
-            desVtexAdjList.set(lineElementsSet.get(orderLineTable[i + 1]).getAdjVertexList().get());
-            boolean flag = false;
-            for (int j = 0; j < desVtexAdjList.getUsedSize(); j++) {
-                if (desVtexAdjList.getPosinReadListElement(j) == (byte) 0) {
-                    srcVtexAdjList.addELementToList(desVtexAdjList.getReadListElement(i), (byte) 0);
-                    flag = true;
-                }
-            }
-            if (flag = true) {
-                //doesm't merge
-                srcVtexAdjList.addELementToList(key.get(), (byte) (i + 1));
-                outputVerID.set(
-                        lineElementsSet.get(orderLineTable[i]).getAdjVertexList().get()
-                                .getReadListElement(posInInvertedIndex), lineElementsSet.get(orderLineTable[i])
-                                .getAdjVertexList().get().getPosinReadListElement(posInInvertedIndex));
-                srcAdjListWritable.set(srcVtexAdjList);
-                outputAdjacentList.set(srcAdjListWritable, lineElementsSet.get(orderLineTable[i]).getVkmer());
-            }
-            else {
-                //merge
-                desKmer.set(kmerFactory.getFirstKmerFromChain(1, lineElementsSet.get(orderLineTable[i+1]).getVkmer()));
-                srcKmer.set(lineElementsSet.get(orderLineTable[i]).getVkmer());
-                lineElementsSet.get(orderLineTable[i+1]).getVkmer().set(kmerFactory.mergeTwoKmer(srcKmer, desKmer));
-                orderLineTable[i+1] = orderLineTable[i];
-            }
-        }*/
-    }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java
new file mode 100644
index 0000000..1a758c7
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java
@@ -0,0 +1,55 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+import java.io.IOException;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+@SuppressWarnings("deprecation")
+public class DeepGraphBuildingMapper extends MapReduceBase implements
+        Mapper<KmerBytesWritable, PositionListWritable, PositionWritable, PositionListAndKmerWritable> {
+    
+    public PositionWritable VertexID;
+    public PositionListWritable listPosZeroInRead;
+    public PositionListWritable listPosNonZeroInRead;
+    public PositionListAndKmerWritable outputListAndKmer;
+    @Override
+    public void configure(JobConf job) {
+        VertexID = new PositionWritable();
+        listPosZeroInRead = new PositionListWritable();
+        listPosNonZeroInRead = new PositionListWritable();
+        outputListAndKmer = new PositionListAndKmerWritable();
+    }
+    @Override
+    public void map(KmerBytesWritable key, PositionListWritable value, OutputCollector<PositionWritable, PositionListAndKmerWritable> output,
+            Reporter reporter) throws IOException {
+        listPosZeroInRead.reset();
+        listPosNonZeroInRead.reset();
+        outputListAndKmer.reset();
+        for(int i = 0; i < value.getCountOfPosition(); i++) {
+            VertexID.set(value.getPosition(i));
+            if(VertexID.getPosInRead() == 0) {
+                listPosZeroInRead.append(VertexID);
+            }
+            else {
+                listPosNonZeroInRead.append(VertexID);
+            }
+        }
+        for(int i = 0; i < listPosZeroInRead.getCountOfPosition(); i++) {
+            VertexID.set(listPosZeroInRead.getPosition(i));
+            outputListAndKmer.set(listPosNonZeroInRead, key);
+            output.collect(VertexID, outputListAndKmer);
+        }
+        for(int i = 0; i < listPosNonZeroInRead.getCountOfPosition(); i++) {
+            VertexID.set(listPosNonZeroInRead.getPosition(i));
+            outputListAndKmer.set(listPosZeroInRead, key);
+            output.collect(VertexID, outputListAndKmer);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java
new file mode 100644
index 0000000..1da7d83
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java
@@ -0,0 +1,150 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
+import edu.uci.ics.genomix.hadoop.oldtype.VKmerBytesWritable;
+import edu.uci.ics.genomix.hadoop.oldtype.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+@SuppressWarnings("deprecation")
+public class DeepGraphBuildingReducer extends MapReduceBase implements
+        Reducer<PositionWritable, PositionListAndKmerWritable, NodeWritable, NullWritable> {
+
+    private PositionListAndKmerWritable nodeListAndKmer = new PositionListAndKmerWritable();
+    private PositionListAndKmerWritable nodeSuccListAndKmer = new PositionListAndKmerWritable();
+    private NodeWritable startNodeInRead = new NodeWritable();
+    private NodeWritable nodeToMerge = new NodeWritable();
+    private NodeWritable nodeToBeMerged = new NodeWritable();
+    private PositionListWritable incomingList = new PositionListWritable();
+    private PositionListWritable outgoingList = new PositionListWritable();
+    private NullWritable nullWritable = NullWritable.get();
+    private int KMER_SIZE;
+    
+    public void configure(JobConf job) {
+        KMER_SIZE = job.getInt("sizeKmer", 0);
+    }
+    public enum nodeToMergeState {
+        SRC_UPDATE_FROM_VALUES,
+        SRC_NODE_NON_UPDATE,
+        SRC_ASSIGNED_BY_RIGHTNODE;
+    }
+
+    @Override
+    public void reduce(PositionWritable key, Iterator<PositionListAndKmerWritable> values,
+            OutputCollector<NodeWritable, NullWritable> output, Reporter reporter) throws IOException {
+        //initialize the Start point in Read
+        int readID = key.getReadID();
+        byte posInRead = 0;
+        if (values.hasNext()) {
+            nodeListAndKmer.set(values.next());
+            incomingList.set(nodeListAndKmer.getVertexIDList());
+        }
+        if (values.hasNext()) {
+            nodeSuccListAndKmer.set(values.next());
+            if (nodeSuccListAndKmer.getVertexIDList() != null)
+                outgoingList.set(nodeSuccListAndKmer.getVertexIDList());
+        }
+        outgoingList.append(readID, (byte) (posInRead + 1));
+        startNodeInRead.setNodeID(readID, posInRead);
+        startNodeInRead.setIncomingList(incomingList);
+        startNodeInRead.setOutgoingList(outgoingList);
+        startNodeInRead.setKmer(nodeListAndKmer.getKmer());
+        output.collect(startNodeInRead, nullWritable);
+        posInRead++;
+        //----initialize the nodeToMerge
+        nodeListAndKmer.set(nodeSuccListAndKmer);
+        incomingList.reset();
+        incomingList.append(key.getReadID(), key.getPosInRead());
+        if (values.hasNext()) {
+            nodeSuccListAndKmer.set(values.next());
+            if (nodeSuccListAndKmer.getVertexIDList() != null)
+                outgoingList.set(nodeSuccListAndKmer.getVertexIDList());
+        }
+        outgoingList.append(readID, (byte) (posInRead + 1));
+        nodeToMerge.setNodeID(readID, (byte) posInRead);
+        nodeToMerge.setIncomingList(incomingList);
+        nodeToMerge.setOutgoingList(outgoingList);
+        nodeToMerge.setKmer(nodeListAndKmer.getKmer());
+        posInRead++;
+        //----LOOP
+        nodeToMergeState srcState = nodeToMergeState.SRC_NODE_NON_UPDATE;
+        boolean srcOrTarget = true;
+        while (values.hasNext()) {
+            switch (srcState) {
+                case SRC_UPDATE_FROM_VALUES:
+                    srcOrTarget = true;
+                    getNodeFromValues(readID, posInRead, values, srcOrTarget);
+                    posInRead++;
+                    srcOrTarget = false;
+                    getNodeFromValues(readID, posInRead, values, srcOrTarget);
+                    posInRead++;
+                    break;
+                case SRC_NODE_NON_UPDATE:
+                    srcOrTarget = false;
+                    getNodeFromValues(readID, posInRead, values, srcOrTarget);
+                    posInRead++;
+                    break;
+                case SRC_ASSIGNED_BY_RIGHTNODE:
+                    nodeToMerge.set(nodeToBeMerged);
+                    srcOrTarget = false;
+                    getNodeFromValues(readID, posInRead, values, srcOrTarget);
+                    posInRead++;
+                    break;
+            }
+            if(nodeToMerge.isPathNode() == true && nodeToBeMerged.isPathNode() == true){
+                nodeToMerge.mergeNext(nodeToBeMerged, KMER_SIZE);
+                srcState = nodeToMergeState.SRC_NODE_NON_UPDATE;
+            }
+            else{
+                if(nodeToMerge.isPathNode() == false && nodeToBeMerged.isPathNode() == true){
+                    
+                    srcState = nodeToMergeState.SRC_ASSIGNED_BY_RIGHTNODE;
+                    output.collect(nodeToBeMerged, nullWritable);
+                }
+                else {
+                    srcState = nodeToMergeState.SRC_UPDATE_FROM_VALUES;
+                    output.collect(nodeToMerge, nullWritable);
+                    output.collect(nodeToBeMerged, nullWritable);
+                }
+            }
+
+        }
+    }
+
+    public void getNodeFromValues(int readID, byte posInRead, Iterator<PositionListAndKmerWritable> values,
+            boolean srcOrTarget) {
+        if (values.hasNext()) {
+            nodeListAndKmer.set(values.next());
+            incomingList.reset();
+            incomingList.append(readID, (byte) (posInRead - 1));
+        }
+        if (values.hasNext()) {
+            nodeSuccListAndKmer.set(values.next());
+            if (nodeSuccListAndKmer.getVertexIDList() != null)
+                outgoingList.set(nodeSuccListAndKmer.getVertexIDList());
+        }
+        outgoingList.append(readID, (byte) (posInRead + 1));
+        if (srcOrTarget == true) {
+            nodeToMerge.setNodeID(readID, (byte) posInRead);
+            nodeToMerge.setIncomingList(incomingList);
+            nodeToMerge.setOutgoingList(outgoingList);
+            nodeToMerge.setKmer(nodeListAndKmer.getKmer());
+        } else {
+            nodeToBeMerged.setNodeID(readID, (byte) posInRead);
+            nodeToBeMerged.setIncomingList(incomingList);
+            nodeToBeMerged.setOutgoingList(outgoingList);
+            nodeToBeMerged.setKmer(nodeListAndKmer.getKmer());
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
new file mode 100644
index 0000000..acbc3f1
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
@@ -0,0 +1,112 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+@SuppressWarnings("deprecation")
+public class GraphBuildingDriver {
+
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+        public int sizeKmer;
+
+    }
+   
+    public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
+            throws IOException {
+
+        JobConf conf = new JobConf(GraphBuildingDriver.class);
+        conf.setInt("sizeKmer", sizeKmer);
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+
+        conf.setJobName("graph building");
+        conf.setMapperClass(GraphInvertedIndexBuildingMapper.class);
+        conf.setReducerClass(GraphInvertedIndexBuildingReducer.class);
+
+        conf.setMapOutputKeyClass(KmerBytesWritable.class);
+        conf.setMapOutputValueClass(PositionWritable.class);
+
+        conf.setInputFormat(TextInputFormat.class);
+        conf.setOutputFormat(SequenceFileOutputFormat.class);
+        
+        conf.setOutputKeyClass(KmerBytesWritable.class);
+        conf.setOutputValueClass(PositionListWritable.class);
+        
+        FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+        conf.setNumReduceTasks(numReducers);
+
+        FileSystem dfs = FileSystem.get(conf);
+        dfs.delete(new Path(inputPath + "-step1"), true);
+        JobClient.runJob(conf);
+        
+        //-------------
+        conf = new JobConf(GraphBuildingDriver.class);
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+        conf.setJobName("deep build");
+        
+        conf.setMapperClass(DeepGraphBuildingMapper.class);
+        conf.setReducerClass(DeepGraphBuildingReducer.class);
+        
+        conf.setMapOutputKeyClass(PositionWritable.class);
+        conf.setMapOutputValueClass(PositionListAndKmerWritable.class);
+        
+        conf.setPartitionerClass(ReadIDPartitioner.class);
+        
+        conf.setOutputKeyComparatorClass(PositionWritable.Comparator.class);
+        conf.setOutputValueGroupingComparator(PositionWritable.FirstComparator.class);
+        
+        conf.setInputFormat(SequenceFileInputFormat.class);
+        conf.setOutputFormat(TextOutputFormat.class);
+        
+        conf.setOutputKeyClass(NodeWritable.class);
+        conf.setOutputValueClass(NullWritable.class);
+        
+        FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+        conf.setNumReduceTasks(1);
+        dfs.delete(new Path(outputPath), true);
+        JobClient.runJob(conf);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+        GraphBuildingDriver driver = new GraphBuildingDriver();
+        driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer, null);
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
similarity index 83%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
index 7b65158..d5924c8 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
 
 import java.io.IOException;
 import org.apache.hadoop.io.LongWritable;
@@ -33,17 +33,16 @@
         /** first kmer */
         byte[] array = geneLine.getBytes();
         outputKmer.setByRead(array, 0);
+        // System.out.println(key.get()); // debug output removed: floods task logs
         outputVertexID.set((int)key.get(), (byte)0);
         output.collect(outputKmer, outputVertexID);
         /** middle kmer */
-        for (int i = KMER_SIZE; i < array.length - 1; i++) {
+        int i = 0; 
+        for (i = KMER_SIZE; i < array.length; i++) {
             outputKmer.shiftKmerWithNextChar(array[i]);
+            // System.out.println((int)key.get()); // debug output removed: per-kmer stdout in hot loop
             outputVertexID.set((int)key.get(), (byte)(i - KMER_SIZE + 1));
             output.collect(outputKmer, outputVertexID);
         }
-        /** last kmer */
-        outputKmer.shiftKmerWithNextChar(array[array.length - 1]);
-        outputVertexID.set((int)key.get(), (byte)(array.length - 1 + 1));
-        output.collect(outputKmer, outputVertexID);
     }
 }
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
similarity index 92%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
index 72827f2..d2b6476 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
 
 import java.io.IOException;
 import java.util.Iterator;
@@ -18,6 +18,7 @@
     @Override
     public void reduce(KmerBytesWritable key, Iterator<PositionWritable> values,
             OutputCollector<KmerBytesWritable, PositionListWritable> output, Reporter reporter) throws IOException {
+        outputlist.reset();
         while (values.hasNext()) {
             outputlist.append(values.next());
         }
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/LineBasedmappingWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/LineBasedmappingWritable.java
similarity index 94%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/LineBasedmappingWritable.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/LineBasedmappingWritable.java
index 1e44903..0b6b0a8 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/LineBasedmappingWritable.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/LineBasedmappingWritable.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
 
 import java.io.DataInput;
 import java.io.DataOutput;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java
new file mode 100644
index 0000000..fff3faf
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java
@@ -0,0 +1,81 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.hadoop.io.WritableComparable;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+
+public class PositionListAndKmerWritable implements WritableComparable<PositionListAndKmerWritable> {
+    
+    private PositionListWritable vertexIDList;
+    private KmerBytesWritable kmer;
+    private int countOfKmer;
+    
+    public PositionListAndKmerWritable(){
+        countOfKmer = 0;
+        vertexIDList = new PositionListWritable();
+        kmer = new KmerBytesWritable();
+    }
+
+    public PositionListAndKmerWritable(int kmerSize) {
+        countOfKmer = 0;
+        vertexIDList = new PositionListWritable();
+        kmer = new KmerBytesWritable(kmerSize);
+    }
+
+    public int getCount() {
+        return countOfKmer;
+    }
+
+    public void setCount(int count) {
+        this.countOfKmer = count;
+    }
+
+    public void setvertexIDList(PositionListWritable posList) {
+        vertexIDList.set(posList);
+    }
+
+    public void reset() {
+        vertexIDList.reset();
+        countOfKmer = 0;
+    }
+
+    public PositionListWritable getVertexIDList() {
+        return vertexIDList;
+    }
+
+    public KmerBytesWritable getKmer() {
+        return kmer;
+    }
+
+    public void set(PositionListAndKmerWritable right) {
+        this.countOfKmer = right.countOfKmer;
+        this.vertexIDList.set(right.vertexIDList);
+        this.kmer.set(right.kmer);
+    }
+
+    public void set(PositionListWritable list, KmerBytesWritable kmer) {
+        this.vertexIDList.set(list);
+        this.kmer.set(kmer);
+    }
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        this.countOfKmer = in.readInt();
+        this.vertexIDList.readFields(in);
+        this.kmer.readFields(in);
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+        out.writeInt(this.countOfKmer);
+        this.vertexIDList.write(out);
+        this.kmer.write(out);
+    }
+
+    @Override
+    public int compareTo(PositionListAndKmerWritable o) {
+        return 0; // NOTE(review): trivial ordering — used only as a map output value here; define a real ordering before using as a key
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java
new file mode 100644
index 0000000..362ebf3
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java
@@ -0,0 +1,19 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+//import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+public class ReadIDPartitioner implements Partitioner<PositionWritable, PositionListWritable>{
+    
+    @Override
+    public  int getPartition(PositionWritable key, PositionListWritable value, int numPartitions){
+        return (key.getReadID() & Integer.MAX_VALUE) % numPartitions;
+    }
+
+    @Override
+    public void configure(JobConf arg0) {
+    }
+}