Add Reducer and change PositionWritable to PositionListWritable in NodeWritable
diff --git a/genomix/genomix-hadoop/data/webmap/test.txt b/genomix/genomix-hadoop/data/webmap/test.txt
new file mode 100644
index 0000000..17770fa
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/test.txt
@@ -0,0 +1 @@
+1 AATAGAAG
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
index 8c9cfc8..50223e2 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
@@ -15,6 +15,7 @@
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.KmerListWritable;
import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
import edu.uci.ics.genomix.type.PositionWritable;
@SuppressWarnings("deprecation")
@@ -33,10 +34,11 @@
private KmerBytesWritable curReverseKmer;
private KmerBytesWritable nextForwardKmer;
private KmerBytesWritable nextReverseKmer;
- private NodeWritable outputNode;
private PositionWritable nodeId;
+ private PositionListWritable nodeIdList;
private KmerListWritable edgeListForPreKmer;
private KmerListWritable edgeListForNextKmer;
+ private NodeWritable outputNode;
private KmerDir preKmerDir;
private KmerDir curKmerDir;
@@ -53,10 +55,11 @@
curReverseKmer = new KmerBytesWritable(KMER_SIZE);
nextForwardKmer = new KmerBytesWritable(KMER_SIZE);
nextReverseKmer = new KmerBytesWritable(KMER_SIZE);
- outputNode = new NodeWritable();
nodeId = new PositionWritable();
+ nodeIdList = new PositionListWritable();
edgeListForPreKmer = new KmerListWritable();
edgeListForNextKmer = new KmerListWritable();
+ outputNode = new NodeWritable();
preKmerDir = KmerDir.FORWARD;
curKmerDir = KmerDir.FORWARD;
nextKmerDir = KmerDir.FORWARD;
@@ -83,13 +86,13 @@
}
/** first kmer **/
+ outputNode.reset(0);
curForwardKmer.setByRead(array, 0);
curReverseKmer.setByReadReverse(array, 0);
curKmerDir = curForwardKmer.compareTo(curReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
setNextKmer(array[KMER_SIZE]);
//set value.nodeId
- nodeId.set(mateId, readID, 1);
- outputNode.setNodeId(nodeId);
+ setNodeId(mateId, readID, 1);
//set value.edgeList
setEdgeListForNextKmer();
//output mapper result
@@ -97,12 +100,12 @@
/** middle kmer **/
for (int i = KMER_SIZE + 1; i < array.length; i++) {
+ outputNode.reset(0);
setPreKmerByOldCurKmer();
setCurKmerByOldNextKmer();
setNextKmer(array[i]);
//set value.nodeId
- nodeId.set(mateId, readID, i - KMER_SIZE + 1);
- outputNode.setNodeId(nodeId);
+ setNodeId(mateId, readID, i - KMER_SIZE + 1);
//set value.edgeList
setEdgeListForPreKmer();
setEdgeListForNextKmer();
@@ -111,11 +114,11 @@
}
/** last kmer **/
+ outputNode.reset(0);
setPreKmerByOldCurKmer();
setCurKmerByOldNextKmer();
//set value.nodeId
- nodeId.set(mateId, readID, array.length - KMER_SIZE + 1);
- outputNode.setNodeId(nodeId);
+ setNodeId(mateId, readID, array.length - KMER_SIZE + 1);
//set value.edgeList
setEdgeListForPreKmer();
//output mapper result
@@ -123,6 +126,13 @@
}
}
+ public void setNodeId(byte mateId, long readID, int posId){ // Stores (mateId, readID, posId) in nodeId and hands it to outputNode wrapped in nodeIdList.
+ nodeId.set(mateId, readID, posId);
+ nodeIdList.reset(); // presumably empties the reused list so only the id appended below remains; confirm against PositionListWritable.reset()
+ nodeIdList.append(nodeId);
+ outputNode.setNodeIdList(nodeIdList); // passes the shared nodeIdList field itself; whether NodeWritable copies or aliases it is not visible here
+ }
+
public void setEdgeListForPreKmer(){
switch(curKmerDir){
case FORWARD:
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
index d53e3bf..625984d 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -24,12 +24,19 @@
outputNode.reset(GenomixMapper.KMER_SIZE);
//copy first item to outputNode
- if(values.hasNext())
- outputNode.set(values.next());
+ if(values.hasNext()){
+ NodeWritable tmpNode = values.next();
+ outputNode.set(tmpNode);
+ }
while (values.hasNext()) {
NodeWritable tmpNode = values.next();
+ outputNode.getNodeIdList().appendList(tmpNode.getNodeIdList());
outputNode.getFFList().appendList(tmpNode.getFFList());
+ outputNode.getFRList().appendList(tmpNode.getFRList());
+ outputNode.getRFList().appendList(tmpNode.getRFList());
+ outputNode.getRRList().appendList(tmpNode.getRRList());
}
+ output.collect(key,outputNode);
}
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
index b756839..8aef9a8 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
@@ -43,7 +43,7 @@
public void TestMapKmerToNode() throws Exception {
GenomixDriver driver = new GenomixDriver();
- driver.run(HDFS_PATH, RESULT_PATH, 0, SIZE_KMER, READ_LENGTH, false, HADOOP_CONF_PATH);
+ driver.run(HDFS_PATH, RESULT_PATH, 2, SIZE_KMER, READ_LENGTH, false, HADOOP_CONF_PATH); // NOTE(review): third argument changes from 0 to 2; presumably the number of reducers, confirm against GenomixDriver.run
dumpResult();
}