add Reducer and change PositionWritable to PositionListWritable in NodeWritable
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/IntermediateNodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/IntermediateNodeWritable.java
new file mode 100644
index 0000000..82eb51c
--- /dev/null
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/IntermediateNodeWritable.java
@@ -0,0 +1,152 @@
+package edu.uci.ics.genomix.oldtype;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.KmerListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+public class IntermediateNodeWritable implements WritableComparable<IntermediateNodeWritable>, Serializable{
+ // Hadoop writable holding four KmerListWritable lists (FF, FR, RF, RR) and a PositionWritable node id; ordered and hashed by the node id only.
+ private static final long serialVersionUID = 1L;
+ public static final IntermediateNodeWritable EMPTY_NODE = new IntermediateNodeWritable(); // shared instance from the no-arg constructor; it is mutable, so treat it as read-only
+
+ private KmerListWritable forwardForwardList;
+ private KmerListWritable forwardReverseList;
+ private KmerListWritable reverseForwardList;
+ private KmerListWritable reverseReverseList;
+ private PositionWritable nodeId;
+ /** Creates a node whose four lists and node id are freshly constructed objects. */
+ public IntermediateNodeWritable(){
+ forwardForwardList = new KmerListWritable();
+ forwardReverseList = new KmerListWritable();
+ reverseForwardList = new KmerListWritable();
+ reverseReverseList = new KmerListWritable();
+ nodeId = new PositionWritable();
+ }
+ /** Creates a node with the no-arg constructor and then initializes it from the arguments through set(...). */
+ public IntermediateNodeWritable(KmerListWritable FFList, KmerListWritable FRList,
+ KmerListWritable RFList, KmerListWritable RRList, PositionWritable uniqueKey) {
+ this();
+ set(FFList, FRList, RFList, RRList, uniqueKey);
+ }
+ /** Overwrites this node's four lists and node id with those of {@code node}. */
+ public void set(IntermediateNodeWritable node){
+ set(node.forwardForwardList, node.forwardReverseList, node.reverseForwardList,
+ node.reverseReverseList, node.nodeId);
+ }
+ /** Calls set(...) on each owned field with the matching argument; no field reference is reassigned. */
+ public void set(KmerListWritable FFList, KmerListWritable FRList,
+ KmerListWritable RFList, KmerListWritable RRList, PositionWritable uniqueKey) {
+ this.forwardForwardList.set(FFList);
+ this.forwardReverseList.set(FRList);
+ this.reverseForwardList.set(RFList);
+ this.reverseReverseList.set(RRList);
+ this.nodeId.set(uniqueKey);
+ }
+ /** Calls reset() on all four lists and on the node id; {@code kmerSize} is not read by this method. */
+ public void reset(int kmerSize) {
+ forwardForwardList.reset();
+ forwardReverseList.reset();
+ reverseForwardList.reset();
+ reverseReverseList.reset();
+ nodeId.reset();
+ }
+ /** Returns the internal forward-forward list object itself, not a copy. */
+ public KmerListWritable getFFList() {
+ return forwardForwardList;
+ }
+ /** Calls set(...) on the existing forward-forward list with the argument; the field keeps its own instance. */
+ public void setFFList(KmerListWritable forwardForwardList) {
+ this.forwardForwardList.set(forwardForwardList);
+ }
+ /** Returns the internal forward-reverse list object itself, not a copy. */
+ public KmerListWritable getFRList() {
+ return forwardReverseList;
+ }
+ /** Calls set(...) on the existing forward-reverse list with the argument; the field keeps its own instance. */
+ public void setFRList(KmerListWritable forwardReverseList) {
+ this.forwardReverseList.set(forwardReverseList);
+ }
+ /** Returns the internal reverse-forward list object itself, not a copy. */
+ public KmerListWritable getRFList() {
+ return reverseForwardList;
+ }
+ /** Calls set(...) on the existing reverse-forward list with the argument; the field keeps its own instance. */
+ public void setRFList(KmerListWritable reverseForwardList) {
+ this.reverseForwardList.set(reverseForwardList);
+ }
+ /** Returns the internal reverse-reverse list object itself, not a copy. */
+ public KmerListWritable getRRList() {
+ return reverseReverseList;
+ }
+ /** Calls set(...) on the existing reverse-reverse list with the argument; the field keeps its own instance. */
+ public void setRRList(KmerListWritable reverseReverseList) {
+ this.reverseReverseList.set(reverseReverseList);
+ }
+ /** Returns the internal node id object itself, not a copy. */
+ public PositionWritable getNodeId() {
+ return nodeId;
+ }
+ /** Calls set(...) on the existing node id with the argument; the field keeps its own instance. */
+ public void setNodeId(PositionWritable nodeId) {
+ this.nodeId.set(nodeId);
+ }
+ /** Reads the four lists (FF, FR, RF, RR) and then the node id, the same field order that write() emits. */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ this.forwardForwardList.readFields(in);
+ this.forwardReverseList.readFields(in);
+ this.reverseForwardList.readFields(in);
+ this.reverseReverseList.readFields(in);
+ this.nodeId.readFields(in);
+ }
+ /** Writes the four lists (FF, FR, RF, RR) followed by the node id. */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ this.forwardForwardList.write(out);
+ this.forwardReverseList.write(out);
+ this.reverseForwardList.write(out);
+ this.reverseReverseList.write(out);
+ this.nodeId.write(out);
+ }
+ /** Orders nodes by node id only; the four lists do not take part in the comparison. */
+ @Override
+ public int compareTo(IntermediateNodeWritable other) {
+ // Delegates to the node id's compareTo; equals() also compares the lists, so compareTo() == 0 does not imply equals().
+ return this.nodeId.compareTo(other.nodeId);
+ }
+ /** Hashes on the node id only; equals() requires equal ids, so the result depends only on a field equals() also compares. */
+ @Override
+ public int hashCode() {
+ return this.nodeId.hashCode();
+ }
+ /** Equal when all four lists and the node id are equal; anything that is not an IntermediateNodeWritable (including null) is unequal. */
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof IntermediateNodeWritable) {
+ IntermediateNodeWritable nw = (IntermediateNodeWritable) o;
+ return (this.forwardForwardList.equals(nw.forwardForwardList)
+ && this.forwardReverseList.equals(nw.forwardReverseList)
+ && this.reverseForwardList.equals(nw.reverseForwardList)
+ && this.reverseReverseList.equals(nw.reverseReverseList) && (this.nodeId.equals(nw.nodeId)));
+ }
+ return false;
+ }
+ /** Formats as '(' then node id, FF, FR, RF, RR, each followed by a tab (the last one included), then ')'. */
+ @Override
+ public String toString() {
+ StringBuilder sbuilder = new StringBuilder();
+ sbuilder.append('(');
+ sbuilder.append(nodeId.toString()).append('\t');
+ sbuilder.append(forwardForwardList.toString()).append('\t');
+ sbuilder.append(forwardReverseList.toString()).append('\t');
+ sbuilder.append(reverseForwardList.toString()).append('\t');
+ sbuilder.append(reverseReverseList.toString()).append('\t').append(')');
+ return sbuilder.toString();
+ }
+}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index e265fa9..161d31f 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -10,16 +10,21 @@
public class NodeWritable implements WritableComparable<NodeWritable>, Serializable{
+ public static class KMER{ // values of the kmerMark byte that write() emits first and readFields() reads first
+ public static final byte EXIST = 0; // the kmer is serialized after the four edge lists
+ public static final byte NON_EXIST = 1; // the kmer is left out of the serialized form
+ }
+
private static final long serialVersionUID = 1L;
public static final NodeWritable EMPTY_NODE = new NodeWritable(0);
- private PositionListWritable nodeId;
+ private PositionListWritable nodeIdList;
private KmerListWritable forwardForwardList;
private KmerListWritable forwardReverseList;
private KmerListWritable reverseForwardList;
private KmerListWritable reverseReverseList;
private KmerBytesWritable kmer;
-
+ private byte kmerMark;
// merge/update directions
public static class DirectionFlag {
@@ -35,42 +40,54 @@
}
public NodeWritable(int kmerSize) {
- nodeId = new PositionListWritable();
+ nodeIdList = new PositionListWritable(); // list of positions for this node, created fresh
forwardForwardList = new KmerListWritable();
forwardReverseList = new KmerListWritable();
reverseForwardList = new KmerListWritable();
reverseReverseList = new KmerListWritable();
kmer = new KmerBytesWritable(kmerSize);
+ kmerMark = KMER.NON_EXIST; // no kmer content has been set yet, so write() will skip the kmer
}
- public NodeWritable(PositionListWritable nodeId, KmerListWritable FFList, KmerListWritable FRList,
+ public NodeWritable(PositionListWritable nodeIdList, KmerListWritable FFList, KmerListWritable FRList,
KmerListWritable RFList, KmerListWritable RRList, KmerBytesWritable kmer) {
this(kmer.getKmerLength());
- set(nodeId, FFList, FRList, RFList, RRList, kmer);
+ set(nodeIdList, FFList, FRList, RFList, RRList, kmer); // fills the fields created by this(...) and marks the kmer as EXIST
}
public void set(NodeWritable node){
- set(node.nodeId, node.forwardForwardList, node.forwardReverseList, node.reverseForwardList,
+ set(node.nodeIdList, node.forwardForwardList, node.forwardReverseList, node.reverseForwardList, // NOTE(review): the six-argument set always marks the kmer EXIST; node.kmerMark is not copied - confirm this is intended when node has no kmer
node.reverseReverseList, node.kmer);
}
- public void set(PositionListWritable nodeId, KmerListWritable FFList, KmerListWritable FRList,
+ public void set(PositionListWritable nodeIdList, KmerListWritable FFList, KmerListWritable FRList,
KmerListWritable RFList, KmerListWritable RRList, KmerBytesWritable kmer) {
- this.nodeId.set(nodeId);
+ this.nodeIdList.set(nodeIdList); // updates the existing list object; the field reference is not replaced
this.forwardForwardList.set(FFList);
this.forwardReverseList.set(FRList);
this.reverseForwardList.set(RFList);
this.reverseReverseList.set(RRList);
this.kmer.set(kmer);
+ kmerMark = KMER.EXIST; // a kmer was supplied, so write() will serialize it
}
public void reset(int kmerSize) {
- nodeId.reset();
+ nodeIdList.reset(); // reset the position list along with the four edge lists below
forwardForwardList.reset();
forwardReverseList.reset();
reverseForwardList.reset();
reverseReverseList.reset();
kmer.reset(kmerSize);
+ kmerMark = KMER.NON_EXIST; // after a reset the kmer is treated as absent until set(...) or setKmer(...) is called
+ }
+
+
+ public PositionListWritable getNodeIdList() { // exposes the internal list object itself, so callers can append to it in place
+ return nodeIdList;
+ }
+
+ public void setNodeIdList(PositionListWritable nodeIdList) {
+ this.nodeIdList.set(nodeIdList); // updates the existing list object; presumably copies the contents - confirm against PositionListWritable.set
}
public KmerBytesWritable getKmer() {
@@ -78,7 +95,8 @@
}
public void setKmer(KmerBytesWritable kmer) {
- this.kmer = kmer;
+ kmerMark = KMER.EXIST; // flag the kmer as present so write() serializes it
+ this.kmer.set(kmer); // updates the existing kmer object; the field no longer aliases the caller's instance
}
public int getCount() {
@@ -102,19 +120,19 @@
}
public void setFFList(KmerListWritable forwardForwardList) {
- this.forwardForwardList = forwardForwardList;
+ this.forwardForwardList.set(forwardForwardList); // updates the existing list object; the field no longer aliases the caller's instance
}
public void setFRList(KmerListWritable forwardReverseList) {
- this.forwardReverseList = forwardReverseList;
+ this.forwardReverseList.set(forwardReverseList); // updates the existing list object; the field no longer aliases the caller's instance
}
public void setRFList(KmerListWritable reverseForwardList) {
- this.reverseForwardList = reverseForwardList;
+ this.reverseForwardList.set(reverseForwardList); // updates the existing list object; the field no longer aliases the caller's instance
}
public void setRRList(KmerListWritable reverseReverseList) {
- this.reverseReverseList = reverseReverseList;
+ this.reverseReverseList.set(reverseReverseList); // updates the existing list object; the field no longer aliases the caller's instance
}
public KmerListWritable getListFromDir(byte dir) {
@@ -133,22 +151,26 @@
}
@Override
public void write(DataOutput out) throws IOException {
- this.nodeId.write(out);
+ out.writeByte(kmerMark); // marker byte goes first so readFields() knows whether a kmer follows the lists
+ this.nodeIdList.write(out);
this.forwardForwardList.write(out);
this.forwardReverseList.write(out);
this.reverseForwardList.write(out);
this.reverseReverseList.write(out);
- this.kmer.write(out);
+ if(kmerMark == KMER.EXIST) // the kmer is written only when it is marked as present
+ this.kmer.write(out);
}
@Override
public void readFields(DataInput in) throws IOException {
- this.nodeId.readFields(in);
+ kmerMark = in.readByte(); // marker byte written first by write(); decides whether a kmer is read at the end
+ this.nodeIdList.readFields(in);
this.forwardForwardList.readFields(in);
this.forwardReverseList.readFields(in);
this.reverseForwardList.readFields(in);
this.reverseReverseList.readFields(in);
- this.kmer.readFields(in);
+ if(kmerMark == KMER.EXIST) // mirrors the condition in write()
+ this.kmer.readFields(in); // when skipped, this.kmer is not touched and keeps its previous contents
}
@Override
@@ -165,7 +187,7 @@
public boolean equals(Object o) {
if (o instanceof NodeWritable) {
NodeWritable nw = (NodeWritable) o;
- return (this.nodeId.equals(nw.nodeId)
+ return (this.nodeIdList.equals(nw.nodeIdList)
&& this.forwardForwardList.equals(nw.forwardForwardList)
&& this.forwardReverseList.equals(nw.forwardReverseList)
&& this.reverseForwardList.equals(nw.reverseForwardList)
@@ -178,7 +200,7 @@
public String toString() {
StringBuilder sbuilder = new StringBuilder();
sbuilder.append('(');
- sbuilder.append(nodeId.toString()).append('\t');
+ sbuilder.append(nodeIdList.toString()).append('\t');
sbuilder.append(forwardForwardList.toString()).append('\t');
sbuilder.append(forwardReverseList.toString()).append('\t');
sbuilder.append(reverseForwardList.toString()).append('\t');
diff --git a/genomix/genomix-hadoop/data/webmap/test.txt b/genomix/genomix-hadoop/data/webmap/test.txt
new file mode 100644
index 0000000..17770fa
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/test.txt
@@ -0,0 +1 @@
+1 AATAGAAG
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
index 8c9cfc8..50223e2 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
@@ -15,6 +15,7 @@
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.KmerListWritable;
import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
import edu.uci.ics.genomix.type.PositionWritable;
@SuppressWarnings("deprecation")
@@ -33,10 +34,11 @@
private KmerBytesWritable curReverseKmer;
private KmerBytesWritable nextForwardKmer;
private KmerBytesWritable nextReverseKmer;
- private NodeWritable outputNode;
private PositionWritable nodeId;
+ private PositionListWritable nodeIdList;
private KmerListWritable edgeListForPreKmer;
private KmerListWritable edgeListForNextKmer;
+ private NodeWritable outputNode;
private KmerDir preKmerDir;
private KmerDir curKmerDir;
@@ -53,10 +55,11 @@
curReverseKmer = new KmerBytesWritable(KMER_SIZE);
nextForwardKmer = new KmerBytesWritable(KMER_SIZE);
nextReverseKmer = new KmerBytesWritable(KMER_SIZE);
- outputNode = new NodeWritable();
nodeId = new PositionWritable();
+ nodeIdList = new PositionListWritable();
edgeListForPreKmer = new KmerListWritable();
edgeListForNextKmer = new KmerListWritable();
+ outputNode = new NodeWritable();
preKmerDir = KmerDir.FORWARD;
curKmerDir = KmerDir.FORWARD;
nextKmerDir = KmerDir.FORWARD;
@@ -83,13 +86,13 @@
}
/** first kmer **/
+ outputNode.reset(0);
curForwardKmer.setByRead(array, 0);
curReverseKmer.setByReadReverse(array, 0);
curKmerDir = curForwardKmer.compareTo(curReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
setNextKmer(array[KMER_SIZE]);
//set value.nodeId
- nodeId.set(mateId, readID, 1);
- outputNode.setNodeId(nodeId);
+ setNodeId(mateId, readID, 1);
//set value.edgeList
setEdgeListForNextKmer();
//output mapper result
@@ -97,12 +100,12 @@
/** middle kmer **/
for (int i = KMER_SIZE + 1; i < array.length; i++) {
+ outputNode.reset(0);
setPreKmerByOldCurKmer();
setCurKmerByOldNextKmer();
setNextKmer(array[i]);
//set value.nodeId
- nodeId.set(mateId, readID, i - KMER_SIZE + 1);
- outputNode.setNodeId(nodeId);
+ setNodeId(mateId, readID, i - KMER_SIZE + 1);
//set value.edgeList
setEdgeListForPreKmer();
setEdgeListForNextKmer();
@@ -111,11 +114,11 @@
}
/** last kmer **/
+ outputNode.reset(0);
setPreKmerByOldCurKmer();
setCurKmerByOldNextKmer();
//set value.nodeId
- nodeId.set(mateId, readID, array.length - KMER_SIZE + 1);
- outputNode.setNodeId(nodeId);
+ setNodeId(mateId, readID, array.length - KMER_SIZE + 1);
//set value.edgeList
setEdgeListForPreKmer();
//output mapper result
@@ -123,6 +126,13 @@
}
}
+ public void setNodeId(byte mateId, long readID, int posId){ // stores one (mateId, readID, posId) position on outputNode, wrapped in the reusable nodeIdList
+ nodeId.set(mateId, readID, posId);
+ nodeIdList.reset(); // presumably empties the list so that only the position appended below remains - confirm against PositionListWritable.reset
+ nodeIdList.append(nodeId);
+ outputNode.setNodeIdList(nodeIdList);
+ }
+
public void setEdgeListForPreKmer(){
switch(curKmerDir){
case FORWARD:
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
index d53e3bf..625984d 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -24,12 +24,19 @@
outputNode.reset(GenomixMapper.KMER_SIZE);
//copy first item to outputNode
- if(values.hasNext())
- outputNode.set(values.next());
+ if(values.hasNext()){
+ NodeWritable tmpNode = values.next();
+ outputNode.set(tmpNode);
+ }
while (values.hasNext()) {
NodeWritable tmpNode = values.next();
+ outputNode.getNodeIdList().appendList(tmpNode.getNodeIdList());
outputNode.getFFList().appendList(tmpNode.getFFList());
+ outputNode.getFRList().appendList(tmpNode.getFRList());
+ outputNode.getRFList().appendList(tmpNode.getRFList());
+ outputNode.getRRList().appendList(tmpNode.getRRList());
}
+ output.collect(key,outputNode);
}
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
index b756839..8aef9a8 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
@@ -43,7 +43,7 @@
public void TestMapKmerToNode() throws Exception {
GenomixDriver driver = new GenomixDriver();
- driver.run(HDFS_PATH, RESULT_PATH, 0, SIZE_KMER, READ_LENGTH, false, HADOOP_CONF_PATH);
+ driver.run(HDFS_PATH, RESULT_PATH, 2, SIZE_KMER, READ_LENGTH, false, HADOOP_CONF_PATH); // NOTE(review): third argument is presumably the reducer count (non-zero so the reduce phase runs) - confirm against GenomixDriver.run
dumpResult();
}