Augment graph construction (hadoop) -- add ThreadList and 5PReadId
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index d005d7e..baace6a 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -148,7 +148,23 @@
return averageCoverage;
}
- /**
+ public PositionListWritable getStartReads() {
+ return startReads;
+ }
+
+ public void setStartReads(PositionListWritable startReads) {
+ this.startReads = startReads;
+ }
+
+ public PositionListWritable getEndReads() {
+ return endReads;
+ }
+
+ public void setEndReads(PositionListWritable endReads) {
+ this.endReads = endReads;
+ }
+
+ /**
* Returns the length of the byte-array version of this node
*/
public int getSerializedLength() {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
index 88b0339..6424f0a 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
@@ -90,12 +90,17 @@
curReverseKmer.setByReadReverse(KMER_SIZE, array, 0);
curKmerDir = curForwardKmer.compareTo(curReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
setNextKmer(array[KMER_SIZE]);
- //set value.nodeId
- setNodeId(mateId, readID, 1);
- //set value.edgeList
+ //set nodeId
+ setNodeId(mateId, readID, 0);
+ //set value.edgeList and value.threads/readId
setEdgeListForNextKmer();
- //set coverage = 1
+ //set value.coverage = 1
outputNode.setAvgCoverage(1);
+ //set value.startReads (forward kmer) or value.endReads (reverse kmer) because this is the first kmer in read
+ if(curKmerDir == KmerDir.FORWARD)
+ outputNode.setStartReads(nodeIdList);
+ else
+ outputNode.setEndReads(nodeIdList);
//output mapper result
setMapperOutput(output);
@@ -105,9 +110,9 @@
setPreKmerByOldCurKmer();
setCurKmerByOldNextKmer();
setNextKmer(array[i]);
- //set value.nodeId
+ //set nodeId
setNodeId(mateId, readID, i - KMER_SIZE + 1);
- //set value.edgeList
+ //set value.edgeList and value.threads/readId
setEdgeListForPreKmer();
setEdgeListForNextKmer();
//set coverage = 1
@@ -120,9 +125,9 @@
outputNode.reset();
setPreKmerByOldCurKmer();
setCurKmerByOldNextKmer();
- //set value.nodeId
+ //set nodeId
setNodeId(mateId, readID, array.length - KMER_SIZE + 1);
- //set value.edgeList
+ //set value.edgeList and value.threads/readId
setEdgeListForPreKmer();
//set coverage = 1
outputNode.setAvgCoverage(1);
@@ -135,7 +140,6 @@
nodeId.set(mateId, readID, posId);
nodeIdList.reset();
nodeIdList.append(nodeId);
- outputNode.setNodeIdList(nodeIdList);
}
public void setEdgeListForPreKmer(){
@@ -146,11 +150,13 @@
edgeListForPreKmer.reset();
edgeListForPreKmer.append(preForwardKmer);
outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForPreKmer);
+ outputNode.setThreadList(DirectionFlag.DIR_RR, nodeIdList);
break;
case REVERSE:
edgeListForPreKmer.reset();
edgeListForPreKmer.append(preReverseKmer);
outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForPreKmer);
+ outputNode.setThreadList(DirectionFlag.DIR_RF, nodeIdList);
break;
}
break;
@@ -160,11 +166,13 @@
edgeListForPreKmer.reset();
edgeListForPreKmer.append(preForwardKmer);
outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForPreKmer);
+ outputNode.setThreadList(DirectionFlag.DIR_FR, nodeIdList);
break;
case REVERSE:
edgeListForPreKmer.reset();
edgeListForPreKmer.append(preReverseKmer);
outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForPreKmer);
+ outputNode.setThreadList(DirectionFlag.DIR_FF, nodeIdList);
break;
}
break;
@@ -179,11 +187,13 @@
edgeListForNextKmer.reset();
edgeListForNextKmer.append(nextForwardKmer);
outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForNextKmer);
+ outputNode.setThreadList(DirectionFlag.DIR_FF, nodeIdList);
break;
case REVERSE:
edgeListForNextKmer.reset();
edgeListForNextKmer.append(nextReverseKmer);
outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForNextKmer);
+ outputNode.setThreadList(DirectionFlag.DIR_FR, nodeIdList);
break;
}
break;
@@ -193,11 +203,13 @@
edgeListForNextKmer.reset();
edgeListForNextKmer.append(nextForwardKmer);
outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForNextKmer);
+ outputNode.setThreadList(DirectionFlag.DIR_RF, nodeIdList);
break;
case REVERSE:
edgeListForNextKmer.reset();
edgeListForNextKmer.append(nextReverseKmer);
outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForNextKmer);
+ outputNode.setThreadList(DirectionFlag.DIR_RR, nodeIdList);
break;
}
break;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
index 3a21bae..7ce1ce9 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -38,10 +38,12 @@
while (values.hasNext()) {
tmpNode.set(values.next());
- outputNode.getNodeIdList().unionUpdate(tmpNode.getNodeIdList());
for (byte d: DirectionFlag.values) {
outputNode.getEdgeList(d).unionUpdate(tmpNode.getEdgeList(d));
+ outputNode.getThreadList(d).unionUpdate(tmpNode.getThreadList(d));
}
+ outputNode.getStartReads().unionUpdate(tmpNode.getStartReads());
+ outputNode.getEndReads().unionUpdate(tmpNode.getEndReads());
averageCoverage += tmpNode.getAvgCoverage();
}
outputNode.setAvgCoverage(averageCoverage);
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
index d92dc18..b1ab865 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
@@ -49,7 +49,7 @@
gv.addln(outputEdge);
/** add readIdSet **/
outputNode += " [shape=record, label = \"<f0> " + key.toString()
- + "|<f1> " + value.getNodeIdList().printReadIdSet()
+ + "|<f1> " + value.getThreadList(DirectionFlag.DIR_FF).printReadIdSet()
+ "\"]\n";
gv.addln(outputNode);
}