Partial refactor following edge type changes in NodeWritable
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeListWritable.java
index 2b41f5d..2eb91c1 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeListWritable.java
@@ -24,7 +24,10 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
import org.apache.hadoop.io.WritableComparable;
@@ -52,6 +55,13 @@
setAsCopy(other);
}
+ /**
+  * Creates an edge list populated from {@code otherList}.
+  * Starts from the no-arg constructor's state and passes each element, in list order, to {@code add}.
+  *
+  * @param otherList the edges to add; iterated once, front to back
+  */
+ public EdgeListWritable(List<EdgeWritable> otherList) {
+     this();
+     Iterator<EdgeWritable> source = otherList.iterator();
+     while (source.hasNext()) {
+         add(source.next());
+     }
+ }
+
public void setAsCopy(EdgeListWritable otherEdge){
reset();
edges.addAll(otherEdge.edges);
@@ -212,4 +222,36 @@
public void remove(EdgeWritable toRemove) {
remove(toRemove, false);
}
+
+ /**
+  * Merges every edge of {@code other} into this list.
+  * Edges are grouped by their kmer key; when the same key occurs more than once (in this list,
+  * in {@code other}, or across both) the resulting entry carries the union of all their readIDs.
+  * The edge list is rebuilt from a HashMap, so the resulting edge order follows the map's
+  * iteration order rather than the previous order of either list.
+  *
+  * @param other the edge list whose edges are merged into this one
+  */
+ public void unionUpdate(EdgeListWritable other) {
+     // TODO test this function properly
+     // TODO perhaps there's a more efficient way to do this?
+     HashMap<VKmerBytesWritable, PositionListWritable> merged = new HashMap<VKmerBytesWritable, PositionListWritable>(edges.size() + other.edges.size());
+     mergeEdgesInto(merged, this);
+     mergeEdgesInto(merged, other);
+
+     // Replace the current contents with one edge per distinct key.
+     edges.clear();
+     for (java.util.Map.Entry<VKmerBytesWritable, PositionListWritable> entry : merged.entrySet()) {
+         edges.add(new EdgeWritable(entry.getKey(), entry.getValue()));
+     }
+ }
+
+ /**
+  * Folds the edges of {@code source} into {@code merged}: the first edge seen for a key stores a
+  * new PositionListWritable built from its readIDs, later edges for that key are unioned into it.
+  */
+ private static void mergeEdgesInto(HashMap<VKmerBytesWritable, PositionListWritable> merged, EdgeListWritable source) {
+     for (EdgeWritable edge : source.edges) {
+         VKmerBytesWritable kmer = edge.getKey();
+         PositionListWritable collected = merged.get(kmer);
+         if (collected == null) {
+             merged.put(kmer, new PositionListWritable(edge.getReadIDs())); // make a new copy of their list
+         } else {
+             collected.unionUpdate(edge.getReadIDs());
+         }
+     }
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
index 1408e60..b647939 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
@@ -45,6 +45,11 @@
}
}
+ /**
+  * Copy-style constructor: runs the no-arg constructor, then calls {@code set(other)}.
+  * NOTE(review): EdgeListWritable.unionUpdate uses this expecting "a new copy of their list" --
+  * confirm that set(other) copies the contents rather than sharing other's storage.
+  *
+  * @param other the position list to initialize from
+  */
+ public PositionListWritable(PositionListWritable other) {
+ this();
+ set(other);
+ }
+
public void setNewReference(byte[] data, int offset) {
this.valueCount = Marshal.getInt(data, offset);
this.storage = data;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
index 535792b..ad7585c 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
@@ -1,6 +1,7 @@
package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
import java.io.IOException;
+import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -12,6 +13,8 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.EdgeListWritable;
+import edu.uci.ics.genomix.type.EdgeWritable;
import edu.uci.ics.genomix.type.NodeWritable.DirectionFlag;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerListWritable;
@@ -146,29 +149,38 @@
switch(curKmerDir){
case FORWARD:
switch(preKmerDir){
- case FORWARD:
- edgeListForPreKmer.reset();
- edgeListForPreKmer.append(preForwardKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForPreKmer);
+ case FORWARD:
+// //TODO NOTE: building a new EdgeListWritable/EdgeWritable for each edge (as done below) is one way to fix these entries; the alternative is to refactor so that reusable edge-list and edge objects are kept around instead
+// edgeListForPreKmer.reset();
+// edgeListForPreKmer.append(preForwardKmer);
+// outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForPreKmer);
+// outputNode.setThreadList(DirectionFlag.DIR_RR, nodeIdList);
+ outputNode.setEdgeList(DirectionFlag.DIR_RR, new EdgeListWritable(Arrays.asList(new EdgeWritable(preForwardKmer, nodeIdList))));
break;
case REVERSE:
- edgeListForPreKmer.reset();
- edgeListForPreKmer.append(preReverseKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForPreKmer);
+// edgeListForPreKmer.reset();
+// edgeListForPreKmer.append(preReverseKmer);
+// outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForPreKmer);
+// outputNode.setThreadList(DirectionFlag.DIR_RF, nodeIdList);
+ outputNode.setEdgeList(DirectionFlag.DIR_RF, new EdgeListWritable(Arrays.asList(new EdgeWritable(preReverseKmer, nodeIdList))));
break;
}
break;
case REVERSE:
switch(preKmerDir){
case FORWARD:
- edgeListForPreKmer.reset();
- edgeListForPreKmer.append(preForwardKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForPreKmer);
+// edgeListForPreKmer.reset();
+// edgeListForPreKmer.append(preForwardKmer);
+// outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForPreKmer);
+// outputNode.setThreadList(DirectionFlag.DIR_FR, nodeIdList);
+ outputNode.setEdgeList(DirectionFlag.DIR_FR, new EdgeListWritable(Arrays.asList(new EdgeWritable(preForwardKmer, nodeIdList))));
break;
case REVERSE:
- edgeListForPreKmer.reset();
- edgeListForPreKmer.append(preReverseKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForPreKmer);
+// edgeListForPreKmer.reset();
+// edgeListForPreKmer.append(preReverseKmer);
+// outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForPreKmer);
+// outputNode.setThreadList(DirectionFlag.DIR_FF, nodeIdList);
+ // cur REVERSE + pre REVERSE is the DIR_FF edge, as in the code this replaces; DIR_FR belongs to the pre FORWARD arm above.
+ outputNode.setEdgeList(DirectionFlag.DIR_FF, new EdgeListWritable(Arrays.asList(new EdgeWritable(preReverseKmer, nodeIdList))));
break;
}
break;
@@ -180,28 +192,36 @@
case FORWARD:
switch(nextKmerDir){
case FORWARD:
- edgeListForNextKmer.reset();
- edgeListForNextKmer.append(nextForwardKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForNextKmer);
+// edgeListForNextKmer.reset();
+// edgeListForNextKmer.append(nextForwardKmer);
+// outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForNextKmer);
+// outputNode.setThreadList(DirectionFlag.DIR_FF, nodeIdList);
+ outputNode.setEdgeList(DirectionFlag.DIR_FF, new EdgeListWritable(Arrays.asList(new EdgeWritable(nextForwardKmer, nodeIdList))));
break;
case REVERSE:
- edgeListForNextKmer.reset();
- edgeListForNextKmer.append(nextReverseKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForNextKmer);
+// edgeListForNextKmer.reset();
+// edgeListForNextKmer.append(nextReverseKmer);
+// outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForNextKmer);
+// outputNode.setThreadList(DirectionFlag.DIR_FR, nodeIdList);
+ outputNode.setEdgeList(DirectionFlag.DIR_FR, new EdgeListWritable(Arrays.asList(new EdgeWritable(nextReverseKmer, nodeIdList))));
break;
}
break;
case REVERSE:
switch(nextKmerDir){
case FORWARD:
- edgeListForNextKmer.reset();
- edgeListForNextKmer.append(nextForwardKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForNextKmer);
+// edgeListForNextKmer.reset();
+// edgeListForNextKmer.append(nextForwardKmer);
+// outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForNextKmer);
+// outputNode.setThreadList(DirectionFlag.DIR_RF, nodeIdList);
+ outputNode.setEdgeList(DirectionFlag.DIR_RF, new EdgeListWritable(Arrays.asList(new EdgeWritable(nextForwardKmer, nodeIdList))));
break;
case REVERSE:
- edgeListForNextKmer.reset();
- edgeListForNextKmer.append(nextReverseKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForNextKmer);
+// edgeListForNextKmer.reset();
+// edgeListForNextKmer.append(nextReverseKmer);
+// outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForNextKmer);
+// outputNode.setThreadList(DirectionFlag.DIR_RR, nodeIdList);
+ outputNode.setEdgeList(DirectionFlag.DIR_RR, new EdgeListWritable(Arrays.asList(new EdgeWritable(nextReverseKmer, nodeIdList))));
break;
}
break;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
index 7ce1ce9..4a669c3 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -37,10 +37,12 @@
averageCoverage = 0;
while (values.hasNext()) {
- tmpNode.set(values.next());
+ tmpNode.setAsCopy(values.next());
for (byte d: DirectionFlag.values) {
+ // TODO NOTE: why call unionUpdate() on the edges here? Why not just a simple append?
+// outputNode.getEdgeList(d).unionUpdate(tmpNode.getEdgeList(d));
+// outputNode.getThreadList(d).unionUpdate(tmpNode.getThreadList(d));
outputNode.getEdgeList(d).unionUpdate(tmpNode.getEdgeList(d));
- outputNode.getThreadList(d).unionUpdate(tmpNode.getThreadList(d));
}
outputNode.getStartReads().unionUpdate(tmpNode.getStartReads());
outputNode.getEndReads().unionUpdate(tmpNode.getEndReads());
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
index fb343e9..8531326 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
@@ -10,6 +10,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
+import edu.uci.ics.genomix.type.EdgeWritable;
import edu.uci.ics.genomix.type.NodeWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.type.NodeWritable.DirectionFlag;
@@ -68,30 +69,30 @@
public static String convertEdgeToGraph(String outputNode, NodeWritable value) {
String outputEdge = "";
- Iterator<VKmerBytesWritable> kmerIterator;
- kmerIterator = value.getEdgeList(DirectionFlag.DIR_FF).iterator();
- while (kmerIterator.hasNext()) {
- VKmerBytesWritable edge = kmerIterator.next();
- outputEdge += outputNode + " -> " + edge.toString() + "[color = \"black\" label =\"FF: "
- + value.getThreadList(DirectionFlag.DIR_FF).printReadIdSet() + "\"]\n";
+ Iterator<EdgeWritable> edgeIterator;
+ edgeIterator = value.getEdgeList(DirectionFlag.DIR_FF).iterator();
+ while(edgeIterator.hasNext()){
+ EdgeWritable edge = edgeIterator.next();
+ outputEdge += outputNode + " -> " + edge.getKey().toString() + "[color = \"black\" label =\"FF: " +
+ edge.getReadIDs().toString() + "\"]\n";
}
- kmerIterator = value.getEdgeList(DirectionFlag.DIR_FR).iterator();
- while (kmerIterator.hasNext()) {
- VKmerBytesWritable edge = kmerIterator.next();
- outputEdge += outputNode + " -> " + edge.toString() + "[color = \"blue\" label =\"FR: "
- + value.getThreadList(DirectionFlag.DIR_FR).printReadIdSet() + "\"]\n";
+ // As in the DIR_FF loop, the target node is named by the edge's kmer (getKey()), which is what the pre-refactor code printed; the label carries that edge's own readIDs.
+ edgeIterator = value.getEdgeList(DirectionFlag.DIR_FR).iterator();
+ while(edgeIterator.hasNext()){
+ EdgeWritable edge = edgeIterator.next();
+ outputEdge += outputNode + " -> " + edge.getKey().toString() + "[color = \"blue\" label =\"FR: " +
+ edge.getReadIDs().toString() + "\"]\n";
}
- kmerIterator = value.getEdgeList(DirectionFlag.DIR_RF).iterator();
- while (kmerIterator.hasNext()) {
- VKmerBytesWritable edge = kmerIterator.next();
- outputEdge += outputNode + " -> " + edge.toString() + "[color = \"green\" label =\"RF: "
- + value.getThreadList(DirectionFlag.DIR_RF).printReadIdSet() + "\"]\n";
+ edgeIterator = value.getEdgeList(DirectionFlag.DIR_RF).iterator();
+ while(edgeIterator.hasNext()){
+ EdgeWritable edge = edgeIterator.next();
+ outputEdge += outputNode + " -> " + edge.getKey().toString() + "[color = \"green\" label =\"RF: " +
+ edge.getReadIDs().toString() + "\"]\n";
}
- kmerIterator = value.getEdgeList(DirectionFlag.DIR_RR).iterator();
- while (kmerIterator.hasNext()) {
- VKmerBytesWritable edge = kmerIterator.next();
- outputEdge += outputNode + " -> " + edge.toString() + "[color = \"red\" label =\"RR: "
- + value.getThreadList(DirectionFlag.DIR_RR).printReadIdSet() + "\"]\n";
+ edgeIterator = value.getEdgeList(DirectionFlag.DIR_RR).iterator();
+ while(edgeIterator.hasNext()){
+ EdgeWritable edge = edgeIterator.next();
+ outputEdge += outputNode + " -> " + edge.getKey().toString() + "[color = \"red\" label =\"RR: " +
+ edge.getReadIDs().toString() + "\"]\n";
}
//TODO should output actualKmer instead of kmer
if (outputEdge == "")
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/graph/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/graph/dataflow/ReadsKeyValueParserFactory.java
index 448d8b7..34675a7 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/graph/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/graph/dataflow/ReadsKeyValueParserFactory.java
@@ -139,7 +139,7 @@
curForwardKmer.setAsCopy(nextForwardKmer);
curReverseKmer.setAsCopy(nextReverseKmer);
curKmerDir = nextKmerDir;
- curNode.set(nextNode);
+ curNode.setAsCopy(nextNode);
nextNode.reset();
nextNode.setAvgCoverage(1);
nextKmerDir = setNextKmer(nextForwardKmer, nextReverseKmer, array[i]);