Complete the graph construction (Hadoop)
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeWritable.java
index 0cfaabd..4c60530 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeWritable.java
@@ -73,7 +73,7 @@
/**
* Set the internal readIDs when the given positionList has readid, position, and mateid set
*/
- private void setFromPositions(PositionListWritable otherPositions) {
+ public void setFromPositions(PositionListWritable otherPositions) {
readIDs.reset();
for (PositionWritable p : otherPositions) {
appendIDFromPosition(p);
@@ -212,15 +212,15 @@
StringBuilder sbuilder = new StringBuilder();
String delim = "";
long[] ids = readIDs.toReadIDArray();
+ sbuilder.append("[");
if(ids.length > 0){
Arrays.sort(ids);
- for(int i = 0; i < ids.length - 1; i++){
+ for(int i = 0; i < ids.length; i++){
sbuilder.append(delim).append(ids[i]);
delim = ",";
}
- sbuilder.append(ids.length);
}
- sbuilder.append("]}");
+ sbuilder.append("]");
return sbuilder.toString();
}
/**
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index bb6622a..a013c10 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -436,4 +436,8 @@
public boolean isSimpleOrTerminalPath() {
return isPathNode() || (inDegree() == 0 && outDegree() == 1) || (inDegree() == 1 && outDegree() == 0);
}
+
+ public boolean isStartReadOrEndRead(){
+ return startReads.getCountOfPosition() > 0 || endReads.getCountOfPosition() > 0;
+ }
}
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/BridgePath/BridgePath b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/BridgePath/BridgePath
index fff351a..a7b4eaa 100644
--- a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/BridgePath/BridgePath
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/BridgePath/BridgePath
@@ -1,3 +1,2 @@
1 AAATCGAAC
2 AAATCTAAC
-3 AAATCTAAC
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/RingPath/RingPath b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/RingPath/RingPath
index 0f92b6e..0ccdc06 100644
--- a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/RingPath/RingPath
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/RingPath/RingPath
@@ -1,2 +1,2 @@
1 TCTGGGCTCTG
-2 CTGGGCTCTG
+2 CTGGGCTCTT
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SelfPath/SelfPath.txt b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SelfPath/SelfPath.txt
new file mode 100644
index 0000000..f78abbb
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SelfPath/SelfPath.txt
@@ -0,0 +1 @@
+1 GGCC
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SelfPath/SelfPath.txt~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SelfPath/SelfPath.txt~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/SelfPath/SelfPath.txt~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/ThreeDuplicate/BridgePath.txt b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/ThreeDuplicate/ThreeDuplicate.txt
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/ThreeDuplicate/BridgePath.txt
rename to genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/ThreeDuplicate/ThreeDuplicate.txt
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
index f8561ac..1e24211 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
@@ -43,8 +43,6 @@
private EdgeWritable edgeForNextKmer;
private EdgeListWritable edgeListForPreKmer;
private EdgeListWritable edgeListForNextKmer;
-// private VKmerListWritable edgeListForPreKmer;
-// private VKmerListWritable edgeListForNextKmer;
private NodeWritable outputNode;
private KmerDir preKmerDir;
@@ -155,25 +153,16 @@
case FORWARD:
switch(preKmerDir){
case FORWARD:
-// //TODO NOTE this is one way to fix these entries... or you can refactor your code to have these edgeList and edges sitting around
-// edgeListForPreKmer.reset();
-// edgeListForPreKmer.append(preForwardKmer);
-// outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForPreKmer);
-// outputNode.setThreadList(DirectionFlag.DIR_RR, nodeIdList);
edgeListForPreKmer.reset();
edgeForPreKmer.setKey(preForwardKmer);
- edgeForPreKmer.setReadIDs(nodeIdList);
+ edgeForPreKmer.setFromPositions(nodeIdList);
edgeListForPreKmer.add(edgeForPreKmer);
outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForPreKmer);
break;
case REVERSE:
-// edgeListForPreKmer.reset();
-// edgeListForPreKmer.append(preReverseKmer);
-// outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForPreKmer);
-// outputNode.setThreadList(DirectionFlag.DIR_RF, nodeIdList);
edgeListForPreKmer.reset();
edgeForPreKmer.setKey(preReverseKmer);
- edgeForPreKmer.setReadIDs(nodeIdList);
+ edgeForPreKmer.setFromPositions(nodeIdList);
edgeListForPreKmer.add(edgeForPreKmer);
outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForPreKmer);
break;
@@ -182,26 +171,18 @@
case REVERSE:
switch(preKmerDir){
case FORWARD:
-// edgeListForPreKmer.reset();
-// edgeListForPreKmer.append(preForwardKmer);
-// outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForPreKmer);
-// outputNode.setThreadList(DirectionFlag.DIR_FR, nodeIdList);
edgeListForPreKmer.reset();
edgeForPreKmer.setKey(preForwardKmer);
- edgeForPreKmer.setReadIDs(nodeIdList);
+ edgeForPreKmer.setFromPositions(nodeIdList);
edgeListForPreKmer.add(edgeForPreKmer);
outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForPreKmer);
break;
case REVERSE:
-// edgeListForPreKmer.reset();
-// edgeListForPreKmer.append(preReverseKmer);
-// outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForPreKmer);
-// outputNode.setThreadList(DirectionFlag.DIR_FF, nodeIdList);
edgeListForPreKmer.reset();
edgeForPreKmer.setKey(preReverseKmer);
- edgeForPreKmer.setReadIDs(nodeIdList);
+ edgeForPreKmer.setFromPositions(nodeIdList);
edgeListForPreKmer.add(edgeForPreKmer);
- outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForPreKmer);
+ outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForPreKmer);
break;
}
break;
@@ -213,24 +194,16 @@
case FORWARD:
switch(nextKmerDir){
case FORWARD:
-// edgeListForNextKmer.reset();
-// edgeListForNextKmer.append(nextForwardKmer);
-// outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForNextKmer);
-// outputNode.setThreadList(DirectionFlag.DIR_FF, nodeIdList);
edgeListForNextKmer.reset();
edgeForNextKmer.setKey(nextForwardKmer);
- edgeForNextKmer.setReadIDs(nodeIdList);
+ edgeForNextKmer.setFromPositions(nodeIdList);
edgeListForNextKmer.add(edgeForNextKmer);
outputNode.setEdgeList(DirectionFlag.DIR_FF, edgeListForNextKmer);
break;
case REVERSE:
-// edgeListForNextKmer.reset();
-// edgeListForNextKmer.append(nextReverseKmer);
-// outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForNextKmer);
-// outputNode.setThreadList(DirectionFlag.DIR_FR, nodeIdList);
edgeListForNextKmer.reset();
edgeForNextKmer.setKey(nextReverseKmer);
- edgeForNextKmer.setReadIDs(nodeIdList);
+ edgeForNextKmer.setFromPositions(nodeIdList);
edgeListForNextKmer.add(edgeForNextKmer);
outputNode.setEdgeList(DirectionFlag.DIR_FR, edgeListForNextKmer);
break;
@@ -239,25 +212,17 @@
case REVERSE:
switch(nextKmerDir){
case FORWARD:
-// edgeListForNextKmer.reset();
-// edgeListForNextKmer.append(nextForwardKmer);
-// outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForNextKmer);
-// outputNode.setThreadList(DirectionFlag.DIR_RF, nodeIdList);
edgeListForNextKmer.reset();
edgeForNextKmer.setKey(nextForwardKmer);
- edgeForNextKmer.setReadIDs(nodeIdList);
+ edgeForNextKmer.setFromPositions(nodeIdList);
edgeListForNextKmer.add(edgeForNextKmer);
new EdgeListWritable(Arrays.asList(new EdgeWritable(nextForwardKmer, nodeIdList)));
outputNode.setEdgeList(DirectionFlag.DIR_RF, edgeListForNextKmer);
break;
case REVERSE:
-// edgeListForNextKmer.reset();
-// edgeListForNextKmer.append(nextReverseKmer);
-// outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForNextKmer);
-// outputNode.setThreadList(DirectionFlag.DIR_RR, nodeIdList);
edgeListForNextKmer.reset();
edgeForNextKmer.setKey(nextReverseKmer);
- edgeForNextKmer.setReadIDs(nodeIdList);
+ edgeForNextKmer.setFromPositions(nodeIdList);
edgeListForNextKmer.add(edgeForNextKmer);
outputNode.setEdgeList(DirectionFlag.DIR_RR, edgeListForNextKmer);
break;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
index 4a669c3..6bc4549 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -39,9 +39,6 @@
while (values.hasNext()) {
tmpNode.setAsCopy(values.next());
for (byte d: DirectionFlag.values) {
- // TODO NOTE why a *unionupdate* on the edges here? why not just a simple append?
-// outputNode.getEdgeList(d).unionUpdate(tmpNode.getEdgeList(d));
-// outputNode.getThreadList(d).unionUpdate(tmpNode.getThreadList(d));
outputNode.getEdgeList(d).unionUpdate(tmpNode.getEdgeList(d));
}
outputNode.getStartReads().unionUpdate(tmpNode.getStartReads());
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
index 004df78..84a5361 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graph/GenerateGraphViz.java
@@ -49,7 +49,10 @@
outputEdge = convertEdgeToGraph(outputNode, value);
gv.addln(outputEdge);
/** add readIdSet **/
- outputNode += " [shape=record, label = \"<f0> " + key.toString()
+ String fillColor = "";
+ if(value.isStartReadOrEndRead())
+ fillColor = "fillcolor=\"grey\", style=\"filled\",";
+ outputNode += " [shape=record, " + fillColor + " label = \"<f0> " + key.toString()
+ "|<f1> " + value.getStartReads().printStartReadIdSet()
+ "|<f2> " + value.getEndReads().printEndReadIdSet()
+ "|<f3> " + value.getAvgCoverage() + "\"]\n";
@@ -80,19 +83,19 @@
edgeIterator = value.getEdgeList(DirectionFlag.DIR_FR).iterator();
while(edgeIterator.hasNext()){
EdgeWritable edge = edgeIterator.next();
- outputEdge += outputNode + " -> " + edge.toString() + "[color = \"blue\" label =\"FR: " +
+ outputEdge += outputNode + " -> " + edge.getKey().toString() + "[color = \"blue\" label =\"FR: " +
edge.printReadIdSet() + "\"]\n";
}
edgeIterator = value.getEdgeList(DirectionFlag.DIR_RF).iterator();
while(edgeIterator.hasNext()){
EdgeWritable edge = edgeIterator.next();
- outputEdge += outputNode + " -> " + edge.toString() + "[color = \"green\" label =\"RF: " +
+ outputEdge += outputNode + " -> " + edge.getKey().toString() + "[color = \"green\" label =\"RF: " +
edge.printReadIdSet() + "\"]\n";
}
edgeIterator = value.getEdgeList(DirectionFlag.DIR_RR).iterator();
while(edgeIterator.hasNext()){
EdgeWritable edge = edgeIterator.next();
- outputEdge += outputNode + " -> " + edge.toString() + "[color = \"red\" label =\"RR: " +
+ outputEdge += outputNode + " -> " + edge.getKey().toString() + "[color = \"red\" label =\"RR: " +
edge.printReadIdSet() + "\"]\n";
}
//TODO should output actualKmer instead of kmer
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
index e422a16..f9efcd9 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
@@ -24,20 +24,20 @@
private static int SIZE_KMER = 3;
public static final String PreFix = "data/webmap/pathmerge_TestSet";
public static final String[] TestDir = { PreFix + File.separator
- + "test"};
-// + "2", PreFix + File.separator
-// + "3", PreFix + File.separator
-// + "4", PreFix + File.separator
-// + "5", PreFix + File.separator
-// + "6", PreFix + File.separator
-// + "7", PreFix + File.separator
-// + "8", PreFix + File.separator
-// + "9", PreFix + File.separator
-// + "SimplePath", PreFix + File.separator
-// + "BridgePath", PreFix + File.separator
-// + "TreePath", PreFix + File.separator
-// + "CyclePath", PreFix + File.separator
-// + "ThreeDuplicate"};
+ + "2", PreFix + File.separator
+ + "3", PreFix + File.separator
+ + "4", PreFix + File.separator
+ + "5", PreFix + File.separator
+ + "6", PreFix + File.separator
+ + "7", PreFix + File.separator
+ + "8", PreFix + File.separator
+ + "9", PreFix + File.separator
+ + "SimplePath", PreFix + File.separator
+ + "BridgePath", PreFix + File.separator
+ + "TreePath", PreFix + File.separator
+ + "CyclePath", PreFix + File.separator
+ + "ThreeDuplicate", PreFix + File.separator
+ + "SelfPath"};
// + "HighSplitRepeat", PreFix + File.separator
// + "LowSplitRepeat", PreFix + File.separator
// + "MidSplitRepeat", PreFix + File.separator
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
index 39e5760..6db60f3 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
@@ -147,7 +147,6 @@
}
protected static boolean checkResults(String expectedPath, String actualPath, int[] poslistField) throws Exception {
- File dumped = new File(actualPath);
if (poslistField != null) {
// TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped, poslistField);
} else {