Add AverageCoverage to graph construction (Hadoop) and make the Graphviz output prettier
diff --git a/genomix/genomix-hadoop/data/webmap/SplitOnce.txt b/genomix/genomix-hadoop/data/webmap/SplitOnce.txt
new file mode 100644
index 0000000..d8e2b7e
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/SplitOnce.txt
@@ -0,0 +1,2 @@
+1 AATAG
+2 GCATA
diff --git a/genomix/genomix-hadoop/data/webmap/SplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/SplitTwice.txt
similarity index 100%
rename from genomix/genomix-hadoop/data/webmap/SplitRepeat.txt
rename to genomix/genomix-hadoop/data/webmap/SplitTwice.txt
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/HighSplitRepeat/HighSplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/HighSplitRepeat/HighSplitRepeat.txt
new file mode 100644
index 0000000..eca0a13
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/HighSplitRepeat/HighSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCACA
+2 GCACTTT
+3 CGCCGTC
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/LowSplitRepeat/LowSplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/LowSplitRepeat/LowSplitRepeat.txt
new file mode 100644
index 0000000..259fd80
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/LowSplitRepeat/LowSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCA
+2 AGCCG
+3 GCCTT
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/MidSplitRepeat/MidSplitRepeat.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/MidSplitRepeat/MidSplitRepeat.txt
new file mode 100644
index 0000000..e934e54
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/MidSplitRepeat/MidSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCA
+2 CGCCT
+3 GCCGG
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/Tips1/Tips1.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips1/Tips1.txt
new file mode 100644
index 0000000..1e16d68
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips1/Tips1.txt
@@ -0,0 +1,2 @@
+1 CAGCCA
+2 GCCGTA
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/Tips2/Tips2.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips2/Tips2.txt
new file mode 100644
index 0000000..8109730
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips2/Tips2.txt
@@ -0,0 +1,2 @@
+1 ACAGCG
+2 GGCGAA
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/Tips3/Tips3.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips3/Tips3.txt
new file mode 100644
index 0000000..a672034
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips3/Tips3.txt
@@ -0,0 +1,2 @@
+1 CAGCCT
+2 CAGCCA
diff --git a/genomix/genomix-hadoop/data/webmap/lastesttest/Tips4/Tips4.txt b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips4/Tips4.txt
new file mode 100644
index 0000000..499e8e6
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/lastesttest/Tips4/Tips4.txt
@@ -0,0 +1,2 @@
+1 CAGGCA
+2 CAGGCC
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
index 6951e8b..39a7535 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
@@ -93,6 +93,8 @@
setNodeId(mateId, readID, 1);
//set value.edgeList
setEdgeListForNextKmer();
+ //set coverage = 1
+ outputNode.setAvgCoverage(1);
//output mapper result
setMapperOutput(output);
@@ -107,6 +109,8 @@
//set value.edgeList
setEdgeListForPreKmer();
setEdgeListForNextKmer();
+ //set coverage = 1
+ outputNode.setAvgCoverage(1);
//output mapper result
setMapperOutput(output);
}
@@ -119,6 +123,8 @@
setNodeId(mateId, readID, array.length - KMER_SIZE + 1);
//set value.edgeList
setEdgeListForPreKmer();
+ //set coverage = 1
+ outputNode.setAvgCoverage(1);
//output mapper result
setMapperOutput(output);
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
index 4b79ea4..e8e41c4 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -19,6 +19,7 @@
public static int KMER_SIZE;
private NodeWritable outputNode;
private NodeWritable tmpNode;
+ private float averageCoverage;
@Override
public void configure(JobConf job) {
@@ -32,6 +33,7 @@
OutputCollector<VKmerBytesWritable, NodeWritable> output,
Reporter reporter) throws IOException {
outputNode.reset();
+ averageCoverage = 0;
while (values.hasNext()) {
tmpNode.set(values.next());
@@ -40,7 +42,9 @@
outputNode.getFRList().unionUpdate(tmpNode.getFRList());
outputNode.getRFList().unionUpdate(tmpNode.getRFList());
outputNode.getRRList().unionUpdate(tmpNode.getRRList());
+ averageCoverage += tmpNode.getAvgCoverage();
}
+ outputNode.setAvgCoverage(averageCoverage);
output.collect(key,outputNode);
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java
index 1916fc2..44f3168 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java
@@ -11,6 +11,7 @@
import org.apache.hadoop.mapred.JobConf;
import org.junit.Test;
+import edu.uci.ics.genomix.hadoop.graph.GenerateGraphViz;
import edu.uci.ics.genomix.hadoop.pmcommon.HadoopMiniClusterTest;
@@ -59,10 +60,11 @@
- private void dumpResult() throws IOException {
+ private void dumpResult() throws Exception {
// Path src = new Path(RESULT_PATH);
// Path dest = new Path(RESULT_PATH);
// dfs.copyToLocalFile(src, dest);
HadoopMiniClusterTest.copyResultsToLocal(RESULT_PATH, RESULT_PATH + "/test.txt", false, conf, true, dfs);
+ GenerateGraphViz.convertGraphBuildingOutputToGraphViz(RESULT_PATH + "/test.txt.bindir", RESULT_PATH + "/graphviz");
}
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
index dd9c05f..30e75cf 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
@@ -22,16 +22,23 @@
public class GraphBuildingTestSuite extends TestSuite{
private static int SIZE_KMER = 3;
- public static final String PreFix = "data/webmap/pathmerge_TestSet";
+ public static final String PreFix = "data/webmap/lastesttest";
public static final String[] TestDir = { PreFix + File.separator
- + "2", PreFix + File.separator
- + "3", PreFix + File.separator
- + "4", PreFix + File.separator
- + "5", PreFix + File.separator
- + "6", PreFix + File.separator
- + "7", PreFix + File.separator
- + "8", PreFix + File.separator
- + "9"};
+// + "2", PreFix + File.separator
+// + "3", PreFix + File.separator
+// + "4", PreFix + File.separator
+// + "5", PreFix + File.separator
+// + "6", PreFix + File.separator
+// + "7", PreFix + File.separator
+// + "8", PreFix + File.separator
+// + "9"};
+ + "HighSplitRepeat", PreFix + File.separator
+ + "LowSplitRepeat", PreFix + File.separator
+ + "MidSplitRepeat", PreFix + File.separator
+ + "Tips1", PreFix + File.separator
+ + "Tips2", PreFix + File.separator
+ + "Tips3", PreFix + File.separator
+ + "Tips4"};
private static JobConf conf = new JobConf();
private static final String ACTUAL_RESULT_DIR = "actual";
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
index 0c92afe..3788a27 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
@@ -7,7 +7,6 @@
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.ArrayUtils;
@@ -19,14 +18,10 @@
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.After;
import org.junit.AfterClass;
-import org.junit.Before;
import org.junit.BeforeClass;
//import edu.uci.ics.genomix.hadoop.velvetgraphbuilding.GraphBuildingDriver;