Add more test cases for path merge
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java
index d2f37bc..3f9a858 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java
@@ -78,6 +78,24 @@
cleanUpOutput();
copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
buildGraph();
+
+ LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/tip_test1.txt";
+ GRAPHBUILD_FILE = "tip_test1.txt";
+ cleanUpOutput();
+ copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
+ buildGraph();
+
+ LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/fr_with_tip.txt";
+ GRAPHBUILD_FILE = "fr_with_tipcon.txt";
+ cleanUpOutput();
+ copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
+ buildGraph();
+
+ LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/walk_random_seq1.txt";
+ GRAPHBUILD_FILE = "walk_random_seq1.txt";
+ cleanUpOutput();
+ copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
+ buildGraph();
}
// @Test
@@ -109,7 +127,7 @@
FileOutputFormat.setOutputPath(buildConf, new Path(HDFS_GRAPHBUILD));
buildConf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
buildConf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
- driver.runJob(new GenomixJobConf(buildConf), Plan.BUILD_UNMERGED_GRAPH, true);
+ driver.runJob(new GenomixJobConf(buildConf), Plan.BUILD_DEBRUJIN_GRAPH, true);
String fileFormat = buildConf.get(GenomixJobConf.OUTPUT_FORMAT);
boolean resultsAreText = GenomixJobConf.OUTPUT_FORMAT_TEXT.equalsIgnoreCase(fileFormat);
copyResultsToLocal(HDFS_GRAPHBUILD, ACTUAL_ROOT + GRAPHBUILD_FILE, resultsAreText, buildConf);
diff --git a/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_test.txt b/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_test.txt
index b450bd5..4026c2c 100644
--- a/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_test.txt
+++ b/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_test.txt
@@ -1,3 +1,2 @@
-1 ACGTA
-2 AAATA
-
+1 AAACGTAT
+2 GGAATACG
diff --git a/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_test2.txt b/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_test2.txt
index ee05672..e166418 100644
--- a/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_test2.txt
+++ b/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_test2.txt
@@ -1,4 +1,3 @@
-1 AAAACGTAT
-2 GGGAATACG
-3 CGTATTCCC
-
+1 AAACGTAT
+2 CGTATTCC
+3 GGAATACG
diff --git a/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_with_tip.txt b/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_with_tip.txt
new file mode 100644
index 0000000..b6e1640
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/data/sequence/fr_with_tip.txt
@@ -0,0 +1,2 @@
+1 AACGTATA
+2 GGAATACG
diff --git a/genomix/genomix-hadoop/src/test/resources/data/sequence/tip_test1.txt b/genomix/genomix-hadoop/src/test/resources/data/sequence/tip_test1.txt
new file mode 100644
index 0000000..958ccff
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/resources/data/sequence/tip_test1.txt
@@ -0,0 +1,3 @@
+1 AATAGAAG
+2 ATAGACTA
+3 TAGACTAC
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/bubble_test1.txt.binmerge b/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/bubble_test1.txt.binmerge
index 9c735d8..fd7e309 100755
--- a/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/bubble_test1.txt.binmerge
+++ b/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/bubble_test1.txt.binmerge
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/tworeads.txt b/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/tworeads.txt
index 036bbfb..926e39c 100644
--- a/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/tworeads.txt
+++ b/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/tworeads.txt
@@ -1,24 +1,8 @@
-((2,1) [(2,2)] [] [] [] AATAG) (null)
-((2,2) [(2,3)] [] [] [(2,1)] ATAGA) (null)
-((2,3) [(6,1),(2,4)] [] [] [(2,2)] TAGAA) (null)
-((2,4) [(6,2)] [] [] [(2,3)] AGAAG) (null)
-((4,1) [(4,2)] [] [] [] AATAG) (null)
-((4,2) [(4,3)] [] [] [(4,1)] ATAGA) (null)
-((4,3) [(6,1),(4,4)] [] [] [(4,2)] TAGAA) (null)
-((4,4) [(6,2)] [] [] [(4,3)] AGAAG) (null)
-((6,1) [(6,2)] [] [] [(2,3),(1,3),(3,3),(4,3),(5,3)] AGAAG) (null)
-((6,2) [(6,3)] [] [] [(2,4),(3,4),(1,4),(4,4),(5,4),(6,1)] GAAGA) (null)
-((6,3) [(6,4)] [] [] [(6,2)] AAGAA) (null)
-((6,4) [] [] [] [(6,3)] AGAAG) (null)
+((2,1) [(2,2)] [] [] [(1,3)] AGAAG) (null)
+((2,2) [(2,3)] [] [] [(1,4),(2,1)] GAAGC) (null)
+((2,3) [(2,4)] [] [] [(2,2)] AAGCC) (null)
+((2,4) [] [] [] [(2,3)] AGCCC) (null)
((1,1) [(1,2)] [] [] [] AATAG) (null)
((1,2) [(1,3)] [] [] [(1,1)] ATAGA) (null)
-((1,3) [(6,1),(1,4)] [] [] [(1,2)] TAGAA) (null)
-((1,4) [(6,2)] [] [] [(1,3)] AGAAG) (null)
-((3,1) [(3,2)] [] [] [] AATAG) (null)
-((3,2) [(3,3)] [] [] [(3,1)] ATAGA) (null)
-((3,3) [(6,1),(3,4)] [] [] [(3,2)] TAGAA) (null)
-((3,4) [(6,2)] [] [] [(3,3)] AGAAG) (null)
-((5,1) [(5,2)] [] [] [] AATAG) (null)
-((5,2) [(5,3)] [] [] [(5,1)] ATAGA) (null)
-((5,3) [(6,1),(5,4)] [] [] [(5,2)] TAGAA) (null)
-((5,4) [(6,2)] [] [] [(5,3)] AGAAG) (null)
+((1,3) [(2,1),(1,4)] [] [] [(1,2)] TAGAA) (null)
+((1,4) [(2,2)] [] [] [(1,3)] AGAAG) (null)
diff --git a/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/tworeads.txt.binmerge b/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/tworeads.txt.binmerge
index a6b5bdb..313834e 100755
--- a/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/tworeads.txt.binmerge
+++ b/genomix/genomix-hadoop/src/test/resources/expected/graphbuild-unmerged/tworeads.txt.binmerge
Binary files differ