Add PathMerge test case for synthetic data
diff --git a/genomix/genomix-driver/src/main/java/edu/uci/ics/genomix/driver/GenomixDriver.java b/genomix/genomix-driver/src/main/java/edu/uci/ics/genomix/driver/GenomixDriver.java
index 3067afe..9c81534 100644
--- a/genomix/genomix-driver/src/main/java/edu/uci/ics/genomix/driver/GenomixDriver.java
+++ b/genomix/genomix-driver/src/main/java/edu/uci/ics/genomix/driver/GenomixDriver.java
@@ -102,7 +102,7 @@
key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
if (bw == null)
- bw = new BufferedWriter(new FileWriter(localDestDir + File.separator + "text"));
+ bw = new BufferedWriter(new FileWriter(localDestDir + File.separator + "data"));
while (reader.next(key, value)) {
if (key == null || value == null)
break;
@@ -238,13 +238,15 @@
public static void main(String[] args) throws CmdLineException, NumberFormatException, HyracksException, Exception {
String[] myArgs = { "-runLocal", "-kmerLength", "3",
- // "-localInput", "/home/wbiesing/code/hyracks/genomix/genomix-pregelix/data/input/reads/synthetic/",
- "-localInput", "/home/wbiesing/code/hyracks/genomix/genomix-pregelix/data/input/reads/pathmerge",
- "-localOutput", "output",
+ "-localInput", "../genomix-pregelix/data/input/reads/synthetic/",
+// "-localInput", "../genomix-pregelix/data/input/reads/pathmerge",
+// "-localInput", "/home/wbiesing/code/hyracks/genomix/genomix-pregelix/data/input/reads/test",
+// "-localInput", "output-build/bin",
+ "-localOutput", "output-both",
// "-pipelineOrder", "BUILD,MERGE",
// "-inputDir", "/home/wbiesing/code/hyracks/genomix/genomix-driver/graphbuild.binmerge",
// "-localInput", "../genomix-pregelix/data/TestSet/PathMerge/CyclePath/bin/part-00000",
- "-pipelineOrder", "BUILD" };
+ "-pipelineOrder", "BUILD,MERGE" };
GenomixJobConf conf = GenomixJobConf.fromArguments(myArgs);
GenomixDriver driver = new GenomixDriver();
driver.runGenomix(conf);
diff --git a/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/bin/part-0 b/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/bin/part-0
new file mode 100755
index 0000000..25c93c6
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/bin/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/bin/part-1 b/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/bin/part-1
new file mode 100755
index 0000000..b9f464a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/bin/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/data b/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/data
new file mode 100644
index 0000000..a6dff12
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TestSet/PathMerge/SmallRandom/synthetic1/data
@@ -0,0 +1,32 @@
+CAA {[{AAT:[20]}, {AAC:[5]}] [] [] [{AAT:[19]}, {AAC:[4,5,6]}, {TCA:[18,19,20]}, {GCA:[5,6]}] {5':[], ~5':[]} 7.0x}
+TAA {[{AAG:[34]}, {AAT:[26]}] [] [{TAA:[33,34,35]}] [{AAT:[27]}, {GTA:[26,27,28]}, {AAG:[33,34,35]}] {5':[], ~5':[(28-0_0)]} 9.0x}
+CCA {[{CAC:[37]}] [] [] [{GCC:[11,12]}, {CAG:[11,12,13]}] {5':[(37-0_0)], ~5':[]} 4.0x}
+TCA {[{CAC:[23]}, {CAA:[18,20]}, {CAG:[7]}] [{ATG:[29,30,31]}] [{GAC:[28,29,30,31,32]}, {GAA:[7,8,18,19,22,23,24]}] [{CAC:[22]}, {CAA:[19]}, {CAG:[6,8]}] {5':[(20-0_0)], ~5':[(24-0_0)]} 15.0x}
+CGA {[] [{CTC:[2]}] [{CGC:[1,2,33,35]}] [] {5':[], ~5':[(35-0_0),(33-0_0)]} 4.0x}
+CTA {[] [{CTA:[2,3,4]}, {ATA:[25,26]}] [{AGC:[2,3]}, {AGG:[3,4]}] [{ACT:[1,24,25,26]}] {5':[(2-0_0),(4-0_0)], ~5':[(1-0_0)]} 12.0x}
+CAC {[{ACG:[23,37]}, {ACA:[37]}] [] [] [{ACT:[1]}, {TCA:[22,23]}, {GCA:[1,2]}, {ACG:[21,22,36]}, {CCA:[37]}, {ACA:[36,37]}] {5':[], ~5':[]} 8.0x}
+CCC {[] [{AGG:[3,4]}] [] [{ACC:[3,4,5]}] {5':[], ~5':[]} 3.0x}
+CGC {[{GCA:[2,6]}] [{AGC:[1,2,3,6,32,33,35,36]}] [{CGA:[1,2,33,35]}, {CGC:[1,2,6]}] [{GCA:[1]}, {ACG:[31,32,36,37]}] {5':[(3-0_0)], ~5':[(31-0_0)]} 15.0x}
+CTC {[] [{GGA:[4,5]}, {CGA:[2]}, {AGA:[12,13,14]}] [{AGG:[2,4]}, {AGA:[13,14]}] [{ACT:[4,5]}] {5':[(12-0_0)], ~5':[(4-0_0)]} 7.0x}
+CAG {[{AGC:[7]}, {AGA:[9,10,11]}] [] [] [{AGC:[6]}, {AGA:[12,13]}, {TCA:[6,7,8]}, {GCA:[10,11]}, {CCA:[11,12,13]}] {5':[(9-0_0)], ~5':[(8-0_0),(11-0_0)]} 9.0x}
+CCG {[] [{ACG:[16,17]}] [] [{GCC:[15,16,17]}] {5':[], ~5':[(15-0_0)]} 3.0x}
+AAA {[{AAT:[8]}, {AAC:[17,24,25]}] [] [] [{AAT:[9]}, {AAC:[18,23]}, {GAA:[7,8,9,17,18,19,23,24]}] {5':[(25-0_0)], ~5':[(7-0_0)]} 9.0x}
+GAA {[{AAA:[8,17,19,24]}] [] [{TCA:[7,8,18,19,22,23,24]}] [{AAA:[7,9,18,23]}] {5':[(17-0_0)], ~5':[]} 9.0x}
+ACA {[{CAC:[37]}] [] [] [{CAC:[36,37]}] {5':[], ~5':[]} 2.0x}
+GCA {[{CAC:[2]}, {CAA:[5]}, {CAG:[10,11]}] [{ATG:[1,6,14,15,16]}] [{GCC:[10,11,12,15,16]}] [{AGC:[5,6,7]}, {CAC:[1]}, {CGC:[1,2,6]}, {CAA:[6]}] {5':[(14-0_0)], ~5':[]} 14.0x}
+AGA {[] [{ATC:[9,10,13,14,15]}, {CTC:[12,13,14]}] [{CTC:[13,14]}] [{CAG:[9,10,11,12,13]}] {5':[], ~5':[]} 9.0x}
+GGA {[] [{CTC:[4,5]}] [] [{AGG:[3,4,5]}] {5':[], ~5':[]} 3.0x}
+ATA {[] [{CTA:[25,26]}, {GTA:[20,21]}] [] [{AAT:[19,20,21,25,26,27]}] {5':[], ~5':[(19-0_0),(27-0_0)]} 6.0x}
+GTA {[{TAA:[26]}] [{ATA:[20,21]}] [{ACG:[20,21,22,27,28]}] [{TAA:[27,28]}] {5':[(26-0_0)], ~5':[(22-0_0)]} 6.0x}
+AAC {[{ACT:[24,25]}, {ACG:[16,17]}, {ACC:[5]}] [] [] [{ACT:[5]}, {ACG:[18]}, {ACC:[4]}, {CAA:[4,5,6]}, {AAA:[17,18,23,24,25]}] {5':[(16-0_0)], ~5':[(23-0_0),(6-0_0)]} 10.0x}
+GAC {[{ACG:[29]}] [] [{TCA:[28,29,30,31,32]}] [{ACG:[28,31,32]}] {5':[], ~5':[(30-0_0)]} 6.0x}
+ACC {[{CCC:[5]}] [] [] [{AAC:[4,5]}, {CCC:[3,4]}] {5':[], ~5':[]} 3.0x}
+GCC {[] [] [{GCA:[10,11,12,15,16]}] [{CCG:[15,16,17]}, {CCA:[11,12]}] {5':[], ~5':[(10-0_0)]} 6.0x}
+AGC {[{GCA:[5,7]}] [{CGC:[1,2,3,6,32,33,35,36]}] [{CTA:[2,3]}] [{GCA:[6]}, {AAG:[32,33,34,35,36]}, {CAG:[6,7]}] {5':[(6-0_0),(5-0_0)], ~5':[(34-0_0),(3-0_0)]} 14.0x}
+ATC {[] [{AGA:[9,10,13,14,15]}] [{ATG:[14,15]}] [{AAT:[8,9,10]}] {5':[(13-0_0)], ~5':[]} 6.0x}
+AAG {[{AGC:[32,34,36]}] [] [] [{TAA:[33,34,35]}, {AGC:[33,34,35]}] {5':[(32-0_0),(36-0_0)], ~5':[]} 6.0x}
+ACG {[{CGC:[37]}] [{CCG:[16,17]}, {ACG:[21,22,27,28,29]}] [{GTA:[20,21,22,27,28]}] [{GAC:[28,29,31,32]}, {CAC:[21,22,23,36,37]}, {CGC:[31,32,36]}, {AAC:[16,17,18]}] {5':[], ~5':[(18-0_0)]} 19.0x}
+AGG {[{GGA:[3,4,5]}] [{CCC:[3,4]}] [{CTA:[3,4]}, {CTC:[2,4]}] [] {5':[(5-0_0)], ~5':[(2-0_0)]} 6.0x}
+ATG {[] [{TCA:[29,30,31]}, {GCA:[1,6,14,15,16]}] [{ATC:[14,15]}, {ATG:[29,30,31]}] [{AAT:[1,6]}] {5':[], ~5':[(29-0_0)]} 11.0x}
+AAT {[{ATG:[1]}, {ATC:[8]}, {ATA:[20,21,26]}] [] [] [{TAA:[26,27]}, {ATG:[6]}, {ATC:[9,10]}, {CAA:[19,20]}, {AAA:[8,9]}, {ATA:[19,25,27]}] {5':[(21-0_0),(1-0_0)], ~5':[]} 11.0x}
+ACT {[{CTA:[24,25]}] [] [] [{CAC:[1]}, {AAC:[5,24,25]}, {CTA:[1,26]}, {CTC:[4,5]}] {5':[], ~5':[]} 6.0x}
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/bin/part-0 b/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/bin/part-0
new file mode 100755
index 0000000..25c93c6
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/bin/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/bin/part-1 b/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/bin/part-1
new file mode 100755
index 0000000..b9f464a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/bin/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/text b/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/text
new file mode 100644
index 0000000..a6dff12
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/graphs/synthetic/combined_random_walks/text
@@ -0,0 +1,32 @@
+CAA {[{AAT:[20]}, {AAC:[5]}] [] [] [{AAT:[19]}, {AAC:[4,5,6]}, {TCA:[18,19,20]}, {GCA:[5,6]}] {5':[], ~5':[]} 7.0x}
+TAA {[{AAG:[34]}, {AAT:[26]}] [] [{TAA:[33,34,35]}] [{AAT:[27]}, {GTA:[26,27,28]}, {AAG:[33,34,35]}] {5':[], ~5':[(28-0_0)]} 9.0x}
+CCA {[{CAC:[37]}] [] [] [{GCC:[11,12]}, {CAG:[11,12,13]}] {5':[(37-0_0)], ~5':[]} 4.0x}
+TCA {[{CAC:[23]}, {CAA:[18,20]}, {CAG:[7]}] [{ATG:[29,30,31]}] [{GAC:[28,29,30,31,32]}, {GAA:[7,8,18,19,22,23,24]}] [{CAC:[22]}, {CAA:[19]}, {CAG:[6,8]}] {5':[(20-0_0)], ~5':[(24-0_0)]} 15.0x}
+CGA {[] [{CTC:[2]}] [{CGC:[1,2,33,35]}] [] {5':[], ~5':[(35-0_0),(33-0_0)]} 4.0x}
+CTA {[] [{CTA:[2,3,4]}, {ATA:[25,26]}] [{AGC:[2,3]}, {AGG:[3,4]}] [{ACT:[1,24,25,26]}] {5':[(2-0_0),(4-0_0)], ~5':[(1-0_0)]} 12.0x}
+CAC {[{ACG:[23,37]}, {ACA:[37]}] [] [] [{ACT:[1]}, {TCA:[22,23]}, {GCA:[1,2]}, {ACG:[21,22,36]}, {CCA:[37]}, {ACA:[36,37]}] {5':[], ~5':[]} 8.0x}
+CCC {[] [{AGG:[3,4]}] [] [{ACC:[3,4,5]}] {5':[], ~5':[]} 3.0x}
+CGC {[{GCA:[2,6]}] [{AGC:[1,2,3,6,32,33,35,36]}] [{CGA:[1,2,33,35]}, {CGC:[1,2,6]}] [{GCA:[1]}, {ACG:[31,32,36,37]}] {5':[(3-0_0)], ~5':[(31-0_0)]} 15.0x}
+CTC {[] [{GGA:[4,5]}, {CGA:[2]}, {AGA:[12,13,14]}] [{AGG:[2,4]}, {AGA:[13,14]}] [{ACT:[4,5]}] {5':[(12-0_0)], ~5':[(4-0_0)]} 7.0x}
+CAG {[{AGC:[7]}, {AGA:[9,10,11]}] [] [] [{AGC:[6]}, {AGA:[12,13]}, {TCA:[6,7,8]}, {GCA:[10,11]}, {CCA:[11,12,13]}] {5':[(9-0_0)], ~5':[(8-0_0),(11-0_0)]} 9.0x}
+CCG {[] [{ACG:[16,17]}] [] [{GCC:[15,16,17]}] {5':[], ~5':[(15-0_0)]} 3.0x}
+AAA {[{AAT:[8]}, {AAC:[17,24,25]}] [] [] [{AAT:[9]}, {AAC:[18,23]}, {GAA:[7,8,9,17,18,19,23,24]}] {5':[(25-0_0)], ~5':[(7-0_0)]} 9.0x}
+GAA {[{AAA:[8,17,19,24]}] [] [{TCA:[7,8,18,19,22,23,24]}] [{AAA:[7,9,18,23]}] {5':[(17-0_0)], ~5':[]} 9.0x}
+ACA {[{CAC:[37]}] [] [] [{CAC:[36,37]}] {5':[], ~5':[]} 2.0x}
+GCA {[{CAC:[2]}, {CAA:[5]}, {CAG:[10,11]}] [{ATG:[1,6,14,15,16]}] [{GCC:[10,11,12,15,16]}] [{AGC:[5,6,7]}, {CAC:[1]}, {CGC:[1,2,6]}, {CAA:[6]}] {5':[(14-0_0)], ~5':[]} 14.0x}
+AGA {[] [{ATC:[9,10,13,14,15]}, {CTC:[12,13,14]}] [{CTC:[13,14]}] [{CAG:[9,10,11,12,13]}] {5':[], ~5':[]} 9.0x}
+GGA {[] [{CTC:[4,5]}] [] [{AGG:[3,4,5]}] {5':[], ~5':[]} 3.0x}
+ATA {[] [{CTA:[25,26]}, {GTA:[20,21]}] [] [{AAT:[19,20,21,25,26,27]}] {5':[], ~5':[(19-0_0),(27-0_0)]} 6.0x}
+GTA {[{TAA:[26]}] [{ATA:[20,21]}] [{ACG:[20,21,22,27,28]}] [{TAA:[27,28]}] {5':[(26-0_0)], ~5':[(22-0_0)]} 6.0x}
+AAC {[{ACT:[24,25]}, {ACG:[16,17]}, {ACC:[5]}] [] [] [{ACT:[5]}, {ACG:[18]}, {ACC:[4]}, {CAA:[4,5,6]}, {AAA:[17,18,23,24,25]}] {5':[(16-0_0)], ~5':[(23-0_0),(6-0_0)]} 10.0x}
+GAC {[{ACG:[29]}] [] [{TCA:[28,29,30,31,32]}] [{ACG:[28,31,32]}] {5':[], ~5':[(30-0_0)]} 6.0x}
+ACC {[{CCC:[5]}] [] [] [{AAC:[4,5]}, {CCC:[3,4]}] {5':[], ~5':[]} 3.0x}
+GCC {[] [] [{GCA:[10,11,12,15,16]}] [{CCG:[15,16,17]}, {CCA:[11,12]}] {5':[], ~5':[(10-0_0)]} 6.0x}
+AGC {[{GCA:[5,7]}] [{CGC:[1,2,3,6,32,33,35,36]}] [{CTA:[2,3]}] [{GCA:[6]}, {AAG:[32,33,34,35,36]}, {CAG:[6,7]}] {5':[(6-0_0),(5-0_0)], ~5':[(34-0_0),(3-0_0)]} 14.0x}
+ATC {[] [{AGA:[9,10,13,14,15]}] [{ATG:[14,15]}] [{AAT:[8,9,10]}] {5':[(13-0_0)], ~5':[]} 6.0x}
+AAG {[{AGC:[32,34,36]}] [] [] [{TAA:[33,34,35]}, {AGC:[33,34,35]}] {5':[(32-0_0),(36-0_0)], ~5':[]} 6.0x}
+ACG {[{CGC:[37]}] [{CCG:[16,17]}, {ACG:[21,22,27,28,29]}] [{GTA:[20,21,22,27,28]}] [{GAC:[28,29,31,32]}, {CAC:[21,22,23,36,37]}, {CGC:[31,32,36]}, {AAC:[16,17,18]}] {5':[], ~5':[(18-0_0)]} 19.0x}
+AGG {[{GGA:[3,4,5]}] [{CCC:[3,4]}] [{CTA:[3,4]}, {CTC:[2,4]}] [] {5':[(5-0_0)], ~5':[(2-0_0)]} 6.0x}
+ATG {[] [{TCA:[29,30,31]}, {GCA:[1,6,14,15,16]}] [{ATC:[14,15]}, {ATG:[29,30,31]}] [{AAT:[1,6]}] {5':[], ~5':[(29-0_0)]} 11.0x}
+AAT {[{ATG:[1]}, {ATC:[8]}, {ATA:[20,21,26]}] [] [] [{TAA:[26,27]}, {ATG:[6]}, {ATC:[9,10]}, {CAA:[19,20]}, {AAA:[8,9]}, {ATA:[19,25,27]}] {5':[(21-0_0),(1-0_0)], ~5':[]} 11.0x}
+ACT {[{CTA:[24,25]}] [] [] [{CAC:[1]}, {AAC:[5,24,25]}, {CTA:[1,26]}, {CTC:[4,5]}] {5':[], ~5':[]} 6.0x}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeTestSuite.java
index a5ac42b..4d9bfc5 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeTestSuite.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeTestSuite.java
@@ -18,11 +18,14 @@
// "RingPath", "CyclePath",
// "SimpleTreePath", "ComplexTreePath", "HyracksGraphBuild"
// };
- String PreFix = "BubbleMerge_Input";
+// String PreFix = "BubbleMerge_Input";
+// String testSet[] = { PreFix + File.separator
+// + "SimpleRectangle", PreFix + File.separator
+// + "MediumRectangle", PreFix + File.separator
+// + "ComplexRectangle"};
+ String PreFix = "SmallRandom";
String testSet[] = { PreFix + File.separator
- + "SimpleRectangle", PreFix + File.separator
- + "MediumRectangle", PreFix + File.separator
- + "ComplexRectangle"};
+ + "synthetic1"};
// , PreFix + File.separator
// + "LtoL", PreFix + File.separator
// + "LtoR", PreFix + File.separator