Passed the test for the map-kmer-to-readId operator
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
index 6cb2491..631fc5b 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
@@ -31,6 +31,7 @@
@SuppressWarnings("deprecation")
public class JobRunStepByStepTest {
private static final int KmerSize = 5;
+ private static final int ReadLength = 8;
private static final String ACTUAL_RESULT_DIR = "actual";
private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
@@ -41,6 +42,8 @@
private static final String EXPECTED_DIR = "src/test/resources/expected/";
private static final String EXPECTED_READER_RESULT = EXPECTED_DIR + "result_after_initial_read";
private static final String EXPECTED_OUPUT_KMER = EXPECTED_DIR + "result_after_kmerAggregate";
+ private static final String EXPECTED_KMER_TO_READID = EXPECTED_DIR + "result_after_kmer2readId";
+ private static final String EXPECTED_GROUPBYREADID = EXPECTED_DIR + "result_after_readIDAggreage";
private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + "/merged.txt";
private static final String CONVERT_RESULT = DUMPED_RESULT + ".txt";
@@ -56,36 +59,40 @@
@Test
public void TestAll() throws Exception {
//TestReader();
- TestGroupbyKmer();
- // TestMapKmerToRead();
- // TestGroupByReadID();
+ //TestGroupbyKmer();
+ //TestMapKmerToRead();
+ TestGroupByReadID();
// TestEndToEnd();
}
public void TestReader() throws Exception {
+ cleanUpReEntry();
conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
driver.runJob(new GenomixJobConf(conf), Plan.CHECK_KMERREADER, true);
- Assert.assertEquals(true, checkResults(EXPECTED_READER_RESULT, false));
+ Assert.assertEquals(true, checkResults(EXPECTED_READER_RESULT, -1));
}
public void TestGroupbyKmer() throws Exception {
+ cleanUpReEntry();
conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
driver.runJob(new GenomixJobConf(conf), Plan.OUTPUT_KMERHASHTABLE, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER, true));
+ Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER, 1));
}
public void TestMapKmerToRead() throws Exception {
+ cleanUpReEntry();
conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
driver.runJob(new GenomixJobConf(conf), Plan.OUTPUT_MAP_KMER_TO_READ, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER,false));
+ Assert.assertEquals(true, checkResults(EXPECTED_KMER_TO_READID, 2));
}
public void TestGroupByReadID() throws Exception {
+ cleanUpReEntry();
conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
- conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_EXTERNAL);
+ conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
driver.runJob(new GenomixJobConf(conf), Plan.OUTPUT_GROUPBY_READID, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER,false));
+ Assert.assertEquals(true, checkResults(EXPECTED_GROUPBYREADID, -1));
}
public void TestEndToEnd() throws Exception {
@@ -93,11 +100,11 @@
cleanUpReEntry();
conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_EXTERNAL);
driver.runJob(new GenomixJobConf(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER,false));
+ Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER, -1));
cleanUpReEntry();
conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
driver.runJob(new GenomixJobConf(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER,false));
+ Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER, -1));
}
@Before
@@ -112,6 +119,7 @@
FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
conf.setInt(GenomixJobConf.KMER_LENGTH, KmerSize);
+ conf.setInt(GenomixJobConf.READ_LENGTH, ReadLength);
driver = new Driver(edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.CC_HOST,
edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT, numPartitionPerMachine);
}
@@ -156,7 +164,7 @@
}
}
- private boolean checkResults(String expectedPath, boolean checkPos) throws Exception {
+ private boolean checkResults(String expectedPath, int poslistField) throws Exception {
File dumped = null;
String format = conf.get(GenomixJobConf.OUTPUT_FORMAT);
if (GenomixJobConf.OUTPUT_FORMAT_TEXT.equalsIgnoreCase(format)) {
@@ -202,8 +210,8 @@
dumped = new File(CONVERT_RESULT);
}
- if (checkPos) {
- TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped);
+ if (poslistField > 0) {
+ TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped, poslistField);
} else {
TestUtils.compareWithSortedResult(new File(expectedPath), dumped);
}
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
index e97df1b..49e21ba 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
@@ -61,7 +61,8 @@
}
}
- public static void compareWithUnSortedPosition(File expectedFile, File actualFile) throws Exception {
+ public static void compareWithUnSortedPosition(File expectedFile, File actualFile, int poslistField)
+ throws Exception {
BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
ArrayList<String> actualLines = new ArrayList<String>();
@@ -77,7 +78,7 @@
if (lineExpected == null) {
throw new Exception("Actual result changed at line " + num + ":\n< " + actualLine + "\n> ");
}
- if (!containStrings(lineExpected, actualLine)) {
+ if (!containStrings(lineExpected, actualLine, poslistField)) {
throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+ actualLine);
}
@@ -162,24 +163,32 @@
return true;
}
- private static boolean containStrings(String lineExpected, String actualLine) {
- String keyExp = lineExpected.split("\\\t")[0];
- String keyAct = actualLine.split("\\\t")[0];
- if (!keyAct.equals(keyExp)) {
+ private static boolean containStrings(String lineExpected, String actualLine, int poslistField) {
+ String[] fieldsExp = lineExpected.split("\\\t");
+ String[] fieldsAct = actualLine.split("\\\t");
+ if (fieldsAct.length != fieldsExp.length) {
return false;
}
+ for (int i = 0; i < fieldsAct.length; i++) {
+ if (i == poslistField) {
+ continue;
+ }
+ if (!fieldsAct[i].equals(fieldsExp[i])) {
+ return false;
+ }
+ }
ArrayList<String> posExp = new ArrayList<String>();
ArrayList<String> posAct = new ArrayList<String>();
- String valueExp = lineExpected.split("\\\t")[1];
+ String valueExp = lineExpected.split("\\\t")[poslistField];
String[] valuesExp = valueExp.substring(1, valueExp.length() - 1).split(",");
for (String str : valuesExp) {
posExp.add(str);
}
- String valueAct = actualLine.split("\\\t")[1];
+ String valueAct = actualLine.split("\\\t")[poslistField];
String[] valuesAct = valueAct.substring(1, valueAct.length() - 1).split(",");
for (String str : valuesAct) {
diff --git a/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt b/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt
index 08f0f95..13190dd 100755
--- a/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt
+++ b/genomix/genomix-hyracks/src/test/resources/data/webmap/text.txt
@@ -2,4 +2,5 @@
2 AATAGAAG
3 AATAGAAG
4 AATAGAAG
-5 AATAGAAG
\ No newline at end of file
+5 AATAGAAG
+6 AGAAGAAG
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_initial_read b/genomix/genomix-hyracks/src/test/resources/expected/result_after_initial_read
index 728c093..1091d2e 100644
--- a/genomix/genomix-hyracks/src/test/resources/expected/result_after_initial_read
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_initial_read
@@ -1,3 +1,4 @@
+AAGAA (6,2)
AATAG (1,0)
AATAG (2,0)
AATAG (3,0)
@@ -8,13 +9,16 @@
AGAAG (3,3)
AGAAG (4,3)
AGAAG (5,3)
+AGAAG (6,0)
+AGAAG (6,3)
ATAGA (1,1)
ATAGA (2,1)
ATAGA (3,1)
ATAGA (4,1)
ATAGA (5,1)
+GAAGA (6,1)
TAGAA (1,2)
TAGAA (2,2)
TAGAA (3,2)
TAGAA (4,2)
-TAGAA (5,2)
\ No newline at end of file
+TAGAA (5,2)
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmer2readId b/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmer2readId
new file mode 100644
index 0000000..0ca9de6
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmer2readId
@@ -0,0 +1,24 @@
+1 0 [] AATAG
+1 1 [] ATAGA
+1 2 [] TAGAA
+1 3 [(6,0)] AGAAG
+2 0 [] AATAG
+2 1 [] ATAGA
+2 2 [] TAGAA
+2 3 [(6,0)] AGAAG
+3 0 [] AATAG
+3 1 [] ATAGA
+3 2 [] TAGAA
+3 3 [(6,0)] AGAAG
+4 0 [] AATAG
+4 1 [] ATAGA
+4 2 [] TAGAA
+4 3 [(6,0)] AGAAG
+5 0 [] AATAG
+5 1 [] ATAGA
+5 2 [] TAGAA
+5 3 [(6,0)] AGAAG
+6 0 [(1,3),(2,3),(3,3),(4,3),(5,3),(6,3)] AGAAG
+6 1 [] GAAGA
+6 2 [] AAGAA
+6 3 [(6,0)] AGAAG
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmerAggregate b/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmerAggregate
index d5624d7..499200a 100644
--- a/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmerAggregate
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmerAggregate
@@ -1,4 +1,6 @@
+AAGAA [(6,2)]
AATAG [(1,0),(2,0),(3,0),(4,0),(5,0)]
-AGAAG [(1,3),(2,3),(3,3),(4,3),(5,3)]
+AGAAG [(1,3),(2,3),(3,3),(4,3),(5,3),(6,0),(6,3)]
ATAGA [(1,1),(2,1),(3,1),(4,1),(5,1)]
-TAGAA [(1,2),(2,2),(3,2),(4,2),(5,2)]
+GAAGA [(6,1)]
+TAGAA [(1,2),(2,2),(3,2),(4,2),(5,2)]
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_readIDAggreage b/genomix/genomix-hyracks/src/test/resources/expected/result_after_readIDAggreage
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_readIDAggreage