Passed test
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
index 3e42d0b..a775d7e 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
@@ -80,7 +80,7 @@
nextNodeEntry = new NodeReference(kmerSize);
nextNextNodeEntry = new NodeReference(0);
cachePositionList = new PositionListWritable();
- LAST_POSITION_ID = inputRecDesc.getFieldCount() - InputInfoFieldStart;
+ LAST_POSITION_ID = (inputRecDesc.getFieldCount() - InputInfoFieldStart) / 2;
}
@Override
@@ -119,47 +119,45 @@
setReverseIncomingList(curNodeEntry,
offsetPoslist + accessor.getFieldStartOffset(tIndex, InputInfoFieldStart + 1));
}
- for (int i = InputInfoFieldStart + 2; i < accessor.getFieldCount(); i += 2) {
- // next Node
- if (i + 2 < accessor.getFieldCount()) {
- setForwardOutgoingList(curNodeEntry, offsetPoslist + accessor.getFieldStartOffset(tIndex, i + 2));
- resetNode(nextNodeEntry, readID, (byte) (1 + (i - InputInfoFieldStart) / 2));
- setKmer(nextNodeEntry.getKmer(), offsetPoslist + accessor.getFieldStartOffset(tIndex, i + 2));
- setReverseOutgoingList(nextNodeEntry, offsetPoslist + accessor.getFieldStartOffset(tIndex, i + 1));
- if (nextNodeEntry.getNodeID().getPosInRead() == LAST_POSITION_ID) {
- setReverseIncomingList(nextNodeEntry,
- offsetPoslist + accessor.getFieldStartOffset(tIndex, i + 3));
- }
- } else {
- resetNode(nextNodeEntry, readID, (byte) 0);
- }
- // nextNext node
- if (i + 4 < accessor.getFieldCount()) {
- setForwardOutgoingList(nextNodeEntry, offsetPoslist + accessor.getFieldStartOffset(tIndex, i + 4));
- resetNode(nextNextNodeEntry, readID, (byte) (2 + (i - InputInfoFieldStart) / 2));
- setReverseOutgoingList(nextNextNodeEntry,
- offsetPoslist + accessor.getFieldStartOffset(tIndex, i + 3));
- if (nextNextNodeEntry.getNodeID().getPosInRead() == LAST_POSITION_ID) {
- setReverseIncomingList(nextNextNodeEntry,
- offsetPoslist + accessor.getFieldStartOffset(tIndex, i + 5));
- }
- } else {
- resetNode(nextNextNodeEntry, readID, (byte) 0);
- }
- if (curNodeEntry.inDegree() > 1 || curNodeEntry.outDegree() > 1 || nextNodeEntry.inDegree() > 1
- || nextNodeEntry.outDegree() > 1 || nextNextNodeEntry.inDegree() > 1
- || nextNextNodeEntry.outDegree() > 1) {
+ // next Node
+ readNodesInfo(tIndex, readID, curNodeEntry, nextNodeEntry, InputInfoFieldStart);
+
+ for (int i = InputInfoFieldStart + 2; i < accessor.getFieldCount(); i += 2) {
+ readNodesInfo(tIndex, readID, nextNodeEntry, nextNextNodeEntry, i);
+
+ if (curNodeEntry.inDegree() > 1 || curNodeEntry.outDegree() > 0 || nextNodeEntry.inDegree() > 0
+ || nextNodeEntry.outDegree() > 0 || nextNextNodeEntry.inDegree() > 0
+ || nextNextNodeEntry.outDegree() > 0) {
connect(curNodeEntry, nextNodeEntry);
outputNode(curNodeEntry);
curNodeEntry.set(nextNodeEntry);
+ nextNodeEntry.set(nextNextNodeEntry);
continue;
}
curNodeEntry.mergeForwadNext(nextNodeEntry, kmerSize);
+ nextNodeEntry.set(nextNextNodeEntry);
}
outputNode(curNodeEntry);
}
+ private void readNodesInfo(int tIndex, int readID, NodeReference curNode, NodeReference nextNode, int curFieldID) {
+ // Sets curNode's forward outgoing list and (re)initializes nextNode from the tuple fields after curFieldID; used for both the (cur, next) and (next, nextNext) pairs.
+ int offsetPoslist = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
+ if (curFieldID + 2 < accessor.getFieldCount()) { // a field exists at curFieldID + 2, so a following node can be read
+ setForwardOutgoingList(curNode, offsetPoslist + accessor.getFieldStartOffset(tIndex, curFieldID + 2));
+ resetNode(nextNode, readID, (byte) (1 + (curFieldID + 2 - InputInfoFieldStart) / 2)); // position derived from the field index; fields appear to come in pairs per position - TODO confirm
+ setKmer(nextNode.getKmer(), offsetPoslist + accessor.getFieldStartOffset(tIndex, curFieldID + 2));
+ setReverseOutgoingList(nextNode, offsetPoslist + accessor.getFieldStartOffset(tIndex, curFieldID + 1));
+ if (nextNode.getNodeID().getPosInRead() == LAST_POSITION_ID) { // only the node at LAST_POSITION_ID also gets a reverse incoming list
+ setReverseIncomingList(nextNode,
+ offsetPoslist + accessor.getFieldStartOffset(tIndex, curFieldID + 3));
+ }
+ } else {
+ resetNode(nextNode, readID, (byte) 0); // no field at curFieldID + 2: reset nextNode with position 0
+ }
+ }
+
private void setKmer(KmerBytesWritable kmer, int offset) {
ByteBuffer buffer = accessor.getBuffer();
int length = buffer.getInt(offset);
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
index 06fcd8e..201be03 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
@@ -99,19 +99,27 @@
plist.setNewReference(posCount, buffer, fieldOffset);
fieldOffset += plist.getLength();
- int kmerbytes = Marshal.getInt(buffer, fieldOffset);
- if (kmer.getLength() != kmerbytes) {
- throw new IllegalArgumentException("kmerlength is invalid");
+ int posInRead = (i + 1) / 2;
+ if (i % 2 == 0) {
+ posInRead = -posInRead;
}
- fieldOffset += 4;
- kmer.setNewReference(buffer, fieldOffset);
- fieldOffset += kmer.getLength();
+ String kmerString = "";
+ if (posInRead > 0) {
+ int kmerbytes = Marshal.getInt(buffer, fieldOffset);
+ if (kmer.getLength() != kmerbytes) {
+ throw new IllegalArgumentException("kmerlength is invalid");
+ }
+ fieldOffset += 4;
+ kmer.setNewReference(buffer, fieldOffset);
+ fieldOffset += kmer.getLength();
+ kmerString = kmer.toString();
+ }
- output.write(Integer.toString(i - 1).getBytes());
- output.writeByte(' ');
+ output.write(Integer.toString(posInRead).getBytes());
+ output.writeByte('\t');
output.write(plist.toString().getBytes());
- output.writeByte(' ');
- output.write(kmer.toString().getBytes());
+ output.writeByte('\t');
+ output.write(kmerString.getBytes());
output.writeByte('\t');
}
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
index c399603..968123e 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
@@ -76,13 +76,17 @@
tuple.getFieldData(MapKmerPositionToReadOperator.OutputOtherReadIDListField),
tuple.getFieldStart(MapKmerPositionToReadOperator.OutputOtherReadIDListField));
- if (kmer.getLength() > tuple
- .getFieldLength(ReadsKeyValueParserFactory.OutputKmerField)) {
- throw new IllegalArgumentException("Not enough kmer bytes");
+ String kmerString = "";
+ if (posInRead > 0) {
+ if (kmer.getLength() > tuple
+ .getFieldLength(ReadsKeyValueParserFactory.OutputKmerField)) {
+ throw new IllegalArgumentException("Not enough kmer bytes");
+ }
+ kmer.setNewReference(
+ tuple.getFieldData(MapKmerPositionToReadOperator.OutputKmerField),
+ tuple.getFieldStart(MapKmerPositionToReadOperator.OutputKmerField));
+ kmerString = kmer.toString();
}
- kmer.setNewReference(
- tuple.getFieldData(MapKmerPositionToReadOperator.OutputKmerField),
- tuple.getFieldStart(MapKmerPositionToReadOperator.OutputKmerField));
output.write(Integer.toString(readID).getBytes());
output.writeByte('\t');
@@ -90,7 +94,7 @@
output.writeByte('\t');
output.write(plist.toString().getBytes());
output.writeByte('\t');
- output.write(kmer.toString().getBytes());
+ output.write(kmerString.getBytes());
output.writeByte('\n');
} catch (IOException e) {
throw new HyracksDataException(e);
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
index 58331b8..68258d7 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
@@ -59,11 +59,11 @@
@Test
public void TestAll() throws Exception {
- TestReader();
+// TestReader();
// TestGroupbyKmer();
// TestMapKmerToRead();
// TestGroupByReadID();
-// TestEndToEnd();
+ TestEndToEnd();
}
public void TestReader() throws Exception {
@@ -93,7 +93,7 @@
conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
driver.runJob(new GenomixJobConf(conf), Plan.OUTPUT_GROUPBY_READID, true);
- Assert.assertEquals(true, checkResults(EXPECTED_GROUPBYREADID, new int [] {2}));
+ Assert.assertEquals(true, checkResults(EXPECTED_GROUPBYREADID, new int [] {2,5,8,11,14,17,20,23}));
}
public void TestEndToEnd() throws Exception {
@@ -101,7 +101,7 @@
cleanUpReEntry();
conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
driver.runJob(new GenomixJobConf(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_NODE, new int[] {1,2}));
+ Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_NODE, new int[] {1,2,3,4}));
}
@Before
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_generateNode b/genomix/genomix-hyracks/src/test/resources/expected/result_after_generateNode
index 2988303..9334b95 100644
--- a/genomix/genomix-hyracks/src/test/resources/expected/result_after_generateNode
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_generateNode
@@ -1,16 +1,14 @@
-((1,0) [] [(1,2)] AATAGA)
-((1,2) [(1,0)] [(6,0),(1,3)] TAGAA)
-((1,3) [(1,2)] [] AGAAG)
-((2,0) [] [(2,2)] AATAGA)
-((2,2) [(2,0)] [(6,0),(2,3)] TAGAA)
-((2,3) [(2,2)] [] AGAAG)
-((3,0) [] [(3,2)] AATAGA)
-((3,2) [(3,0)] [(6,0),(3,3)] TAGAA)
-((3,3) [(3,2)] [] AGAAG)
-((4,0) [] [(4,2)] AATAGA)
-((4,2) [(4,0)] [(6,0),(4,3)] TAGAA)
-((4,3) [(4,2)] [] AGAAG)
-((5,0) [] [(5,2)] AATAGA)
-((5,2) [(5,0)] [(6,0),(5,3)] TAGAA)
-((5,3) [(5,2)] [] AGAAG)
-((6,0) [(1,2),(2,2),(3,2),(5,2),(4,2)] [] AGAAGAAG)
+((1,1) [(1,3)] [] [] [] AATAGA)
+((1,3) [(6,1),(1,4)] [] [] [(1,1)] TAGAA)
+((1,4) [(6,2)] [] [] [(1,3)] AGAAG)
+((2,1) [] [] [] [] AATAGCTT)
+((3,1) [(3,3)] [] [] [] AATAGA)
+((3,3) [(6,1),(3,4)] [] [] [(3,1)] TAGAA)
+((3,4) [(6,2)] [] [] [(3,3)] AGAAG)
+((4,1) [] [] [] [] AATAGCTT)
+((5,1) [(5,3)] [] [] [] AATAGA)
+((5,3) [(6,1),(5,4)] [] [] [(5,1)] TAGAA)
+((5,4) [(6,2)] [] [] [(5,3)] AGAAG)
+((6,1) [(6,2)] [] [] [(5,3),(3,3),(1,3)] AGAAG)
+((6,2) [(6,3)] [] [] [(1,4),(3,4),(5,4),(6,1)] GAAGA)
+((6,3) [] [] [] [(6,2)] AAGAAG)
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_initial_read b/genomix/genomix-hyracks/src/test/resources/expected/result_after_initial_read
index 1091d2e..3502e95 100644
--- a/genomix/genomix-hyracks/src/test/resources/expected/result_after_initial_read
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_initial_read
@@ -1,24 +1,48 @@
-AAGAA (6,2)
-AATAG (1,0)
-AATAG (2,0)
-AATAG (3,0)
-AATAG (4,0)
-AATAG (5,0)
-AGAAG (1,3)
-AGAAG (2,3)
-AGAAG (3,3)
-AGAAG (4,3)
-AGAAG (5,3)
-AGAAG (6,0)
-AGAAG (6,3)
-ATAGA (1,1)
-ATAGA (2,1)
-ATAGA (3,1)
-ATAGA (4,1)
-ATAGA (5,1)
-GAAGA (6,1)
-TAGAA (1,2)
-TAGAA (2,2)
-TAGAA (3,2)
-TAGAA (4,2)
-TAGAA (5,2)
+AAGAA (6,3)
+AAGCT (2,-4)
+AAGCT (4,-4)
+AATAG (1,1)
+AATAG (2,1)
+AATAG (3,1)
+AATAG (4,1)
+AATAG (5,1)
+AGAAG (1,4)
+AGAAG (3,4)
+AGAAG (5,4)
+AGAAG (6,1)
+AGAAG (6,4)
+AGCTA (2,-3)
+AGCTA (4,-3)
+AGCTT (2,4)
+AGCTT (4,4)
+ATAGA (1,2)
+ATAGA (3,2)
+ATAGA (5,2)
+ATAGC (2,2)
+ATAGC (4,2)
+CTATT (1,-1)
+CTATT (2,-1)
+CTATT (3,-1)
+CTATT (4,-1)
+CTATT (5,-1)
+CTTCT (1,-4)
+CTTCT (3,-4)
+CTTCT (5,-4)
+CTTCT (6,-1)
+CTTCT (6,-4)
+GAAGA (6,2)
+GCTAT (2,-2)
+GCTAT (4,-2)
+TAGAA (1,3)
+TAGAA (3,3)
+TAGAA (5,3)
+TAGCT (2,3)
+TAGCT (4,3)
+TCTAT (1,-2)
+TCTAT (3,-2)
+TCTAT (5,-2)
+TCTTC (6,-2)
+TTCTA (1,-3)
+TTCTA (3,-3)
+TTCTA (5,-3)
+TTCTT (6,-3)
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmer2readId b/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmer2readId
index 2585102..b52a848 100644
--- a/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmer2readId
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmer2readId
@@ -1,24 +1,48 @@
-1 0 [] AATAG
-1 1 [] ATAGA
-1 2 [] TAGAA
-1 3 [(6,0)] AGAAG
-2 0 [] AATAG
-2 1 [] ATAGA
-2 2 [] TAGAA
-2 3 [(6,0)] AGAAG
-3 0 [] AATAG
-3 1 [] ATAGA
-3 2 [] TAGAA
-3 3 [(6,0)] AGAAG
-4 0 [] AATAG
-4 1 [] ATAGA
-4 2 [] TAGAA
-4 3 [(6,0)] AGAAG
-5 0 [] AATAG
-5 1 [] ATAGA
-5 2 [] TAGAA
-5 3 [(6,0)] AGAAG
-6 0 [(1,3),(2,3),(3,3),(5,3),(4,3)] AGAAG
-6 1 [] GAAGA
-6 2 [] AAGAA
-6 3 [] AGAAG
+1 -1 []
+1 -2 []
+1 -3 []
+1 -4 [(6,-1)]
+1 1 [] AATAG
+1 2 [] ATAGA
+1 3 [] TAGAA
+1 4 [(6,1)] AGAAG
+2 -1 []
+2 -2 []
+2 -3 []
+2 -4 []
+2 1 [] AATAG
+2 2 [] ATAGC
+2 3 [] TAGCT
+2 4 [] AGCTT
+3 -1 []
+3 -2 []
+3 -3 []
+3 -4 [(6,-1)]
+3 1 [] AATAG
+3 2 [] ATAGA
+3 3 [] TAGAA
+3 4 [(6,1)] AGAAG
+4 -1 []
+4 -2 []
+4 -3 []
+4 -4 []
+4 1 [] AATAG
+4 2 [] ATAGC
+4 3 [] TAGCT
+4 4 [] AGCTT
+5 -1 []
+5 -2 []
+5 -3 []
+5 -4 [(6,-1)]
+5 1 [] AATAG
+5 2 [] ATAGA
+5 3 [] TAGAA
+5 4 [(6,1)] AGAAG
+6 -1 [(1,-4),(3,-4),(5,-4)]
+6 -2 []
+6 -3 []
+6 -4 []
+6 1 [(1,4),(3,4),(5,4)] AGAAG
+6 2 [] GAAGA
+6 3 [] AAGAA
+6 4 [] AGAAG
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmerAggregate b/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmerAggregate
index 499200a..9035f33 100644
--- a/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmerAggregate
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_kmerAggregate
@@ -1,6 +1,18 @@
-AAGAA [(6,2)]
-AATAG [(1,0),(2,0),(3,0),(4,0),(5,0)]
-AGAAG [(1,3),(2,3),(3,3),(4,3),(5,3),(6,0),(6,3)]
-ATAGA [(1,1),(2,1),(3,1),(4,1),(5,1)]
-GAAGA [(6,1)]
-TAGAA [(1,2),(2,2),(3,2),(4,2),(5,2)]
\ No newline at end of file
+AAGAA [(6,3)]
+AAGCT [(2,-4),(4,-4)]
+AATAG [(1,1),(2,1),(3,1),(4,1),(5,1)]
+AGAAG [(1,4),(3,4),(5,4),(6,1),(6,4)]
+AGCTA [(2,-3),(4,-3)]
+AGCTT [(2,4),(4,4)]
+ATAGA [(1,2),(3,2),(5,2)]
+ATAGC [(2,2),(4,2)]
+CTATT [(1,-1),(2,-1),(3,-1),(4,-1),(5,-1)]
+CTTCT [(1,-4),(3,-4),(5,-4),(6,-1),(6,-4)]
+GAAGA [(6,2)]
+GCTAT [(2,-2),(4,-2)]
+TAGAA [(1,3),(3,3),(5,3)]
+TAGCT [(2,3),(4,3)]
+TCTAT [(1,-2),(3,-2),(5,-2)]
+TCTTC [(6,-2)]
+TTCTA [(1,-3),(3,-3),(5,-3)]
+TTCTT [(6,-3)]
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_after_readIDAggreage b/genomix/genomix-hyracks/src/test/resources/expected/result_after_readIDAggreage
index 1cd4274..2cc283d 100644
--- a/genomix/genomix-hyracks/src/test/resources/expected/result_after_readIDAggreage
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_after_readIDAggreage
@@ -1,6 +1,6 @@
-1 0 [] AATAG 1 [] ATAGA 2 [] TAGAA 3 [(6,0)] AGAAG
-2 0 [] AATAG 1 [] ATAGA 2 [] TAGAA 3 [(6,0)] AGAAG
-3 0 [] AATAG 1 [] ATAGA 2 [] TAGAA 3 [(6,0)] AGAAG
-4 0 [] AATAG 1 [] ATAGA 2 [] TAGAA 3 [(6,0)] AGAAG
-5 0 [] AATAG 1 [] ATAGA 2 [] TAGAA 3 [(6,0)] AGAAG
-6 0 [(1,3),(2,3),(3,3),(5,3),(4,3)] AGAAG 1 [] GAAGA 2 [] AAGAA 3 [] AGAAG
+1 1 [] AATAG -1 [] 2 [] ATAGA -2 [] 3 [] TAGAA -3 [] 4 [(6,1)] AGAAG -4 [(6,-1)]
+2 1 [] AATAG -1 [] 2 [] ATAGC -2 [] 3 [] TAGCT -3 [] 4 [] AGCTT -4 []
+3 1 [] AATAG -1 [] 2 [] ATAGA -2 [] 3 [] TAGAA -3 [] 4 [(6,1)] AGAAG -4 [(6,-1)]
+4 1 [] AATAG -1 [] 2 [] ATAGC -2 [] 3 [] TAGCT -3 [] 4 [] AGCTT -4 []
+5 1 [] AATAG -1 [] 2 [] ATAGA -2 [] 3 [] TAGAA -3 [] 4 [(6,1)] AGAAG -4 [(6,-1)]
+6 1 [(3,4),(1,4),(5,4)] AGAAG -1 [(1,-4),(3,-4),(5,-4)] 2 [] GAAGA -2 [] 3 [] AAGAA -3 [] 4 [] AGAAG -4 []