Add 3 SplitRepeat test cases and fix PositionListWritable: unionUpdate HashSet
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
index 8de4b0e..881cbd6 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
@@ -92,7 +92,7 @@
int newSize = valueCount + otherList.valueCount;
HashSet<PositionWritable> uniqueElements = new HashSet<PositionWritable>(newSize);
for (PositionWritable pos : this) {
- uniqueElements.add(pos);
+ uniqueElements.add(new PositionWritable(pos));
}
for (PositionWritable pos : otherList) {
uniqueElements.add(pos);
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
index 03d66a6..bcdd423 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
@@ -32,6 +32,10 @@
set(mateId, readId, posId);
}
+ public PositionWritable(PositionWritable other) {
+ this();
+ set(other);
+ }
public PositionWritable(byte[] storage, int offset) {
setNewReference(storage, offset);
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index 11b0f12..7e516fd 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -155,11 +155,6 @@
System.arraycopy(newData, offset + HEADER_SIZE, bytes, this.kmerStartOffset, bytesUsed);
}
- public void setAsCopy(int k, byte[] newData, int offset) {
-// int k = Marshal.getInt(newData, offset);
- reset(k);
- System.arraycopy(newData, offset, bytes, this.kmerStartOffset, bytesUsed);
- }
/**
* Point this datablock to the given bytes array It works like the pointer
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
index c77fa70..f29c51b 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
@@ -123,7 +123,7 @@
curReverseKmer.setByReadReverse(array, 0);
curKmerDir = curForwardKmer.compareTo(curReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
nextForwardKmer.setAsCopy(curForwardKmer);
- setNextKmer(nextForwardKmer, nextReverseKmer, nextKmerDir, array[kmerSize]);
+ nextKmerDir = setNextKmer(nextForwardKmer, nextReverseKmer, array[kmerSize]);
setnodeId(curNode, mateId, readID, 0);
setnodeId(nextNode, mateId, readID, 0);
setEdgeListForCurAndNextKmer(curKmerDir, curNode, nextKmerDir, nextNode);
@@ -134,9 +134,10 @@
for (; i < array.length; i++) {
curForwardKmer.setAsCopy(nextForwardKmer);
curReverseKmer.setAsCopy(nextReverseKmer);
+ curKmerDir = nextKmerDir;
curNode.set(nextNode);
nextNode.reset();
- setNextKmer(nextForwardKmer, nextReverseKmer, nextKmerDir, array[kmerSize]);
+ nextKmerDir = setNextKmer(nextForwardKmer, nextReverseKmer, array[i]);
setnodeId(nextNode, mateId, readID, 0);
setEdgeListForCurAndNextKmer(curKmerDir, curNode, nextKmerDir, nextNode);
writeToFrame(curForwardKmer, curReverseKmer, curKmerDir, curNode, writer);
@@ -153,11 +154,11 @@
node.setNodeIdList(nodeIdList);
}
- public void setNextKmer(KmerBytesWritable forwardKmer, KmerBytesWritable ReverseKmer, KmerDir nextKmerDir,
+ public KmerDir setNextKmer(KmerBytesWritable forwardKmer, KmerBytesWritable ReverseKmer,
byte nextChar) {
forwardKmer.shiftKmerWithNextChar(nextChar);
ReverseKmer.setByReadReverse(forwardKmer.toString().getBytes(), forwardKmer.getOffset());
- nextKmerDir = forwardKmer.compareTo(ReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
+ return forwardKmer.compareTo(ReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
}
public void writeToFrame(KmerBytesWritable forwardKmer, KmerBytesWritable reverseKmer, KmerDir curKmerDir,
@@ -167,7 +168,7 @@
InsertToFrame(forwardKmer, node, writer);
break;
case REVERSE:
- InsertToFrame(forwardKmer, node, writer);
+ InsertToFrame(reverseKmer, node, writer);
break;
}
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/AggregateKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/AggregateKmerAggregateFactory.java
index a484179..03b220e 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/AggregateKmerAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/AggregateKmerAggregateFactory.java
@@ -48,7 +48,6 @@
return new IAggregatorDescriptor() {
private NodeWritable readNode = new NodeWritable();
-// private KmerBytesWritable readKeyKmer = new KmerBytesWritable();
protected int getOffSet(IFrameTupleAccessor accessor, int tIndex, int fieldId) {
int tupleOffset = accessor.getTupleStartOffset(tIndex);
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/MergeKmerAggregateFactory.java
index aa3c9f6..47ae084 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/MergeKmerAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/aggregators/MergeKmerAggregateFactory.java
@@ -21,10 +21,8 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import edu.uci.ics.genomix.data.Marshal;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.NodeWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
@@ -53,7 +51,7 @@
return new IAggregatorDescriptor() {
private NodeWritable readNode = new NodeWritable();
-
+
protected int getOffSet(IFrameTupleAccessor accessor, int tIndex, int fieldId) {
int tupleOffset = accessor.getTupleStartOffset(tIndex);
int fieldStart = accessor.getFieldStartOffset(tIndex, fieldId);
@@ -73,9 +71,6 @@
localUniNode.reset();
readNode.setAsReference(accessor.getBuffer().array(), getOffSet(accessor, tIndex, 1));
localUniNode.getNodeIdList().unionUpdate(readNode.getNodeIdList());
-// VKmerBytesWritable a = new VKmerBytesWritable();
- // a.setAsCopy(readNode.getFFList().getPosition(0));
- // int kRequested = Marshal.getInt(readNode.getFFList().getByteArray(), readNode.getFFList().getStartOffset() + 4);
localUniNode.getFFList().unionUpdate(readNode.getFFList());
localUniNode.getFRList().unionUpdate(readNode.getFRList());
localUniNode.getRFList().unionUpdate(readNode.getRFList());
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/assembleKeyIntoNodeOperator.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/assembleKeyIntoNodeOperator.java
index e248c3b..f245c7a 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/assembleKeyIntoNodeOperator.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/assembleKeyIntoNodeOperator.java
@@ -98,11 +98,10 @@
private void generateNodeFromKmer(int tIndex) throws HyracksDataException {
int offsetPoslist = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
-
setKmer(readKmer, offsetPoslist + accessor.getFieldStartOffset(tIndex, InputKmerField));
readNode.reset();
setNode(readNode, offsetPoslist + accessor.getFieldStartOffset(tIndex, InputtempNodeField));
- readNode.getKmer().setAsCopy(readKmer.getKmerLength(), readKmer.getBytes(), readKmer.getOffset());
+ readNode.getKmer().setAsCopy(readKmer);
outputNode(readNode);
}
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
index bf87b23..1ade3a9 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
@@ -31,12 +31,12 @@
@SuppressWarnings("deprecation")
public class JobRun {
- private static final int KmerSize = 5;
+ private static final int KmerSize = 3;
private static final int ReadLength = 7;
private static final String ACTUAL_RESULT_DIR = "actual";
private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
- private static final String DATA_INPUT_PATH = "src/test/resources/data/webmap/test1.txt";
+ private static final String DATA_INPUT_PATH = "src/test/resources/data/lastesttest/HighSplitRepeat.txt";
private static final String HDFS_INPUT_PATH = "/webmap";
private static final String HDFS_OUTPUT_PATH = "/webmap_result";
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/HighSplitRepeat.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/HighSplitRepeat.txt
new file mode 100644
index 0000000..eca0a13
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/HighSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCACA
+2 GCACTTT
+3 CGCCGTC
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/LowSplitRepeat.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/LowSplitRepeat.txt
new file mode 100644
index 0000000..259fd80
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/LowSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCA
+2 AGCCG
+3 GCCTT
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/MidSplitRepeat.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/MidSplitRepeat.txt
new file mode 100644
index 0000000..e934e54
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/MidSplitRepeat.txt
@@ -0,0 +1,3 @@
+1 AGCCA
+2 CGCCT
+3 GCCGG