passed genomix-data test
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index d622657..7e578f6 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -68,6 +68,7 @@
}
public KmerBytesWritable(KmerBytesWritable right) {
+ this(right.kmerlength);
set(right);
}
@@ -196,6 +197,11 @@
}
}
+ public void setByRead(int k, byte[] array, int start) {
+ reset(k);
+ setByRead(array, start);
+ }
+
/**
* Compress Reversed Kmer into bytes array AATAG will compress as
* [0x000A,0xATAG]
@@ -224,6 +230,11 @@
}
}
+ public void setByReadReverse(int k, byte[] array, int start) {
+ reset(k);
+ setByReadReverse(array, start);
+ }
+
/**
* Shift Kmer to accept new char input
*
@@ -297,15 +308,15 @@
* @return the merged Kmer, this K of this Kmer is k+1
*/
public void mergeKmerWithNextCode(byte nextCode) {
- this.kmerlength +=1;
+ this.kmerlength += 1;
setSize(KmerUtil.getByteNumFromK(kmerlength));
- for(int i = getLength()-1; i>0; i--){
- bytes[offset + i] = bytes[offset + i-1];
- }
- if (kmerlength % 4 == 1) {
- getBytes()[offset] = (byte) (nextCode & 0x3);
+ if (kmerlength % 4 == 1) {
+ for (int i = getLength() - 1; i > 0; i--) {
+ bytes[offset + i] = bytes[offset + i - 1];
+ }
+ bytes[offset] = (byte) (nextCode & 0x3);
} else {
- getBytes()[offset] = (byte) (getBytes()[offset] | ((nextCode & 0x3) << ((getKmerLength() % 4) << 1)));
+ bytes[offset] = (byte) (bytes[offset] | ((nextCode & 0x3) << (((kmerlength-1) % 4) << 1)));
}
clearLeadBit();
}
@@ -378,5 +389,4 @@
static { // register this comparator
WritableComparator.define(KmerBytesWritable.class, new Comparator());
}
-
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableFactoryTest.java
similarity index 89%
rename from genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java
rename to genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableFactoryTest.java
index 6037805..54d5926 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableFactoryTest.java
@@ -20,24 +20,14 @@
import edu.uci.ics.genomix.type.GeneCode;
import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.type.KmerBytesWritableFactory;
-public class VKmerBytesWritableFactoryTest {
+public class KmerBytesWritableFactoryTest {
static byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
KmerBytesWritableFactory kmerFactory = new KmerBytesWritableFactory(8);
@Test
- public void TestDegree() {
- Assert.assertTrue(GeneCode.inDegree((byte) 0xff) == 4);
- Assert.assertTrue(GeneCode.outDegree((byte) 0xff) == 4);
- Assert.assertTrue(GeneCode.inDegree((byte) 0x3f) == 2);
- Assert.assertTrue(GeneCode.outDegree((byte) 0x01) == 1);
- Assert.assertTrue(GeneCode.inDegree((byte) 0x01) == 0);
- }
-
- @Test
public void TestGetLastKmer() {
KmerBytesWritable kmer = new KmerBytesWritable(9);
kmer.setByRead(array, 0);
@@ -49,7 +39,7 @@
lastKmer = kmerFactory.getSubKmerFromChain(9 - i, i, kmer);
Assert.assertEquals("AGCTGACCG".substring(9 - i), lastKmer.toString());
}
- VKmerBytesWritable vlastKmer;
+ KmerBytesWritable vlastKmer;
for (int i = 8; i > 0; i--) {
vlastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
Assert.assertEquals("AGCTGACCG".substring(9 - i), vlastKmer.toString());
@@ -70,7 +60,7 @@
firstKmer = kmerFactory.getSubKmerFromChain(0, i, kmer);
Assert.assertEquals("AGCTGACCG".substring(0, i), firstKmer.toString());
}
- VKmerBytesWritable vfirstKmer;
+ KmerBytesWritable vfirstKmer;
for (int i = 8; i > 0; i--) {
vfirstKmer = kmerFactory.getFirstKmerFromChain(i, kmer);
Assert.assertEquals("AGCTGACCG".substring(0, i), vfirstKmer.toString());
@@ -84,7 +74,7 @@
KmerBytesWritable kmer = new KmerBytesWritable(9);
kmer.setByRead(array, 0);
Assert.assertEquals("AGCTGACCG", kmer.toString());
- VKmerBytesWritable subKmer;
+ KmerBytesWritable subKmer;
for (int istart = 0; istart < kmer.getKmerLength() - 1; istart++) {
for (int isize = 1; isize + istart <= kmer.getKmerLength(); isize++) {
subKmer = kmerFactory.getSubKmerFromChain(istart, isize, kmer);
@@ -168,7 +158,7 @@
KmerBytesWritable kmer5 = new KmerBytesWritable(7);
kmer5.setByRead(array, 0);
String text5 = "AGCTGAC";
- VKmerBytesWritable kmer6 = new VKmerBytesWritable(9);
+ KmerBytesWritable kmer6 = new KmerBytesWritable(9);
kmer6.setByRead(9, array, 1);
String text6 = "GCTGACCGT";
merged = kmerFactory.mergeTwoKmer(kmer5, kmer6);
@@ -188,14 +178,14 @@
@Test
public void TestShift() {
- VKmerBytesWritable kmer = new VKmerBytesWritable(kmerFactory.getKmerByRead(9, array, 0));
+ KmerBytesWritable kmer = new KmerBytesWritable(kmerFactory.getKmerByRead(9, array, 0));
String text = "AGCTGACCG";
Assert.assertEquals(text, kmer.toString());
- VKmerBytesWritable kmerForward = kmerFactory.shiftKmerWithNextCode(kmer, GeneCode.A);
+ KmerBytesWritable kmerForward = kmerFactory.shiftKmerWithNextCode(kmer, GeneCode.A);
Assert.assertEquals(text, kmer.toString());
Assert.assertEquals("GCTGACCGA", kmerForward.toString());
- VKmerBytesWritable kmerBackward = kmerFactory.shiftKmerWithPreCode(kmer, GeneCode.C);
+ KmerBytesWritable kmerBackward = kmerFactory.shiftKmerWithPreCode(kmer, GeneCode.C);
Assert.assertEquals(text, kmer.toString());
Assert.assertEquals("CAGCTGACC", kmerBackward.toString());
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableTest.java
index faee509..a5a4430 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableTest.java
@@ -90,4 +90,21 @@
}
}
+ @Test
+ public void TestMergeNext() {
+ KmerBytesWritable kmer = new KmerBytesWritable(9);
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' };
+ kmer.setByRead(array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+
+ String text = "AGCTGACCG";
+ for (int i = 0; i < 10; i++) {
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
+ kmer.mergeKmerWithNextCode(x);
+ text = text + (char) GeneCode.GENE_SYMBOL[x];
+ Assert.assertEquals(text, kmer.toString());
+ }
+ }
+ }
+
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
index c383ade..4e99865 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
@@ -1,9 +1,14 @@
package edu.uci.ics.genomix.hyracks.data.primitive;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-public class NodeReference {
+public class NodeReference implements WritableComparable<NodeReference> {
private PositionReference nodeID;
private int countOfKmer;
private PositionListReference incomingList;
@@ -17,8 +22,8 @@
outgoingList = new PositionListReference();
kmer = new KmerBytesWritable(kmerSize);
}
-
- public int getCount(){
+
+ public int getCount() {
return countOfKmer;
}
@@ -60,15 +65,15 @@
public PositionReference getNodeID() {
return nodeID;
}
-
- public KmerBytesWritable getKmer(){
+
+ public KmerBytesWritable getKmer() {
return kmer;
}
public void mergeNextWithinOneRead(NodeReference nextNodeEntry) {
this.countOfKmer += 1;
this.outgoingList.set(nextNodeEntry.outgoingList);
- kmer.mergeKmerWithNextCode(nextNodeEntry.kmer.getGeneCodeAtPosition(nextNodeEntry.kmer.getKmerLength()-1));
+ kmer.mergeKmerWithNextCode(nextNodeEntry.kmer.getGeneCodeAtPosition(nextNodeEntry.kmer.getKmerLength() - 1));
}
public void set(NodeReference node) {
@@ -79,4 +84,31 @@
this.kmer.set(node.kmer);
}
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ this.nodeID.readFields(in);
+ this.countOfKmer = in.readInt();
+ this.incomingList.readFields(in);
+ this.outgoingList.readFields(in);
+ this.kmer.readFields(in);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ this.nodeID.write(out);
+ out.writeInt(this.countOfKmer);
+ this.incomingList.write(out);
+ this.outgoingList.write(out);
+ this.kmer.write(out);
+ }
+
+ @Override
+ public int compareTo(NodeReference other) {
+ return this.nodeID.compareTo(other.nodeID);
+ }
+
+ @Override
+ public int hashCode() {
+ return nodeID.hashCode();
+ }
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
index 100f8d6..d4c8f7b 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
@@ -49,19 +49,6 @@
}
}
- @Override
- public void readFields(DataInput in) throws IOException {
- this.valueCount = in.readInt();
- setSize(valueCount * PositionReference.LENGTH);
- in.readFully(storage, offset, valueCount * PositionReference.LENGTH);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(valueCount);
- out.write(storage, offset, valueCount * PositionReference.LENGTH);
- }
-
public PositionReference getPosition(int i) {
if (i >= valueCount) {
throw new ArrayIndexOutOfBoundsException("No such positions");
@@ -149,4 +136,17 @@
return valueCount * PositionReference.LENGTH;
}
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ this.valueCount = in.readInt();
+ setSize(valueCount * PositionReference.LENGTH);
+ in.readFully(storage, offset, valueCount * PositionReference.LENGTH);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(valueCount);
+ out.write(storage, offset, valueCount * PositionReference.LENGTH);
+ }
+
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
index 100b74d..29e894b 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
@@ -4,12 +4,13 @@
import java.io.DataOutput;
import java.io.IOException;
-import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
import edu.uci.ics.hyracks.data.std.api.IValueReference;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
-public class PositionReference implements IValueReference,Writable {
+public class PositionReference implements IValueReference, WritableComparable<PositionReference> {
private byte[] storage;
private int offset;
public static final int LENGTH = 5;
@@ -72,4 +73,51 @@
out.write(storage, offset, LENGTH);
}
+ @Override
+ public int hashCode() {
+ return this.getReadID();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof PositionReference))
+ return false;
+ PositionReference other = (PositionReference) o;
+ return this.getReadID() == other.getReadID() && this.getPosInRead() == other.getPosInRead();
+ }
+
+ @Override
+ public int compareTo(PositionReference other) {
+ int diff = this.getReadID() - other.getReadID();
+ if (diff == 0) {
+ return this.getPosInRead() - other.getPosInRead();
+ }
+ return diff;
+ }
+
+ @Override
+ public String toString() {
+ return "(" + Integer.toString(getReadID()) + "," + Integer.toString((int) getPosInRead()) + ")";
+ }
+
+ /** A Comparator optimized for IntWritable. */
+ public static class Comparator extends WritableComparator {
+ public Comparator() {
+ super(PositionReference.class);
+ }
+
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ int thisValue = IntegerSerializerDeserializer.getInt(b1, s1);
+ int thatValue = IntegerSerializerDeserializer.getInt(b2, s2);
+ int diff = thisValue - thatValue;
+ if (diff == 0){
+ return b1[s1+INTBYTES] - b2[s2+INTBYTES];
+ }
+ return diff;
+ }
+ }
+
+ static { // register this comparator
+ WritableComparator.define(PositionReference.class, new Comparator());
+ }
}
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java
index ba9aea2..91ec530 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java
@@ -34,7 +34,6 @@
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.util.ReflectionUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -43,7 +42,6 @@
import edu.uci.ics.genomix.hyracks.driver.Driver.Plan;
import edu.uci.ics.genomix.hyracks.job.GenomixJob;
import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
public class JobRunTest {
private static final String ACTUAL_RESULT_DIR = "actual";
@@ -213,8 +211,8 @@
// KmerBytesWritable key = (KmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
KmerBytesWritable key = new KmerBytesWritable(conf.getInt(GenomixJob.KMER_LENGTH,
GenomixJob.DEFAULT_KMERLEN));
- KmerCountValue value = (KmerCountValue) ReflectionUtils.newInstance(reader.getValueClass(), conf);
-
+// KmerCountValue value = (KmerCountValue) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+ KmerBytesWritable value = null;
while (reader.next(key, value)) {
if (key == null || value == null) {
break;