Change KmerListWritable and NodeWritable to the new construction for the new graph ('Kmer-Base system as key')
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
index e19bb0f..0f91a7c 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
@@ -17,7 +17,8 @@
protected byte[] storage;
protected int offset;
protected int valueCount;
- public int kmerByteSize = 2; //default kmerSize = 5, kmerByteSize = 2, fix length once setting
+ public int kmerByteSize = 0; //bytes per kmer; starts at 0 and is assigned by the kmerlength constructor, append() and set(KmerListWritable)
+ public int kmerlength = 0; //kmer length; starts at 0 and is assigned together with kmerByteSize
protected static final byte[] EMPTY = {};
protected KmerBytesWritable posIter = new KmerBytesWritable();
@@ -37,6 +38,12 @@
setNewReference(count, data, offset);
}
+ public KmerListWritable(int kmerlength, int count, byte[] data, int offset) {
+ this.kmerlength = kmerlength;
+ this.kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);
+ setNewReference(count, data, offset);
+ }
+
public KmerListWritable(List<KmerBytesWritable> kmers) {
this();
setSize(kmers.size()); // reserve space for all elements
@@ -52,8 +59,10 @@
}
public void append(KmerBytesWritable kmer){
- setSize((1 + valueCount) * kmerByteSize);
- System.arraycopy(kmer.getBytes(), 0, storage, offset, kmerByteSize);
+ kmerByteSize = kmer.kmerByteSize; // the list takes its per-element byte width from the kmer being appended
+ kmerlength = kmer.kmerlength; // NOTE(review): both fields are overwritten even when the list already holds kmers - confirm callers never mix kmer sizes
+ setSize((1 + valueCount) * kmerByteSize);
+ System.arraycopy(kmer.getBytes(), 0, storage, offset + valueCount * kmerByteSize, kmerByteSize); // destination is the slot after the existing valueCount elements
valueCount += 1;
}
@@ -79,18 +88,22 @@
}
public void reset() {
+ storage = EMPTY; // stop referencing the current backing array; point at the static empty array instead
valueCount = 0;
+ offset = 0; // the empty array has no leading bytes to skip
}
public KmerBytesWritable getPosition(int i) {
if (i >= valueCount) {
throw new ArrayIndexOutOfBoundsException("No such positions");
}
- posIter.setNewReference(storage, offset + i * kmerByteSize);
+ posIter.setNewReference(kmerlength, storage, offset + i * kmerByteSize);
return posIter;
}
public void set(KmerListWritable otherList) {
+ this.kmerlength = otherList.kmerlength;
+ this.kmerByteSize = otherList.kmerByteSize;
set(otherList.valueCount, otherList.storage, otherList.offset);
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 60e733d..00aa633 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -7,19 +7,20 @@
import org.apache.hadoop.io.WritableComparable;
-import edu.uci.ics.genomix.type.PositionListWritable;
public class NodeWritable implements WritableComparable<NodeWritable>, Serializable{
private static final long serialVersionUID = 1L;
public static final NodeWritable EMPTY_NODE = new NodeWritable(0);
- private PositionListWritable forwardForwardList;
- private PositionListWritable forwardReverseList;
- private PositionListWritable reverseForwardList;
- private PositionListWritable reverseReverseList;
+ private PositionWritable nodeId;
+ private KmerListWritable forwardForwardList;
+ private KmerListWritable forwardReverseList;
+ private KmerListWritable reverseForwardList;
+ private KmerListWritable reverseReverseList;
private KmerBytesWritable kmer;
+
// merge/update directions
public static class DirectionFlag {
public static final byte DIR_FF = 0b00 << 0;
@@ -34,26 +35,28 @@
}
public NodeWritable(int kmerSize) {
- forwardForwardList = new PositionListWritable();
- forwardReverseList = new PositionListWritable();
- reverseForwardList = new PositionListWritable();
- reverseReverseList = new PositionListWritable();
+ nodeId = new PositionWritable();
+ forwardForwardList = new KmerListWritable();
+ forwardReverseList = new KmerListWritable();
+ reverseForwardList = new KmerListWritable();
+ reverseReverseList = new KmerListWritable();
kmer = new KmerBytesWritable(kmerSize);
}
- public NodeWritable(PositionListWritable FFList, PositionListWritable FRList,
- PositionListWritable RFList, PositionListWritable RRList, KmerBytesWritable kmer) {
+ public NodeWritable(PositionWritable nodeId, KmerListWritable FFList, KmerListWritable FRList,
+ KmerListWritable RFList, KmerListWritable RRList, KmerBytesWritable kmer) {
this(kmer.getKmerLength());
- set(FFList, FRList, RFList, RRList, kmer);
+ set(nodeId, FFList, FRList, RFList, RRList, kmer);
}
public void set(NodeWritable node){
- set(node.forwardForwardList, node.forwardReverseList, node.reverseForwardList,
+ set(node.nodeId, node.forwardForwardList, node.forwardReverseList, node.reverseForwardList,
node.reverseReverseList, node.kmer);
}
- public void set(PositionListWritable FFList, PositionListWritable FRList,
- PositionListWritable RFList, PositionListWritable RRList, KmerBytesWritable kmer) {
+ public void set(PositionWritable nodeId, KmerListWritable FFList, KmerListWritable FRList,
+ KmerListWritable RFList, KmerListWritable RRList, KmerBytesWritable kmer) {
+ this.nodeId.set(nodeId);
this.forwardForwardList.set(FFList);
this.forwardReverseList.set(FRList);
this.reverseForwardList.set(RFList);
@@ -62,6 +65,7 @@
}
public void reset(int kmerSize) {
+ nodeId.reset();
forwardForwardList.reset();
forwardReverseList.reset();
reverseForwardList.reset();
@@ -69,6 +73,14 @@
kmer.reset(kmerSize);
}
+ public PositionWritable getNodeId() {
+ return nodeId;
+ }
+
+ public void setNodeId(PositionWritable nodeId) { // copies the given id into this node's own PositionWritable
+ this.nodeId.set(nodeId); // copy, as set(...) does, rather than alias an instance the caller may go on mutating
+ }
+
public KmerBytesWritable getKmer() {
return kmer;
}
@@ -81,23 +93,23 @@
return kmer.getKmerLength();
}
- public PositionListWritable getFFList() {
+ public KmerListWritable getFFList() {
return forwardForwardList;
}
- public PositionListWritable getFRList() {
+ public KmerListWritable getFRList() {
return forwardReverseList;
}
- public PositionListWritable getRFList() {
+ public KmerListWritable getRFList() {
return reverseForwardList;
}
- public PositionListWritable getRRList() {
+ public KmerListWritable getRRList() {
return reverseReverseList;
}
- public PositionListWritable getListFromDir(byte dir) {
+ public KmerListWritable getListFromDir(byte dir) {
switch (dir & DirectionFlag.DIR_MASK) {
case DirectionFlag.DIR_FF:
return getFFList();
@@ -113,6 +125,7 @@
}
@Override
public void write(DataOutput out) throws IOException {
+ this.nodeId.write(out);
this.forwardForwardList.write(out);
this.forwardReverseList.write(out);
this.reverseForwardList.write(out);
@@ -122,6 +135,7 @@
@Override
public void readFields(DataInput in) throws IOException {
+ this.nodeId.readFields(in);
this.forwardForwardList.readFields(in);
this.forwardReverseList.readFields(in);
this.reverseForwardList.readFields(in);
@@ -143,7 +157,8 @@
public boolean equals(Object o) {
if (o instanceof NodeWritable) {
NodeWritable nw = (NodeWritable) o;
- return (this.forwardForwardList.equals(nw.forwardForwardList)
+ return (this.nodeId.equals(nw.nodeId)
+ && this.forwardForwardList.equals(nw.forwardForwardList)
&& this.forwardReverseList.equals(nw.forwardReverseList)
&& this.reverseForwardList.equals(nw.reverseForwardList)
&& this.reverseReverseList.equals(nw.reverseReverseList) && this.kmer.equals(nw.kmer));
@@ -155,6 +170,7 @@
public String toString() {
StringBuilder sbuilder = new StringBuilder();
sbuilder.append('(');
+ sbuilder.append(nodeId.toString()).append('\t');
sbuilder.append(forwardForwardList.toString()).append('\t');
sbuilder.append(forwardReverseList.toString()).append('\t');
sbuilder.append(reverseForwardList.toString()).append('\t');
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
new file mode 100644
index 0000000..c31ca6d
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
@@ -0,0 +1,118 @@
+package edu.uci.ics.genomix.data.test;
+
+import java.util.Iterator;
+import java.util.Random;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerListWritable;
+
+/** Tests appending to, reading from, re-referencing and removing from a {@link KmerListWritable}. */
+public class KmerListWritableTest {
+
+    /** Seeded so that a failing run can be replayed with the same kmers. */
+    private final Random random = new Random(42L);
+
+    /** Checks append and indexed reads for kmer lengths 1..199, then for a growing list of 5-mers. */
+    @Test
+    public void TestInitial() {
+        KmerListWritable kmerList = new KmerListWritable();
+        Assert.assertEquals(0, kmerList.getCountOfPosition());
+        // one kmer in list and reset each time
+        for (int k = 1; k < 200; k++) {
+            String expected = generateString(k);
+            KmerBytesWritable kmer = new KmerBytesWritable(k);
+            kmer.setByRead(expected.getBytes(), 0);
+            kmerList.reset();
+            kmerList.append(kmer);
+            Assert.assertEquals(expected, kmerList.getPosition(0).toString());
+            Assert.assertEquals(1, kmerList.getCountOfPosition());
+        }
+
+        // add one more kmer each time and fix kmerSize
+        kmerList.reset();
+        for (int i = 0; i < 200; i++) {
+            String expected = generateString(5);
+            KmerBytesWritable kmer = new KmerBytesWritable(5);
+            kmer.setByRead(expected.getBytes(), 0);
+            kmerList.append(kmer);
+            Assert.assertEquals(expected, kmerList.getPosition(i).toString());
+            Assert.assertEquals(i + 1, kmerList.getCountOfPosition());
+        }
+
+        // a list built over a copy of the bytes at a non-zero offset must read back the same kmers
+        byte[] another = new byte[kmerList.getLength() * 2];
+        int start = 20;
+        System.arraycopy(kmerList.getByteArray(), 0, another, start, kmerList.getLength());
+        KmerListWritable plist2 = new KmerListWritable(kmerList.kmerlength, kmerList.getCountOfPosition(), another, start);
+        for (int i = 0; i < plist2.getCountOfPosition(); i++) {
+            Assert.assertEquals(kmerList.getPosition(i).toString(), plist2.getPosition(i).toString());
+        }
+    }
+
+    /** Removes through the list iterator: five elements from a copy, then every element of the original. */
+    @Test
+    public void TestRemove() {
+        KmerListWritable kmerList = new KmerListWritable();
+        Assert.assertEquals(0, kmerList.getCountOfPosition());
+        // distinct kmers: once one is removed, no equal element may remain in the list
+        for (int i = 0; i < 200; i++) {
+            String expected = distinctKmer(i, 5);
+            KmerBytesWritable kmer = new KmerBytesWritable(5);
+            kmer.setByRead(expected.getBytes(), 0);
+            kmerList.append(kmer);
+            Assert.assertEquals(expected, kmerList.getPosition(i).toString());
+            Assert.assertEquals(i + 1, kmerList.getCountOfPosition());
+        }
+
+        // delete one element each time
+        KmerListWritable copyList = new KmerListWritable();
+        copyList.set(kmerList);
+        for (int j = 0; j < 5; j++) {
+            KmerBytesWritable deletePos = new KmerBytesWritable(5);
+            deletePos.setByRead(kmerList.getPosition(j).toString().getBytes(), 0);
+            Iterator<KmerBytesWritable> copyIter = copyList.iterator();
+            while (copyIter.hasNext()) {
+                if (copyIter.next().equals(deletePos)) {
+                    copyIter.remove();
+                    break;
+                }
+            }
+            Assert.assertEquals(200 - 1 - j, copyList.getCountOfPosition());
+            // compare kmer contents: byte[].equals is reference equality and could never fail here
+            while (copyIter.hasNext()) {
+                Assert.assertFalse(copyIter.next().equals(deletePos));
+            }
+        }
+
+        // delete all the elements
+        Iterator<KmerBytesWritable> allIter = kmerList.iterator();
+        while (allIter.hasNext()) {
+            allIter.next();
+            allIter.remove();
+        }
+        Assert.assertEquals(0, kmerList.getCountOfPosition());
+    }
+
+    /** Returns a pseudo-random string of {@code n} letters drawn from "ACGT". */
+    public String generateString(int n) {
+        char[] chars = "ACGT".toCharArray();
+        StringBuilder sb = new StringBuilder(n);
+        for (int i = 0; i < n; i++) {
+            sb.append(chars[random.nextInt(chars.length)]);
+        }
+        return sb.toString();
+    }
+
+    /** Spells {@code index} in base 4 over "ACGT" with {@code k} letters; distinct for indexes below 4^k. */
+    private static String distinctKmer(int index, int k) {
+        char[] letters = new char[k];
+        for (int pos = k - 1, rest = index; pos >= 0; pos--, rest >>= 2) {
+            letters[pos] = "ACGT".charAt(rest & 3);
+        }
+        return new String(letters);
+    }
+}