allow references in all variable-length elements by keeping track of originally allowed space
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/NodeWritable.java
index b2ba461..3cb2216 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/NodeWritable.java
@@ -45,7 +45,7 @@
private PositionListWritable forwardReverseList;
private PositionListWritable reverseForwardList;
private PositionListWritable reverseReverseList;
- private KmerBytesWritable kmer;
+ private VKmerBytesWritable kmer;
public NodeWritable() {
this(21);
@@ -57,7 +57,7 @@
forwardReverseList = new PositionListWritable();
reverseForwardList = new PositionListWritable();
reverseReverseList = new PositionListWritable();
- kmer = new KmerBytesWritable(kmerSize);
+ kmer = new VKmerBytesWritable();
}
public NodeWritable(PositionWritable nodeID, PositionListWritable FFList, PositionListWritable FRList,
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
index cd70bb7..2aee32d 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
@@ -23,19 +23,19 @@
protected byte[] storage;
protected int offset;
protected int valueCount;
+ protected int storageMaxSize; // since we may be a reference inside a larger datablock, we must track our maximum size
private KmerBytesWritable posIter = new KmerBytesWritable();
public KmerListWritable() {
- this.storage = EMPTY_BYTES;
- this.valueCount = 0;
- this.offset = 0;
+ storage = EMPTY_BYTES; // start out as an empty list
+ valueCount = 0;
+ offset = 0;
+ storageMaxSize = storage.length; // not a reference into a larger block, so the whole array is our space
}
public KmerListWritable(byte[] data, int offset) {
-// setNewReference(data, offset);
- this();
- setCopy(data, offset);
+ setNewReference(data, offset); // now keeps a reference to `data` at `offset` instead of copying it
}
public KmerListWritable(List<KmerBytesWritable> kmers) {
@@ -46,15 +46,16 @@
}
}
-// public void setNewReference(byte[] data, int offset) {
-// valueCount = Marshal.getInt(data, offset);
-// if (valueCount * KmerBytesWritable.getBytesPerKmer() > data.length - offset) {
-// throw new IllegalArgumentException("Specified data buffer (len=" + (data.length - offset)
-// + ") is not large enough to store requested number of elements (" + valueCount + ")!");
-// }
-// this.storage = data;
-// this.offset = offset;
-// }
+ public void setNewReference(byte[] data, int offset) { // use the list serialized at data[offset..] in place, without copying
+ int newValueCount = Marshal.getInt(data, offset); // element count stored at the start of the block; our fields stay untouched until it is validated
+ if (newValueCount < 0 || (long) newValueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE > data.length - offset) { // header bytes count too; long math so a huge count cannot overflow past the check
+ throw new IllegalArgumentException("Specified data buffer (len=" + (data.length - offset) + ") is not large enough to store requested number of elements (" + newValueCount + ")!");
+ }
+ this.storage = data;
+ this.offset = offset;
+ this.valueCount = newValueCount;
+ this.storageMaxSize = newValueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE; // size of our region inside the borrowed buffer (header + elements)
+ }
public void append(KmerBytesWritable kmer) {
setSize((1 + valueCount) * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
@@ -108,17 +109,18 @@
}
protected int getCapacity() {
- return storage.length - offset;
+ return storageMaxSize; // storageMaxSize is already the byte size of our own region (setNewReference stores a length, not an end index); subtracting offset under-reported it for references at offset > 0
}
protected void setCapacity(int new_cap) {
if (new_cap > getCapacity()) {
byte[] new_data = new byte[new_cap];
- if (storage.length - offset > 0) {
- System.arraycopy(storage, offset, new_data, 0, storage.length - offset);
+ if (valueCount > 0) { // an empty list has nothing to carry over
+ System.arraycopy(storage, offset, new_data, 0, valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE); // copy only our header + elements, never bytes that follow us in a shared buffer
}
storage = new_data;
offset = 0;
+ storageMaxSize = storage.length; // the new array holds only this list, so all of it is usable
}
}
@@ -139,15 +141,16 @@
}
/**
- * save as a copy of the given data buffer, including the header
+ * Copy the serialized KmerListWritable found at newData[offset..] (header included) into this list's storage.
*/
public void setCopy(byte[] newData, int offset) {
- valueCount = Marshal.getInt(newData, offset);
- setSize(valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
- if (valueCount > 0) {
- System.arraycopy(newData, offset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, valueCount
+ int newValueCount = Marshal.getInt(newData, offset); // incoming element count, kept local for now
+ setSize(newValueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE); // make room for header + elements
+ if (newValueCount > 0) {
+ System.arraycopy(newData, offset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, newValueCount
* KmerBytesWritable.getBytesPerKmer());
}
+ valueCount = newValueCount; // NOTE(review): presumably deferred so setSize/setCapacity still see the old count while resizing - confirm
Marshal.putInt(valueCount, storage, this.offset);
}
@@ -190,9 +193,10 @@
while (posIterator.hasNext()) {
if (toRemove.equals(posIterator.next())) {
posIterator.remove();
- return;
+ return; // break as soon as the element is found
}
}
+ // element was not found
if (!ignoreMissing) {
throw new ArrayIndexOutOfBoundsException("the KmerBytesWritable `" + toRemove.toString()
+ "` was not found in this list.");
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 5d89ba2..362c12e 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -173,6 +173,23 @@
curOffset += reverseReverseList.getLength();
kmer.setAsCopy(data, curOffset);
}
+
+ public void setAsReference(byte[] data, int offset) { // bind each field to its serialized bytes inside data, in layout order
+ int curOffset = offset; // running start position of the next field within data
+ nodeIdList.setNewReference(data, curOffset);
+
+ curOffset += nodeIdList.getLength(); // each field begins right after the previous one
+ forwardForwardList.setNewReference(data, curOffset);
+ curOffset += forwardForwardList.getLength();
+ forwardReverseList.setNewReference(data, curOffset);
+ curOffset += forwardReverseList.getLength();
+ reverseForwardList.setNewReference(data, curOffset);
+ curOffset += reverseForwardList.getLength();
+ reverseReverseList.setNewReference(data, curOffset);
+
+ curOffset += reverseReverseList.getLength();
+ kmer.setAsReference(data, curOffset); // the kmer is the last field handled
+ }
@Override
public void write(DataOutput out) throws IOException {
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
index fa7d9d5..8de4b0e 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
@@ -15,24 +15,26 @@
public class PositionListWritable implements Writable, Iterable<PositionWritable>, Serializable {
private static final long serialVersionUID = 1L;
+ protected static final byte[] EMPTY_BYTES = {0,0,0,0};
+ protected static final int HEADER_SIZE = 4;
+
protected byte[] storage;
protected int offset;
protected int valueCount;
- protected static final byte[] EMPTY_BYTES = {0,0,0,0};
- protected static final int HEADER_SIZE = 4;
+ protected int maxStorageSize;
+
protected PositionWritable posIter = new PositionWritable();
public PositionListWritable() {
- this.storage = EMPTY_BYTES;
- this.valueCount = 0;
- this.offset = 0;
+ storage = EMPTY_BYTES; // start out as an empty list
+ valueCount = 0;
+ offset = 0;
+ maxStorageSize = storage.length; // not a reference into a larger block, so the whole array is our space
}
public PositionListWritable(byte[] data, int offset) {
-// setNewReference(data, offset);
- this();
- set(data, offset);
+ setNewReference(data, offset); // now keeps a reference to `data` at `offset` instead of copying it
}
public PositionListWritable(List<PositionWritable> posns) {
@@ -42,14 +44,13 @@
append(p);
}
}
-
-// // TODO: we currently don't track who actually owns each block so variable-length references are impossible. We could track 1) if it's a reference and 2) the size of the original reference block, but that would require a check for every setSize and might be coplicated logic
-// public void setNewReference(byte[] data, int offset) {
-// this.valueCount = Marshal.getInt(data, offset);
-// this.storage = data;
-// this.offset = offset;
-// isReference = true;
-// }
+
+ public void setNewReference(byte[] data, int offset) { // use the list serialized at data[offset..] in place, without copying
+ this.valueCount = Marshal.getInt(data, offset); // element count stored at the start of the block. NOTE(review): unlike KmerListWritable.setNewReference, nothing checks that data is long enough for this count - confirm callers guarantee it
+ this.storage = data;
+ this.offset = offset;
+ maxStorageSize = valueCount * PositionWritable.LENGTH + HEADER_SIZE; // byte size of our region (header + elements) inside the borrowed buffer
+ }
public void append(long uuid) {
setSize((1 + valueCount) * PositionWritable.LENGTH + HEADER_SIZE);
@@ -115,11 +116,12 @@
}
public void set(byte[] newData, int newOffset) {
- valueCount = Marshal.getInt(newData, newOffset);
- setSize(valueCount * PositionWritable.LENGTH + HEADER_SIZE);
- if (valueCount > 0) {
- System.arraycopy(newData, newOffset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, valueCount * PositionWritable.LENGTH);
+ int newValueCount = Marshal.getInt(newData, newOffset); // incoming element count, kept local for now
+ setSize(newValueCount * PositionWritable.LENGTH + HEADER_SIZE); // make room for header + elements
+ if (newValueCount > 0) {
+ System.arraycopy(newData, newOffset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, newValueCount * PositionWritable.LENGTH); // copy the elements that follow the source header
}
+ valueCount = newValueCount; // NOTE(review): presumably deferred so setSize/setCapacity still see the old count while resizing - confirm
Marshal.putInt(valueCount, storage, this.offset);
}
@@ -135,20 +137,18 @@
}
protected int getCapacity() {
-// if (isReference) { // my storage is a borrowed reference so I can't expand beyond my original size
-// return valueCount * PositionWritable.LENGTH + HEADER_SIZE;
-// }
- return storage.length - offset;
+ return maxStorageSize; // maxStorageSize is already the byte size of our own region (setNewReference stores a length, not an end index); subtracting offset under-reported it for references at offset > 0
}
protected void setCapacity(int new_cap) {
if (new_cap > getCapacity()) {
byte[] new_data = new byte[new_cap];
- if (storage.length - offset > 0) {
- System.arraycopy(storage, offset, new_data, 0, storage.length - offset);
+ if (valueCount > 0) { // an empty list has nothing to carry over
+ System.arraycopy(storage, offset, new_data, 0, valueCount * PositionWritable.LENGTH + HEADER_SIZE); // copy only our header + elements, never bytes that follow us in a shared buffer
}
storage = new_data;
offset = 0;
+ maxStorageSize = storage.length; // the new array holds only this list, so all of it is usable
}
}
@@ -221,9 +221,10 @@
while (posIterator.hasNext()) {
if (toRemove.equals(posIterator.next())) {
posIterator.remove();
- return;
+ return; // found it. return early.
}
}
+ // element not found.
if (!ignoreMissing) {
throw new ArrayIndexOutOfBoundsException("the PositionWritable `" + toRemove.toString()
+ "` was not found in this list.");
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index fc664c5..c38e35d 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -42,6 +42,7 @@
protected int bytesUsed;
protected byte[] bytes;
protected int kmerStartOffset;
+ protected int storageMaxSize; // since we may be a reference inside a larger datablock, we must track our maximum size
/**
* Initialize as empty kmer
@@ -56,6 +57,7 @@
public VKmerBytesWritable(String kmer) {
bytes = new byte[HEADER_SIZE + KmerUtil.getByteNumFromK(kmer.length())];
kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length; // freshly allocated array, so all of it is ours to use
setAsCopy(kmer);
}
@@ -82,6 +84,7 @@
throw new IllegalArgumentException("Invalid K (" + k + ").");
}
kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length;
setKmerLength(k);
}
@@ -146,6 +149,7 @@
throw new IllegalArgumentException("Requested " + bytesRequested + " bytes (k=" + kRequested
+ ") but buffer has only " + (newData.length - blockOffset) + " bytes");
}
+ storageMaxSize = bytesRequested; // since we are a reference, store our max capacity
setKmerLength(kRequested);
}
@@ -159,6 +163,7 @@
if (bytesUsed < newByteLength) {
bytes = new byte[newByteLength + HEADER_SIZE];
kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length;
}
setKmerLength(k);
}
@@ -300,7 +305,7 @@
}
protected int getKmerByteCapacity() {
- return bytes.length - HEADER_SIZE;
+ return storageMaxSize - HEADER_SIZE; // bytes available for the kmer once the header is excluded; storageMaxSize rather than bytes.length because we may be a reference inside a larger block
}
protected void setKmerByteCapacity(int new_cap) {
@@ -314,6 +319,7 @@
}
bytes = new_data;
kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length;
}
}
@@ -329,6 +335,7 @@
if (getKmerByteCapacity() < this.bytesUsed) {
this.bytes = new byte[this.bytesUsed + HEADER_SIZE];
this.kmerStartOffset = HEADER_SIZE;
+ storageMaxSize = bytes.length;
}
in.readFully(bytes, kmerStartOffset, bytesUsed);
}