Use VKmers in lists
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java
index 44b2772..620e8ad 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java
@@ -10,12 +10,13 @@
 
 import org.apache.hadoop.io.Writable;
 
+import edu.uci.ics.genomix.data.KmerUtil;
 import edu.uci.ics.genomix.data.Marshal;
 
 /**
  * A list of fixed-length kmers. The length of this list is stored internally.
  */
-public class VKmerListWritable implements Writable, Iterable<KmerBytesWritable>, Serializable {
+public class VKmerListWritable implements Writable, Iterable<VKmerBytesWritable>, Serializable {
     private static final long serialVersionUID = 1L;
     protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 };
     protected static final int HEADER_SIZE = 4;
@@ -25,7 +26,7 @@
     protected int valueCount;
     protected int storageMaxSize;  // since we may be a reference inside a larger datablock, we must track our maximum size
 
-    private KmerBytesWritable posIter = new KmerBytesWritable();
+    private VKmerBytesWritable posIter = new VKmerBytesWritable();
 
     public VKmerListWritable() {
         storage = EMPTY_BYTES;
@@ -38,30 +39,25 @@
         setNewReference(data, offset);
     }
 
-    public VKmerListWritable(List<KmerBytesWritable> kmers) {
+    public VKmerListWritable(List<VKmerBytesWritable> kmers) {
         this();
-        setSize(kmers.size() * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE); // reserve space for all elements
-        for (KmerBytesWritable kmer : kmers) {
+        for (VKmerBytesWritable kmer : kmers) {
             append(kmer);
         }
     }
 
     public void setNewReference(byte[] data, int offset) {
         valueCount = Marshal.getInt(data, offset);
-        if (valueCount * KmerBytesWritable.getBytesPerKmer() > data.length - offset) {
-            throw new IllegalArgumentException("Specified data buffer (len=" + (data.length - offset)
-                    + ") is not large enough to store requested number of elements (" + valueCount + ")!");
-        }
         this.storage = data;
         this.offset = offset;
-        this.storageMaxSize = valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE;
+        this.storageMaxSize = getLength();
     }
 
-    public void append(KmerBytesWritable kmer) {
-        setSize((1 + valueCount) * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
-        System.arraycopy(kmer.getBytes(), 0, storage,
-                offset + HEADER_SIZE + valueCount * KmerBytesWritable.getBytesPerKmer(),
-                KmerBytesWritable.getBytesPerKmer());
+    public void append(VKmerBytesWritable kmer) {
+        setSize(getLength() + kmer.getLength());
+        System.arraycopy(kmer.getBytes(), kmer.kmerStartOffset - VKmerBytesWritable.HEADER_SIZE,
+                storage, offset + getLength(), 
+                kmer.getLength());
         valueCount += 1;
         Marshal.putInt(valueCount, storage, offset);
     }
@@ -71,11 +67,12 @@
      */
     public void appendList(VKmerListWritable otherList) {
         if (otherList.valueCount > 0) {
-            setSize((valueCount + otherList.valueCount) * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+            setSize(getLength() + otherList.getLength() - HEADER_SIZE); // one of the headers is redundant
+            
             // copy contents of otherList into the end of my storage
-            System.arraycopy(otherList.storage, otherList.offset + HEADER_SIZE, storage, offset + HEADER_SIZE
-                    + valueCount * KmerBytesWritable.getBytesPerKmer(),
-                    otherList.valueCount * KmerBytesWritable.getBytesPerKmer());
+            System.arraycopy(otherList.storage, otherList.offset + HEADER_SIZE, // skip other header
+                    storage, offset + getLength(), // add to end
+                    otherList.getLength() - HEADER_SIZE);
             valueCount += otherList.valueCount;
             Marshal.putInt(valueCount, storage, offset);
         }
@@ -87,16 +84,17 @@
      */
     public void unionUpdate(VKmerListWritable otherList) {
         int newSize = valueCount + otherList.valueCount;
-        HashSet<KmerBytesWritable> uniqueElements = new HashSet<KmerBytesWritable>(newSize);
-        for (KmerBytesWritable kmer : this) {
-            uniqueElements.add(kmer);
+        HashSet<VKmerBytesWritable> uniqueElements = new HashSet<VKmerBytesWritable>(newSize);
+        for (VKmerBytesWritable kmer : this) {
+            // have to make copies of my own kmers since I may overwrite them
+            uniqueElements.add(new VKmerBytesWritable(kmer));
         }
-        for (KmerBytesWritable kmer : otherList) {
-            uniqueElements.add(kmer);
+        for (VKmerBytesWritable kmer : otherList) {
+            uniqueElements.add(kmer); // references okay
         }
+        setSize(getLength() + otherList.getLength());  // upper bound on memory usage
         valueCount = 0;
-        setSize(newSize * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
-        for (KmerBytesWritable kmer : uniqueElements) {
+        for (VKmerBytesWritable kmer : uniqueElements) {
             append(kmer);
         }
         Marshal.putInt(valueCount, storage, offset);
@@ -116,7 +114,7 @@
         if (new_cap > getCapacity()) {
             byte[] new_data = new byte[new_cap];
             if (valueCount > 0) {
-                System.arraycopy(storage, offset, new_data, 0, valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+                System.arraycopy(storage, offset, new_data, 0, getLength());
             }
             storage = new_data;
             offset = 0;
@@ -126,15 +124,28 @@
 
     public void reset() {
         valueCount = 0;
+        Marshal.putInt(valueCount, storage, offset);
     }
 
-    public KmerBytesWritable getPosition(int i) {
-        if (i >= valueCount) {
-            throw new ArrayIndexOutOfBoundsException("No such positions");
-        }
-        posIter.setAsReference(storage, offset + HEADER_SIZE + i * KmerBytesWritable.getBytesPerKmer());
+    public VKmerBytesWritable getPosition(int i) {
+        posIter.setAsReference(storage, getOffsetOfKmer(i));
         return posIter;
     }
+    
+    /**
+     * Return the offset of the kmer at the i'th position 
+     */
+    public int getOffsetOfKmer(int i) {
+        if (i >= valueCount) {
+            throw new ArrayIndexOutOfBoundsException("No such position " + i + " in list " + toString());
+        }
+        // seek to the given position
+        int posOffset = offset + HEADER_SIZE;
+        for (int curIndex = 0; curIndex < i; curIndex++) {
+            posOffset += KmerUtil.getByteNumFromK(Marshal.getInt(storage, posOffset)) + VKmerBytesWritable.HEADER_SIZE;
+        }
+        return posOffset;
+    }
 
     public void setCopy(VKmerListWritable otherList) {
         setCopy(otherList.storage, otherList.offset);
@@ -143,22 +154,25 @@
     /**
      * read a KmerListWritable from newData, which should include the header
      */
-    public void setCopy(byte[] newData, int offset) {
-        int newValueCount = Marshal.getInt(newData, offset);
-        setSize(newValueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+    public void setCopy(byte[] newData, int newOffset) {
+        int newValueCount = Marshal.getInt(newData, newOffset);
+        int newLength = getLength(newData, newOffset);
+        setSize(newLength);
         if (newValueCount > 0) {
-            System.arraycopy(newData, offset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, newValueCount
-                    * KmerBytesWritable.getBytesPerKmer());
+            System.arraycopy(newData, newOffset + HEADER_SIZE, 
+                    storage, this.offset + HEADER_SIZE,
+                    newLength - HEADER_SIZE);
         }
         valueCount = newValueCount;
         Marshal.putInt(valueCount, storage, this.offset);
     }
 
     @Override
-    public Iterator<KmerBytesWritable> iterator() {
-        Iterator<KmerBytesWritable> it = new Iterator<KmerBytesWritable>() {
+    public Iterator<VKmerBytesWritable> iterator() {
+        Iterator<VKmerBytesWritable> it = new Iterator<VKmerBytesWritable>() {
 
             private int currentIndex = 0;
+            private int currentOffset = HEADER_SIZE; // init as offset of first kmer
 
             @Override
             public boolean hasNext() {
@@ -166,19 +180,31 @@
             }
 
             @Override
-            public KmerBytesWritable next() {
-                return getPosition(currentIndex++);
+            public VKmerBytesWritable next() {
+                posIter.setAsReference(storage, currentOffset);
+                currentOffset += KmerUtil.getByteNumFromK(Marshal.getInt(storage, currentOffset)) + VKmerBytesWritable.HEADER_SIZE;
+                currentIndex++;
+                return posIter;
             }
 
             @Override
             public void remove() {
-                if (currentIndex < valueCount)
-                    System.arraycopy(storage, offset + currentIndex * KmerBytesWritable.getBytesPerKmer(), storage,
-                            offset + (currentIndex - 1) * KmerBytesWritable.getBytesPerKmer(),
-                            (valueCount - currentIndex) * KmerBytesWritable.getBytesPerKmer());
+                if (currentOffset <= 0) {
+                    throw new IllegalStateException("You must advance the iterator using .next() before calling remove()!");
+                }
+                // we're removing the element at prevIndex
+                int prevIndex = currentIndex - 1;
+                int prevOffset = getOffsetOfKmer(prevIndex);
+                
+                if (currentIndex < valueCount) { // if it's the last element, don't have to do any copying
+                    System.arraycopy(storage, currentOffset, // from the "next" element
+                            storage, prevOffset, // to the one just returned (overwriting it)
+                            getLength() - currentOffset + offset);  // remaining bytes except current element
+                }
                 valueCount--;
                 currentIndex--;
                 Marshal.putInt(valueCount, storage, offset);
+                currentOffset = prevOffset;
             }
         };
         return it;
@@ -189,7 +215,7 @@
      * exception if not in this list.
      */
     public void remove(KmerBytesWritable toRemove, boolean ignoreMissing) {
-        Iterator<KmerBytesWritable> posIterator = this.iterator();
+        Iterator<VKmerBytesWritable> posIterator = this.iterator();
         while (posIterator.hasNext()) {
             if (toRemove.equals(posIterator.next())) {
                 posIterator.remove();
@@ -209,15 +235,27 @@
 
     @Override
     public void readFields(DataInput in) throws IOException {
+        reset();
         valueCount = in.readInt();
-        setSize(valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
-        in.readFully(storage, offset + HEADER_SIZE, valueCount * KmerBytesWritable.getBytesPerKmer() - HEADER_SIZE);
         Marshal.putInt(valueCount, storage, offset);
+        int curOffset = offset + HEADER_SIZE;
+        int elemBytes = 0;
+        int elemLetters = 0;
+        int curLength = getLength();
+        for (int i = 0; i < valueCount; i++) {
+            elemLetters = in.readInt();
+            elemBytes = KmerUtil.getByteNumFromK(elemLetters) + VKmerBytesWritable.HEADER_SIZE;
+            setSize(curLength + elemBytes); // make sure we have room for the new element
+            Marshal.putInt(elemLetters, storage, curOffset); // write header
+            in.readFully(storage, curOffset + VKmerBytesWritable.HEADER_SIZE, elemBytes - VKmerBytesWritable.HEADER_SIZE); // write kmer
+            curOffset += elemBytes;
+            curLength += elemBytes;
+        }
     }
 
     @Override
     public void write(DataOutput out) throws IOException {
-        out.write(storage, offset, valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+        out.write(storage, offset, getLength());
     }
 
     public int getCountOfPosition() {
@@ -233,8 +271,21 @@
     }
 
     public int getLength() {
-        return valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE;
+        int totalSize = HEADER_SIZE;
+        for (int curCount = 0; curCount < valueCount; curCount++) {
+            totalSize += KmerUtil.getByteNumFromK(Marshal.getInt(storage, offset + totalSize)) + VKmerBytesWritable.HEADER_SIZE;
+        }
+        return totalSize;
     }
+    
+    public static int getLength(byte[] listStorage, int listOffset) {
+      int totalSize = HEADER_SIZE;
+      int listValueCount = Marshal.getInt(listStorage, listOffset);
+      for (int curCount = 0; curCount < listValueCount; curCount++) {
+          totalSize += KmerUtil.getByteNumFromK(Marshal.getInt(listStorage, listOffset + totalSize)) + VKmerBytesWritable.HEADER_SIZE;
+      }
+      return totalSize;
+  }
 
     @Override
     public String toString() {
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
index 25e74bd..9ca4488 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
@@ -7,7 +7,7 @@
 
 import org.junit.Test;
 
-import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
 import edu.uci.ics.genomix.type.VKmerListWritable;
 
 public class KmerListWritableTest {
@@ -18,10 +18,9 @@
         Assert.assertEquals(kmerList.getCountOfPosition(), 0);
         
         //one kmer in list and reset each time
-        KmerBytesWritable kmer;
+        VKmerBytesWritable kmer;
         for (int i = 1; i < 200; i++) {
-            KmerBytesWritable.setGlobalKmerLength(i);
-            kmer = new KmerBytesWritable();
+            kmer = new VKmerBytesWritable(i);
             String randomString = generaterRandomString(i);
             byte[] array = randomString.getBytes();
             kmer.setByRead(array, 0);
@@ -32,10 +31,9 @@
         }
         
         kmerList.reset();
-        KmerBytesWritable.setGlobalKmerLength(5);
         //add one more kmer each time and fix kmerSize
         for (int i = 0; i < 200; i++) {
-            kmer = new KmerBytesWritable();
+            kmer = new VKmerBytesWritable(5);
             String randomString = generaterRandomString(5);
             byte[] array = randomString.getBytes();
             kmer.setByRead(array, 0);
@@ -59,10 +57,9 @@
         Assert.assertEquals(kmerList.getCountOfPosition(), 0);
         
         int i;
-        KmerBytesWritable kmer;
+        VKmerBytesWritable kmer;
         for (i = 0; i < 200; i++) {
-            KmerBytesWritable.setGlobalKmerLength(5);
-            kmer = new KmerBytesWritable();
+            kmer = new VKmerBytesWritable(5);
             String randomString = generaterRandomString(5);
             byte[] array = randomString.getBytes();
             kmer.setByRead(array, 0);
@@ -72,23 +69,26 @@
         }
         
         //delete one element each time
-        KmerBytesWritable tmpKmer = new KmerBytesWritable();
+        VKmerBytesWritable tmpKmer = new VKmerBytesWritable(5);
         i = 0;
         VKmerListWritable copyList = new VKmerListWritable();
         copyList.setCopy(kmerList);
-        Iterator<KmerBytesWritable> iterator;
+        Iterator<VKmerBytesWritable> iterator;
         for(int j = 0; j < 5; j++){
             iterator = copyList.iterator();
             byte[] array = kmerList.getPosition(j).toString().getBytes();
-            KmerBytesWritable deletePos = new KmerBytesWritable();
+            VKmerBytesWritable deletePos = new VKmerBytesWritable(5);
             deletePos.setByRead(array, 0);
+            boolean removed = false;
             while(iterator.hasNext()){
                 tmpKmer = iterator.next();
                 if(tmpKmer.equals(deletePos)){
                     iterator.remove();
+                    removed = true;
                     break;
                 }
             }
+            Assert.assertTrue(removed);
             Assert.assertEquals(200 - 1 - j, copyList.getCountOfPosition());
             while(iterator.hasNext()){
                 tmpKmer = iterator.next();
@@ -107,14 +107,13 @@
         
         Assert.assertEquals(0, kmerList.getCountOfPosition());
         
-        KmerBytesWritable.setGlobalKmerLength(3);
         VKmerListWritable edgeList = new VKmerListWritable();
-        KmerBytesWritable k = new KmerBytesWritable();
+        VKmerBytesWritable k = new VKmerBytesWritable(3);
         k.setByRead(("AAA").getBytes(), 0);
         edgeList.append(k);
         k.setByRead(("CCC").getBytes(), 0);
         edgeList.append(k);
-        for(KmerBytesWritable edge : edgeList){
+        for(VKmerBytesWritable edge : edgeList){
         	System.out.println(edge.toString());
         }
     }