Use Hashset for unionUpdate-ing with another KmerListWritable
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
index 00ed2b7..6bf8dac 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
@@ -4,6 +4,7 @@
 import java.io.DataOutput;
 import java.io.IOException;
 import java.io.Serializable;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 
@@ -12,225 +13,256 @@
 import edu.uci.ics.genomix.data.KmerUtil;
 import edu.uci.ics.genomix.data.Marshal;
 
-public class KmerListWritable implements Writable, Iterable<KmerBytesWritable>, Serializable{
-    private static final long serialVersionUID = 1L;
-    private static final byte[] EMPTY_BYTES = {0,0,0,0};
-    
-    private byte[] storage;
-    private int offset;
-    private int valueCount;
-    
-    private int kmerByteSize = 0; 
-    private int kmerlength = 0;
-    private KmerBytesWritable posIter = new KmerBytesWritable();
-    
-    public KmerListWritable() {
-        this.storage = EMPTY_BYTES;
-        this.valueCount = 0;
-        this.offset = 0;
-    }
-    
-    public KmerListWritable(int kmerlength) {
-        this();
-        this.kmerlength = kmerlength;
-        this.kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);
-    }
-    
-    public KmerListWritable(int kmerlength, int count, byte[] data, int offset) {
-        this.kmerlength = kmerlength;
-        this.kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);
-        setNewReference(count, data, offset);
-    }
-    
-    public KmerListWritable(List<KmerBytesWritable> kmers) {
-        this();
-        setSize(kmers.size());  // reserve space for all elements
-        for (KmerBytesWritable kmer : kmers) {
-            append(kmer);
-        }
-    }
-    
-    public void setNewReference(int count, byte[] data, int offset) {
-        this.valueCount = count;
-        this.storage = data;
-        this.offset = offset;
-    }
-    
-    public void append(KmerBytesWritable kmer){
-        if(kmer != null){
-            kmerByteSize = kmer.kmerByteSize;
-            kmerlength = kmer.kmerlength;
-            setSize((1 + valueCount) * kmerByteSize); 
-            System.arraycopy(kmer.getBytes(), 0, storage, offset + valueCount * kmerByteSize, kmerByteSize);
-            valueCount += 1;
-        }
-    }
-    
-    /*
-     * Append the otherList to the end of myList
-     */
-    public void appendList(KmerListWritable otherList) {
-        if (otherList.valueCount > 0) {
-            setSize((valueCount + otherList.valueCount) * kmerByteSize);
-            // copy contents of otherList into the end of my storage
-            System.arraycopy(otherList.storage, otherList.offset,
-                    storage, offset + valueCount * kmerByteSize, 
-                    otherList.valueCount * kmerByteSize);
-            valueCount += otherList.valueCount;
-        }
-    }
-    
-    protected void setSize(int size) {
-        if (size > getCapacity()) {
-            setCapacity((size * 3 / 2));
-        }
-    }
-    
-    protected int getCapacity() {
-        return storage.length - offset;
-    }
 
-    protected void setCapacity(int new_cap) {
-        if (new_cap > getCapacity()) {
-            byte[] new_data = new byte[new_cap];
-            if (storage.length - offset > 0) {
-                System.arraycopy(storage, offset, new_data, 0, storage.length - offset);
-            }
-            storage = new_data;
-            offset = 0;
-        }
-    }
-    
-    public void reset() {
-        this.reset(0);
-    }
-    
-    public void reset(int kmerSize) {
-        kmerlength = kmerSize;
-        kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);
-        storage = EMPTY_BYTES;
-        valueCount = 0;
-        offset = 0;
-    }
-    
-    public KmerBytesWritable getPosition(int i) {
-        if (i >= valueCount) {
-            throw new ArrayIndexOutOfBoundsException("No such positions");
-        }
-        posIter.setAsReference(kmerlength, storage, offset + i * kmerByteSize);
-        return posIter;
-    }
-    
-    public void set(KmerListWritable otherList) {
-        this.kmerlength = otherList.kmerlength;
-        this.kmerByteSize = otherList.kmerByteSize;
-        set(otherList.valueCount, otherList.storage, otherList.offset);
-    }
+/**
+ * A list of fixed-length kmers.  The length of this list is stored internally 
+ *
+ */
+public class KmerListWritable implements Writable, Iterable<KmerBytesWritable>,
+		Serializable {
+	private static final long serialVersionUID = 1L;
+	protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 };
+	protected static final int HEADER_SIZE = 4;
 
-    public void set(int valueCount, byte[] newData, int offset) {
-        this.valueCount = valueCount;
-        setSize(valueCount * kmerByteSize);
-        if (valueCount > 0) {
-            System.arraycopy(newData, offset, storage, this.offset, valueCount * kmerByteSize);
-        }
-    }
-    
-    @Override
-    public Iterator<KmerBytesWritable> iterator() {
-        Iterator<KmerBytesWritable> it = new Iterator<KmerBytesWritable>() {
+	protected byte[] storage;
+	protected int offset;
+	protected int valueCount;
 
-            private int currentIndex = 0;
+	protected int bytesPerKmer = 0;
+	protected int lettersPerKmer = 0;
+	private KmerBytesWritable posIter = new KmerBytesWritable();
 
-            @Override
-            public boolean hasNext() {
-                return currentIndex < valueCount;
-            }
+	public KmerListWritable() {
+		this.storage = EMPTY_BYTES;
+		this.valueCount = 0;
+		this.offset = 0;
+	}
 
-            @Override
-            public KmerBytesWritable next() {
-                return getPosition(currentIndex++);
-            }
+	public KmerListWritable(int kmerlength) {
+		this();
+		this.lettersPerKmer = kmerlength;
+		this.bytesPerKmer = KmerUtil.getByteNumFromK(kmerlength);
+	}
 
-            @Override
-            public void remove() {
-                if(currentIndex < valueCount)
-                    System.arraycopy(storage, offset + currentIndex * kmerByteSize, 
-                          storage, offset + (currentIndex - 1) * kmerByteSize, 
-                          (valueCount - currentIndex) * kmerByteSize);
-                valueCount--;
-                currentIndex--;
-            }
-        };
-        return it;
-    }
-    
-    /*
-     * remove the first instance of @toRemove. Uses a linear scan.  Throws an exception if not in this list.
-     */
-    public void remove(KmerBytesWritable toRemove, boolean ignoreMissing) {
-        Iterator<KmerBytesWritable> posIterator = this.iterator();
-        while (posIterator.hasNext()) {
-            if(toRemove.equals(posIterator.next())) {
-                posIterator.remove();
-                return;
-            }
-        }
-        if (!ignoreMissing) {
-            throw new ArrayIndexOutOfBoundsException("the KmerBytesWritable `" + toRemove.toString() + "` was not found in this list.");
-        }
-    }
-    
-    public void remove(KmerBytesWritable toRemove) {
-        remove(toRemove, false);
-    }
+	public KmerListWritable(int kmerlength, byte[] data, int offset) {
+		this.lettersPerKmer = kmerlength;
+		this.bytesPerKmer = KmerUtil.getByteNumFromK(kmerlength);
+		setNewReference(data, offset);
+	}
 
-    @Override
-    public void readFields(DataInput in) throws IOException {
-        this.valueCount = in.readInt();
-        setSize(valueCount * kmerByteSize);//kmerByteSize
-        in.readFully(storage, offset, valueCount * kmerByteSize);//kmerByteSize
-    }
+	public KmerListWritable(List<KmerBytesWritable> kmers) {
+		this();
+		setSize(kmers.size()); // reserve space for all elements
+		for (KmerBytesWritable kmer : kmers) {
+			if (kmer.getKmerLength() != lettersPerKmer)
+				throw new IllegalArgumentException("Kmer " + kmer.toString()
+						+ " is of incorrect length (l=" + kmer.getKmerLength()
+						+ ") for this list (should be " + lettersPerKmer + ").");
+			append(kmer);
+		}
+	}
 
-    @Override
-    public void write(DataOutput out) throws IOException {
-        out.writeInt(valueCount);
-        out.write(storage, offset, valueCount * kmerByteSize);
-    }
-    
-    public int getCountOfPosition() {
-        return valueCount;
-    }
+	public void setNewReference(byte[] data, int offset) {
+		this.valueCount = Marshal.getInt(data, offset);
+		this.storage = data;
+		this.offset = offset;
+	}
 
-    public byte[] getByteArray() {
-        return storage;
-    }
+	public void append(KmerBytesWritable kmer) {
+		setSize((1 + valueCount) * bytesPerKmer);
+		System.arraycopy(kmer.getBytes(), 0, storage, offset + valueCount
+				* bytesPerKmer, bytesPerKmer);
+		valueCount += 1;
+	}
 
-    public int getStartOffset() {
-        return offset;
-    }
-    
-    public int getLength() {
-        return valueCount * kmerByteSize;
-    }
-    
-    @Override
-    public String toString() {
-        StringBuilder sbuilder = new StringBuilder();
-        sbuilder.append('[');
-        for(int i = 0; i < valueCount; i++){
-            sbuilder.append(getPosition(i).toString());
-            sbuilder.append(',');
-        }
-        if (valueCount > 0) {
-            sbuilder.setCharAt(sbuilder.length() - 1, ']');
-        } else {
-            sbuilder.append(']');
-        }
-        return sbuilder.toString();
-    }
-    
-    @Override
-    public int hashCode() {
-        return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
-    }
+	/*
+	 * Append the otherList to the end of myList
+	 */
+	public void appendList(KmerListWritable otherList) {
+		if (otherList.valueCount > 0) {
+			setSize((valueCount + otherList.valueCount) * bytesPerKmer);
+			// copy contents of otherList into the end of my storage
+			System.arraycopy(otherList.storage, otherList.offset, storage,
+					offset + valueCount * bytesPerKmer, otherList.valueCount
+							* bytesPerKmer);
+			valueCount += otherList.valueCount;
+		}
+	}
+
+	/**
+	 * Save the union of my list and otherList. Uses a temporary HashSet for
+	 * uniquefication
+	 */
+	public void unionUpdate(KmerListWritable otherList) {
+		int newSize = valueCount + otherList.valueCount;
+		HashSet<KmerBytesWritable> uniqueElements = new HashSet<KmerBytesWritable>(
+				newSize);
+		for (KmerBytesWritable kmer : this) {
+			uniqueElements.add(kmer);
+		}
+		for (KmerBytesWritable kmer : otherList) {
+			uniqueElements.add(kmer);
+		}
+		valueCount = 0;
+		setSize(newSize);
+		for (KmerBytesWritable kmer : uniqueElements) {
+			append(kmer);
+		}
+	}
+
+	protected void setSize(int size) {
+		if (size > getCapacity()) {
+			setCapacity((size * 3 / 2));
+		}
+	}
+
+	protected int getCapacity() {
+		return storage.length - offset;
+	}
+
+	protected void setCapacity(int new_cap) {
+		if (new_cap > getCapacity()) {
+			byte[] new_data = new byte[new_cap];
+			if (storage.length - offset > 0) {
+				System.arraycopy(storage, offset, new_data, 0, storage.length
+						- offset);
+			}
+			storage = new_data;
+			offset = 0;
+		}
+	}
+
+	public void reset(int kmerSize) {
+		lettersPerKmer = kmerSize;
+		bytesPerKmer = KmerUtil.getByteNumFromK(lettersPerKmer);
+		storage = EMPTY_BYTES;
+		valueCount = 0;
+		offset = 0;
+	}
+
+	public KmerBytesWritable getPosition(int i) {
+		if (i >= valueCount) {
+			throw new ArrayIndexOutOfBoundsException("No such positions");
+		}
+		posIter.setAsReference(lettersPerKmer, storage, offset + i
+				* bytesPerKmer);
+		return posIter;
+	}
+
+	public void set(KmerListWritable otherList) {
+		this.lettersPerKmer = otherList.lettersPerKmer;
+		this.bytesPerKmer = otherList.bytesPerKmer;
+		set(otherList.valueCount, otherList.storage, otherList.offset);
+	}
+
+	public void set(int valueCount, byte[] newData, int offset) {
+		this.valueCount = valueCount;
+		setSize(valueCount * bytesPerKmer);
+		if (valueCount > 0) {
+			System.arraycopy(newData, offset, storage, this.offset, valueCount
+					* bytesPerKmer);
+		}
+	}
+
+	@Override
+	public Iterator<KmerBytesWritable> iterator() {
+		Iterator<KmerBytesWritable> it = new Iterator<KmerBytesWritable>() {
+
+			private int currentIndex = 0;
+
+			@Override
+			public boolean hasNext() {
+				return currentIndex < valueCount;
+			}
+
+			@Override
+			public KmerBytesWritable next() {
+				return getPosition(currentIndex++);
+			}
+
+			@Override
+			public void remove() {
+				if (currentIndex < valueCount)
+					System.arraycopy(storage, offset + currentIndex
+							* bytesPerKmer, storage, offset
+							+ (currentIndex - 1) * bytesPerKmer,
+							(valueCount - currentIndex) * bytesPerKmer);
+				valueCount--;
+				currentIndex--;
+			}
+		};
+		return it;
+	}
+
+	/*
+	 * remove the first instance of `toRemove`. Uses a linear scan. Throws an
+	 * exception if not in this list.
+	 */
+	public void remove(KmerBytesWritable toRemove, boolean ignoreMissing) {
+		Iterator<KmerBytesWritable> posIterator = this.iterator();
+		while (posIterator.hasNext()) {
+			if (toRemove.equals(posIterator.next())) {
+				posIterator.remove();
+				return;
+			}
+		}
+		if (!ignoreMissing) {
+			throw new ArrayIndexOutOfBoundsException("the KmerBytesWritable `"
+					+ toRemove.toString() + "` was not found in this list.");
+		}
+	}
+
+	public void remove(KmerBytesWritable toRemove) {
+		remove(toRemove, false);
+	}
+
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		this.valueCount = in.readInt();
+		setSize(valueCount * bytesPerKmer);// kmerByteSize
+		in.readFully(storage, offset, valueCount * bytesPerKmer);// kmerByteSize
+	}
+
+	@Override
+	public void write(DataOutput out) throws IOException {
+		out.writeInt(valueCount);
+		out.write(storage, offset, valueCount * bytesPerKmer);
+	}
+
+	public int getCountOfPosition() {
+		return valueCount;
+	}
+
+	public byte[] getByteArray() {
+		return storage;
+	}
+
+	public int getStartOffset() {
+		return offset;
+	}
+
+	public int getLength() {
+		return valueCount * bytesPerKmer;
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder sbuilder = new StringBuilder();
+		sbuilder.append('[');
+		for (int i = 0; i < valueCount; i++) {
+			sbuilder.append(getPosition(i).toString());
+			sbuilder.append(',');
+		}
+		if (valueCount > 0) {
+			sbuilder.setCharAt(sbuilder.length() - 1, ']');
+		} else {
+			sbuilder.append(']');
+		}
+		return sbuilder.toString();
+	}
+
+	@Override
+	public int hashCode() {
+		return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
+	}
 }
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index 0e26b77..c6360a9 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -18,20 +18,17 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.io.Serializable;
 
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
 
 import edu.uci.ics.genomix.data.KmerUtil;
 import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.genomix.oldtype.NodeWritable.DirectionFlag;
 
 /**
- * Variable kmer length byteswritable It was used to generate the graph in which
- * phase the kmer length doesn't change. Thus the kmerByteSize of bytes doesn't
- * change either.
+ * Variable-length kmer which stores its length internally.
+ * 
+ * Note: `offset` as used in this class is the offset at which the *kmer*
+ * begins. There is a {@value HEADER_SIZE}-byte header preceding the kmer
  */
 public class VKmerBytesWritable extends KmerBytesWritable {
 
@@ -126,7 +123,8 @@
 	public void setAsCopy(byte[] newData, int offset) {
 		int k = Marshal.getInt(newData, offset);
 		reset(k);
-		System.arraycopy(newData, offset + HEADER_SIZE, bytes, this.offset, bytesUsed);
+		System.arraycopy(newData, offset + HEADER_SIZE, bytes, this.offset,
+				bytesUsed);
 	}
 
 	/**
@@ -156,12 +154,12 @@
 		this.lettersInKmer = k;
 		Marshal.putInt(k, bytes, offset - HEADER_SIZE);
 	}
-	
+
 	@Override
 	protected int getCapacity() {
 		return bytes.length - HEADER_SIZE;
 	}
-	
+
 	@Override
 	protected void setCapacity(int new_cap) {
 		if (new_cap != getCapacity()) {
@@ -170,14 +168,14 @@
 				bytesUsed = new_cap;
 			}
 			if (bytesUsed != 0) {
-				System.arraycopy(bytes, offset, new_data, HEADER_SIZE, bytesUsed);
+				System.arraycopy(bytes, offset, new_data, HEADER_SIZE,
+						bytesUsed);
 			}
 			bytes = new_data;
 			offset = HEADER_SIZE;
 		}
 	}
 
-
 	@Override
 	public void mergeWithFFKmer(int initialKmerSize, KmerBytesWritable kmer) {
 		super.mergeWithFFKmer(initialKmerSize, kmer);