Merge branch 'anbangx/fullstack_genomix' into genomix/fullstack_genomix
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index af5b2ed..35241a2 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -505,6 +505,9 @@
      *            : the next kmer
      */
     public void mergeWithFFKmer(int initialKmerSize, VKmerBytesWritable kmer) {
+    	if (lettersInKmer < initialKmerSize - 1 || kmer.lettersInKmer < initialKmerSize - 1) {
+    		throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + kmer + "'.");
+    	}
         int preKmerLength = lettersInKmer;
         int preSize = bytesUsed;
         lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
@@ -538,6 +541,9 @@
      *            : the next kmer
      */
     public void mergeWithFRKmer(int initialKmerSize, VKmerBytesWritable kmer) {
+    	if (lettersInKmer < initialKmerSize - 1 || kmer.lettersInKmer < initialKmerSize - 1) {
+    		throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + kmer + "'.");
+    	}
         int preSize = bytesUsed;
         int preKmerLength = lettersInKmer;
         lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
@@ -605,6 +611,9 @@
      *            : the previous kmer
      */
     public void mergeWithRRKmer(int initialKmerSize, VKmerBytesWritable preKmer) {
+    	if (lettersInKmer < initialKmerSize - 1 || preKmer.lettersInKmer < initialKmerSize - 1) {
+    		throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + preKmer + "'.");
+    	}
         int preKmerLength = lettersInKmer;
         int preSize = bytesUsed;
         lettersInKmer += preKmer.lettersInKmer - initialKmerSize + 1;
@@ -665,5 +674,37 @@
         }
         return new KmerBytesWritable(bytes, kmerStartOffset);
     }
+        
+    public static int editDistance(VKmerBytesWritable kmer1, VKmerBytesWritable kmer2) {
+    	int rows = kmer1.getKmerLetterLength() + 1, columns = kmer2.getKmerLetterLength() + 1, r=0, c=0, match=0;
+    	int[][] distMat = new int[rows][columns];
+    	
+    	// initialize top row and left column
+    	for (r = 0; r < rows; r++) {
+    		distMat[r][0] = r;
+    	}
+    	for (c = 0; c < columns; c++) {
+    		distMat[0][c] = c;
+    	}
+    	
+    	// fill out the matrix as the min of left+1, up+1, and diag+nomatch
+    	for (r = 1; r < rows; r++) {
+    		for (c = 1; c < columns; c++) {
+    			match = kmer1.getGeneCodeAtPosition(r-1) == kmer2.getGeneCodeAtPosition(c-1) ? 0 : 1;
+    			distMat[r][c] = min(distMat[r-1][c] + 1,
+    								distMat[r][c-1] + 1,
+    								distMat[r-1][c-1] + match);
+    		}
+    	}
+    	return distMat[rows - 1][columns - 1];
+    }
+    
+    private static int min(int a, int b, int c) {
+    	return a <= b ? (a <= c ? a : c) : (b <= c ? b : c);
+    }
+    
+    public int editDistance(VKmerBytesWritable other) {
+    	return editDistance(this, other);
+    }
 
 }
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
index 4a52558..7460776 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
@@ -104,7 +104,7 @@
         String string = "AGCTGACCGT";
         for (int k = 3; k <= 10; k++) {
             VKmerBytesWritable kmer = new VKmerBytesWritable();
-            VKmerBytesWritable kmerAppend = new VKmerBytesWritable();
+            VKmerBytesWritable kmerAppend = new VKmerBytesWritable(k);
             kmer.setByRead(k, array, 0);
             Assert.assertEquals(string.substring(0, k), kmer.toString());
             for (int b = 0; b < k; b++) {
@@ -157,7 +157,7 @@
                 text2 = text.substring(0, jk);
                 Assert.assertEquals(text1, kmer1.toString());
                 Assert.assertEquals(text2, kmer2.toString());
-                for (int x = 1; x < jk; x++) {
+                for (int x = 1; x < (jk < ik ? jk : ik); x++) {
                     merge.setAsCopy(kmer1);
                     merge.mergeWithFFKmer(x, kmer2);
                     Assert.assertEquals(text1 + text2.substring(x - 1), merge.toString());
@@ -184,6 +184,7 @@
         Assert.assertEquals(text2, kmer2.toString());
 
         VKmerBytesWritable merge = new VKmerBytesWritable();
+        merge.setAsCopy(kmer1);
         merge.mergeWithFRKmer(kmerSize, kmer2);
         Assert.assertEquals(result, merge.toString());
 
@@ -221,6 +222,7 @@
         Assert.assertEquals(text2, kmer2.toString());
 
         VKmerBytesWritable merge = new VKmerBytesWritable();
+        merge.setAsCopy(kmer1);
         merge.mergeWithRFKmer(kmerSize, kmer2);
         Assert.assertEquals(result, merge.toString());
 
@@ -333,7 +335,7 @@
                 text2 = text.substring(0, jk);
                 Assert.assertEquals(text1, kmer1.toString());
                 Assert.assertEquals(text2, kmer2.toString());
-                for (int x = 1; x < ik; x++) {
+                for (int x = 1; x < (ik < jk ? ik : jk); x++) {
                     merge.setAsCopy(kmer2);
                     merge.mergeWithRRKmer(x, kmer1);
                     Assert.assertEquals(text1.substring(0, text1.length() - x + 1) + text2, merge.toString());
@@ -554,5 +556,22 @@
         System.out.println("CTA = " + map.get(kmerList.getPosition(0)).toString());
         System.out.println("GTA = " + map.get(kmerList.getPosition(1)).toString());
     }
+    
+    @Test
+    public void TestEditDistance() {
+    	VKmerBytesWritable kmer1 = new VKmerBytesWritable("ACGT");
+    	VKmerBytesWritable kmer2 = new VKmerBytesWritable("AAAACGT");
+    	
+    	Assert.assertEquals(kmer1.editDistance(kmer2), 3);
+    	Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+    	
+    	kmer1.setAsCopy("");
+    	Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.getKmerLetterLength());
+    	Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+    	
+    	kmer2.setAsCopy("");
+    	Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.getKmerLetterLength());
+    	Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+    }
 
 }