Add fracDissimilar to VKmer
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index 71efb5f..f6061fd 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -675,7 +675,12 @@
}
return new KmerBytesWritable(bytes, kmerStartOffset);
}
-
+
+ /**
+ * return the edit distance required to transform kmer1 into kmer2 using substitutions, insertions, and deletions.
+ *
+ * This uses the classic dynamic programming algorithm and takes O(length_1 * length_2) time and space.
+ */
public static int editDistance(VKmerBytesWritable kmer1, VKmerBytesWritable kmer2) {
int rows = kmer1.getKmerLetterLength() + 1, columns = kmer2.getKmerLetterLength() + 1, r=0, c=0, match=0;
int[][] distMat = new int[rows][columns];
@@ -703,9 +708,25 @@
private static int min(int a, int b, int c) {
return a <= b ? (a <= c ? a : c) : (b <= c ? b : c);
}
+ private static int min(int a, int b) {
+ return a <= b ? a : b;
+ }
public int editDistance(VKmerBytesWritable other) {
return editDistance(this, other);
}
+
+ /**
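+ * return the fractional difference between the given kmers. This is the edit distance divided by the length (in letters) of the shorter kmer.
+ *
+ * Note: the fraction may be larger than 1 (when the edit distance is larger than the shorter kmer's length). If the shorter kmer is empty, the float division by zero yields Infinity or NaN rather than throwing an exception.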
+ */
+ public static float fracDissimilar(VKmerBytesWritable kmer1, VKmerBytesWritable kmer2) {
+ return editDistance(kmer1, kmer2) / (float) min(kmer1.getKmerLetterLength(), kmer2.getKmerLetterLength());
+ }
+
+ public float fracDissimilar(VKmerBytesWritable other) {
+ return fracDissimilar(this, other);
+ }
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
index 7460776..9bf728d 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
@@ -564,6 +564,7 @@
Assert.assertEquals(kmer1.editDistance(kmer2), 3);
Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+ Assert.assertEquals(kmer1.fracDissimilar(kmer2), .75f);
kmer1.setAsCopy("");
Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.getKmerLetterLength());
@@ -572,6 +573,8 @@
kmer2.setAsCopy("");
Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.getKmerLetterLength());
Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+
+
}
}