Add edit distance computation for VKmers
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index af5b2ed..17132e4 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -665,5 +665,37 @@
}
return new KmerBytesWritable(bytes, kmerStartOffset);
}
+
+ public static int editDistance(VKmerBytesWritable kmer1, VKmerBytesWritable kmer2) {
+ int rows = kmer1.getKmerLetterLength() + 1, columns = kmer2.getKmerLetterLength() + 1, r=0, c=0, match=0;
+ int[][] distMat = new int[rows][columns];
+
+ // initialize top row and left column
+ for (r = 0; r < rows; r++) {
+ distMat[r][0] = r;
+ }
+ for (c = 0; c < columns; c++) {
+ distMat[0][c] = c;
+ }
+
+ // fill out the matrix as the min of left+1, up+1, and diag+nomatch
+ for (r = 1; r < rows; r++) {
+ for (c = 1; c < columns; c++) {
+ match = kmer1.getGeneCodeAtPosition(r-1) == kmer2.getGeneCodeAtPosition(c-1) ? 0 : 1;
+ distMat[r][c] = min(distMat[r-1][c] + 1,
+ distMat[r][c-1] + 1,
+ distMat[r-1][c-1] + match);
+ }
+ }
+ return distMat[rows - 1][columns - 1];
+ }
+
+ private static int min(int a, int b, int c) {
+ return a <= b ? (a <= c ? a : c) : (b <= c ? b : c);
+ }
+
+ public int editDistance(VKmerBytesWritable other) {
+ return editDistance(this, other);
+ }
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
index 4a52558..850d24f 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
@@ -104,7 +104,7 @@
String string = "AGCTGACCGT";
for (int k = 3; k <= 10; k++) {
VKmerBytesWritable kmer = new VKmerBytesWritable();
- VKmerBytesWritable kmerAppend = new VKmerBytesWritable();
+ VKmerBytesWritable kmerAppend = new VKmerBytesWritable(k);
kmer.setByRead(k, array, 0);
Assert.assertEquals(string.substring(0, k), kmer.toString());
for (int b = 0; b < k; b++) {
@@ -554,5 +554,22 @@
System.out.println("CTA = " + map.get(kmerList.getPosition(0)).toString());
System.out.println("GTA = " + map.get(kmerList.getPosition(1)).toString());
}
+
+ @Test
+ public void TestEditDistance() {
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable("ACGT");
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable("AAAACGT");
+
+ Assert.assertEquals(kmer1.editDistance(kmer2), 3);
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+
+ kmer1.setAsCopy("");
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.getKmerLetterLength());
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+
+ kmer2.setAsCopy("");
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.getKmerLetterLength());
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+ }
}