Merge branch 'anbangx/fullstack_genomix' into genomix/fullstack_genomix
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index af5b2ed..35241a2 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -505,6 +505,9 @@
* : the next kmer
*/
public void mergeWithFFKmer(int initialKmerSize, VKmerBytesWritable kmer) {
+ if (lettersInKmer < initialKmerSize - 1 || kmer.lettersInKmer < initialKmerSize - 1) {
+ throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + kmer + "'.");
+ }
int preKmerLength = lettersInKmer;
int preSize = bytesUsed;
lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
@@ -538,6 +541,9 @@
* : the next kmer
*/
public void mergeWithFRKmer(int initialKmerSize, VKmerBytesWritable kmer) {
+ if (lettersInKmer < initialKmerSize - 1 || kmer.lettersInKmer < initialKmerSize - 1) {
+ throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + kmer + "'.");
+ }
int preSize = bytesUsed;
int preKmerLength = lettersInKmer;
lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
@@ -605,6 +611,9 @@
* : the previous kmer
*/
public void mergeWithRRKmer(int initialKmerSize, VKmerBytesWritable preKmer) {
+ if (lettersInKmer < initialKmerSize - 1 || preKmer.lettersInKmer < initialKmerSize - 1) {
+ throw new IllegalArgumentException("Not enough letters in the kmers to perform a merge! Tried K=" + initialKmerSize + ", merge '" + this + "' with '" + preKmer + "'.");
+ }
int preKmerLength = lettersInKmer;
int preSize = bytesUsed;
lettersInKmer += preKmer.lettersInKmer - initialKmerSize + 1;
@@ -665,5 +674,37 @@
}
return new KmerBytesWritable(bytes, kmerStartOffset);
}
+
+ public static int editDistance(VKmerBytesWritable kmer1, VKmerBytesWritable kmer2) {
+ int rows = kmer1.getKmerLetterLength() + 1, columns = kmer2.getKmerLetterLength() + 1, r=0, c=0, match=0;
+ int[][] distMat = new int[rows][columns];
+
+ // initialize top row and left column
+ for (r = 0; r < rows; r++) {
+ distMat[r][0] = r;
+ }
+ for (c = 0; c < columns; c++) {
+ distMat[0][c] = c;
+ }
+
+ // fill out the matrix as the min of left+1, up+1, and diag+nomatch
+ for (r = 1; r < rows; r++) {
+ for (c = 1; c < columns; c++) {
+ match = kmer1.getGeneCodeAtPosition(r-1) == kmer2.getGeneCodeAtPosition(c-1) ? 0 : 1;
+ distMat[r][c] = min(distMat[r-1][c] + 1,
+ distMat[r][c-1] + 1,
+ distMat[r-1][c-1] + match);
+ }
+ }
+ return distMat[rows - 1][columns - 1];
+ }
+
+ private static int min(int a, int b, int c) {
+ return a <= b ? (a <= c ? a : c) : (b <= c ? b : c);
+ }
+
+ public int editDistance(VKmerBytesWritable other) {
+ return editDistance(this, other);
+ }
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
index 4a52558..7460776 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
@@ -104,7 +104,7 @@
String string = "AGCTGACCGT";
for (int k = 3; k <= 10; k++) {
VKmerBytesWritable kmer = new VKmerBytesWritable();
- VKmerBytesWritable kmerAppend = new VKmerBytesWritable();
+ VKmerBytesWritable kmerAppend = new VKmerBytesWritable(k);
kmer.setByRead(k, array, 0);
Assert.assertEquals(string.substring(0, k), kmer.toString());
for (int b = 0; b < k; b++) {
@@ -157,7 +157,7 @@
text2 = text.substring(0, jk);
Assert.assertEquals(text1, kmer1.toString());
Assert.assertEquals(text2, kmer2.toString());
- for (int x = 1; x < jk; x++) {
+ for (int x = 1; x < (jk < ik ? jk : ik); x++) {
merge.setAsCopy(kmer1);
merge.mergeWithFFKmer(x, kmer2);
Assert.assertEquals(text1 + text2.substring(x - 1), merge.toString());
@@ -184,6 +184,7 @@
Assert.assertEquals(text2, kmer2.toString());
VKmerBytesWritable merge = new VKmerBytesWritable();
+ merge.setAsCopy(kmer1);
merge.mergeWithFRKmer(kmerSize, kmer2);
Assert.assertEquals(result, merge.toString());
@@ -221,6 +222,7 @@
Assert.assertEquals(text2, kmer2.toString());
VKmerBytesWritable merge = new VKmerBytesWritable();
+ merge.setAsCopy(kmer1);
merge.mergeWithRFKmer(kmerSize, kmer2);
Assert.assertEquals(result, merge.toString());
@@ -333,7 +335,7 @@
text2 = text.substring(0, jk);
Assert.assertEquals(text1, kmer1.toString());
Assert.assertEquals(text2, kmer2.toString());
- for (int x = 1; x < ik; x++) {
+ for (int x = 1; x < (ik < jk ? ik : jk); x++) {
merge.setAsCopy(kmer2);
merge.mergeWithRRKmer(x, kmer1);
Assert.assertEquals(text1.substring(0, text1.length() - x + 1) + text2, merge.toString());
@@ -554,5 +556,22 @@
System.out.println("CTA = " + map.get(kmerList.getPosition(0)).toString());
System.out.println("GTA = " + map.get(kmerList.getPosition(1)).toString());
}
+
+ @Test
+ public void TestEditDistance() {
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable("ACGT");
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable("AAAACGT");
+
+ Assert.assertEquals(kmer1.editDistance(kmer2), 3);
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+
+ kmer1.setAsCopy("");
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.getKmerLetterLength());
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+
+ kmer2.setAsCopy("");
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.getKmerLetterLength());
+ Assert.assertEquals(kmer1.editDistance(kmer2), kmer2.editDistance(kmer1));
+ }
}