Merge and fix FRMerge. RF still doesn't work.
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
index 416ab49..be65d55 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
@@ -15,6 +15,8 @@
package edu.uci.ics.genomix.type;
+import javax.management.RuntimeErrorException;
+
public class GeneCode {
public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
/**
@@ -51,7 +53,7 @@
public static byte getPairedGeneCode(byte genecode){
if ( genecode < 0 || genecode > 3){
- throw new IllegalArgumentException("Invalid genecode");
+ // put the rejected value in the message so the failing input can be read off the exception
+ throw new IllegalArgumentException("Invalid genecode: " + genecode);
}
return (byte) (3- genecode);
}
@@ -66,4 +68,30 @@
}
return GENE_SYMBOL[code];
}
+
+    /**
+     * Builds the reverse complement of a nucleotide string.
+     *
+     * @param kmer
+     *            letters accepted by {@link #complement(char)}
+     * @return the complemented letters of kmer, in reverse order
+     */
+    public static String reverseComplement(String kmer) {
+        final int length = kmer.length();
+        char[] flipped = new char[length];
+        // read left to right, write right to left: complement and reversal in a single pass
+        for (int src = 0; src < length; src++) {
+            flipped[length - 1 - src] = complement(kmer.charAt(src));
+        }
+        return new String(flipped);
+    }
+
+    /**
+     * Returns the complementary base of a nucleotide letter: A pairs with T and C pairs with G.
+     * Upper- and lower-case input are both accepted; the result is always upper case.
+     *
+     * @param ch
+     *            one of A, C, G, T in either case
+     * @return the upper-case complement of ch
+     * @throws IllegalArgumentException
+     *             if ch is not one of the four letters
+     */
+    public static char complement(char ch) {
+        switch (ch) {
+            case 'A':
+            case 'a':
+                return 'T';
+            case 'C':
+            case 'c':
+                return 'G';
+            case 'G':
+            case 'g':
+                return 'C';
+            case 'T':
+            case 't':
+                return 'A';
+            default:
+                // same exception type the other GeneCode lookups use for a bad argument
+                throw new IllegalArgumentException("Invalid character given in complement: " + ch);
+        }
+    }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index 1a875d4..46e1065 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -267,7 +267,8 @@
byte l = 0;
int bytecount = 0;
int bcount = size - 1;
- for (int i = start + kmerlength - 1; i >= 0 && i < array.length; i--) {
+// for (int i = start + kmerlength - 1; i >= 0 && i < array.length; i--) {
+ for (int i = start + kmerlength - 1; i >= start && i < array.length; i--) {
byte code = GeneCode.getPairedCodeFromSymbol(array[i]);
l |= (byte) (code << bytecount);
bytecount += 2;
@@ -384,36 +385,32 @@
* : the next kmer
*/
public void mergeWithFRKmer(int initialKmerSize, KmerBytesWritable kmer) {
- int preKmerLength = kmerlength;
int preSize = size;
+ int preKmerLength = kmerlength;
this.kmerlength += kmer.kmerlength - initialKmerSize + 1;
setSize(KmerUtil.getByteNumFromK(kmerlength));
-
// copy prefix into right-side of buffer
for (int i = 1; i <= preSize; i++) {
bytes[offset + size - i] = bytes[offset + preSize - i];
}
-
- // copy complement of suffix in reverse order into left side of buffer.
- // we read two bits (one letter) at a time from leading bits of kmer, copying their complement
- // into my trailing bits
- byte destByte = 0x00;
- int destPosn = 0;
- for (; destPosn < kmer.getKmerLength(); destPosn++) {
- // srcPosn starts at the end of kmer, but excludes the last (initialKmerSize - 1) letters
- // there are +1 and -1 terms in there that cancel out :P
- int srcPosn = kmer.getKmerLength() - destPosn - initialKmerSize;
- byte compLetter = GeneCode.getPairedCodeFromSymbol(kmer.getGeneCodeAtPosition(srcPosn));
- if ((destPosn % 4) == 0 && destPosn >= 4) {
- // byte is full. write the complete byte to storage
- bytes[offset + preSize - (destPosn / 4)] = destByte;
- destByte &= 0x00;
- }
- destByte = (byte) ((destByte << 2) | compLetter);
- }
- // fill in the leading, partial byte
- bytes[offset + preSize - (destPosn / 4)] = destByte;
- clearLeadBit();
+
+ // Append the complement of kmer's letters at positions (kmer.kmerlength - initialKmerSize) down to 0.
+ // Each letter takes two bits; a byte is filled from its low bits upward and, once full, the next
+ // letters go into the byte one index lower.
+ int bytecount = (preKmerLength % 4) * 2; // bit offset of the first free slot after the prefix
+ // A prefix whose length is a multiple of 4 leaves no free slot in its leading byte, so start a fresh
+ // byte one index lower instead of OR-ing new letters into that full byte.
+ // (assumes KmerUtil.getByteNumFromK(k) is ceil(k / 4) -- TODO confirm)
+ boolean prefixByteFull = bytecount == 0;
+ int bcount = size - preSize - (prefixByteFull ? 1 : 0);
+ // otherwise keep filling the prefix's partially used leading byte
+ byte l = prefixByteFull ? 0x00 : bytes[offset + bcount];
+ for (int i = kmer.kmerlength - initialKmerSize; i >= 0; i--) {
+ byte code = GeneCode.getPairedGeneCode(kmer.getGeneCodeAtPosition(i));
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ bytes[offset + bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ // flush the last, partially filled byte; bcount is -1 when the final letter completed a byte
+ if (bcount >= 0) {
+ bytes[offset] = l;
+ }
}
/**
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
index 8438a44..c172994 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
@@ -15,6 +15,10 @@
package edu.uci.ics.genomix.data.test;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+
import junit.framework.Assert;
import org.junit.Test;
@@ -115,7 +119,7 @@
}
@Test
- public void TestMergeNextKmer() {
+ public void TestMergeFFKmer() {
byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
String text = "AGCTGACCGT";
KmerBytesWritable kmer1 = new KmerBytesWritable(8);
@@ -154,9 +158,86 @@
}
}
}
+
+    /**
+     * Merges a forward kmer with a reverse-complemented kmer taken from the same read and checks the
+     * merged text for initialKmerSize values 1 through 3.
+     */
+    @Test
+    public void TestMergeFRKmer() {
+        final int kmerSize = 3;
+        final String fullRead = "AAGCTAACAACC";
+        final byte[] readBytes = fullRead.getBytes();
+
+        // forward kmer: the first seven letters of the read
+        final String forwardText = "AAGCTAA";
+        KmerBytesWritable forward = new KmerBytesWritable(forwardText.length());
+        forward.setByRead(readBytes, 0);
+        Assert.assertEquals(forwardText, forward.toString());
+
+        // reverse kmer: the reverse complement of the last seven letters of the read
+        final String reverseText = "GGTTGTT";
+        KmerBytesWritable reverse = new KmerBytesWritable(reverseText.length());
+        reverse.setByReadReverse(readBytes, fullRead.length() - reverseText.length());
+        Assert.assertEquals(reverseText, reverse.toString());
+
+        KmerBytesWritable merged = new KmerBytesWritable(forward);
+        merged.mergeWithFRKmer(kmerSize, reverse);
+        Assert.assertEquals(fullRead, merged.toString());
+
+        // expectedBySize[k - 1] is the merged text when initialKmerSize == k
+        final String[] expectedBySize = { "AAGCTAAAACAACC", "AAGCTAAACAACC", "AAGCTAACAACC" };
+        for (int k = 1; k <= expectedBySize.length; k++) {
+            merged.set(forward);
+            merged.mergeWithFRKmer(k, reverse);
+            Assert.assertEquals(expectedBySize[k - 1], merged.toString());
+        }
+    }
+
+
+    // NOTE(review): this test runs the same statements as TestMergeFRKmer and only ever calls
+    // mergeWithFRKmer; no RF merge is exercised here -- confirm which method it is meant to cover.
+    @Test
+    public void TestMergeRFKmer() {
+        final int kmerSize = 3;
+        final String read = "AAGCTAACAACC";
+        final byte[] readLetters = read.getBytes();
+
+        // head: the leading seven letters of the read, stored as-is
+        final String headText = "AAGCTAA";
+        KmerBytesWritable head = new KmerBytesWritable(headText.length());
+        head.setByRead(readLetters, 0);
+        Assert.assertEquals(headText, head.toString());
+
+        // tail: the reverse complement of the trailing seven letters of the read
+        final String tailText = "GGTTGTT";
+        KmerBytesWritable tail = new KmerBytesWritable(tailText.length());
+        tail.setByReadReverse(readLetters, read.length() - tailText.length());
+        Assert.assertEquals(tailText, tail.toString());
+
+        KmerBytesWritable joined = new KmerBytesWritable(head);
+        joined.mergeWithFRKmer(kmerSize, tail);
+        Assert.assertEquals(read, joined.toString());
+
+        // one expected merge per initialKmerSize, starting at 1
+        int initialSize = 1;
+        for (String expected : new String[] { "AAGCTAAAACAACC", "AAGCTAAACAACC", "AAGCTAACAACC" }) {
+            joined.set(head);
+            joined.mergeWithFRKmer(initialSize, tail);
+            Assert.assertEquals(expected, joined.toString());
+            initialSize++;
+        }
+    }
+
+
@Test
- public void TestMergePreKmer() {
+ public void TestMergeRRKmer() {
byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
String text = "AGCTGACCGT";
KmerBytesWritable kmer1 = new KmerBytesWritable(8);