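Refactor KmerBytesWritable merges: rename mergeNextKmer -> mergeWithFFKmer and mergePreKmer -> mergeWithRRKmer, and add mergeWithFRKmer / mergeWithRFKmer (the new FR/RF methods are untested :)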
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index 50baeb4..1a875d4 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -358,7 +358,7 @@
* @param kmer
* : the next kmer
*/
- public void mergeNextKmer(int initialKmerSize, KmerBytesWritable kmer) {
+ public void mergeWithFFKmer(int initialKmerSize, KmerBytesWritable kmer) {
int preKmerLength = kmerlength;
int preSize = size;
this.kmerlength += kmer.kmerlength - initialKmerSize + 1;
@@ -372,8 +372,100 @@
}
clearLeadBit();
}
+
+ /**
+ * Merge Kmer with the next connected Kmer, when that Kmer needs to be reverse-complemented
+ * e.g. AAGCTAA merge with GGTTGTT, if the initial kmerSize = 3
+ * then it will return AAGCTAACAACC (the reverse complement of GGTTGTT is AACAACC; its first
+ * initialKmerSize - 1 letters overlap the end of this kmer and are not appended again)
+ *
+ * @param initialKmerSize
+ * : the initial kmerSize
+ * @param kmer
+ * : the next kmer
+ */
+ public void mergeWithFRKmer(int initialKmerSize, KmerBytesWritable kmer) {
+ int preKmerLength = kmerlength;
+ int preSize = size;
+ this.kmerlength += kmer.kmerlength - initialKmerSize + 1;
+ setSize(KmerUtil.getByteNumFromK(kmerlength));
+
+ // copy prefix into right-side of buffer
+ for (int i = 1; i <= preSize; i++) {
+ bytes[offset + size - i] = bytes[offset + preSize - i];
+ }
+
+ // Append the complement of kmer's letters, in reverse order, after my existing letters.
+ // Assumes letter p lives in byte (offset + size - 1 - p / 4) at bit offset (p % 4) * 2,
+ // matching the shift logic of getOneByteFromKmerAtPosition -- TODO confirm with a test.
+ // Each letter is masked in, so letters already sharing the boundary byte are preserved.
+ // NOTE(review): getPairedCodeFromSymbol receives a gene code here, not a symbol -- confirm
+ // that it complements 2-bit codes.
+ int destPosn = preKmerLength;
+ // srcPosn starts at the end of kmer, but excludes the last (initialKmerSize - 1) letters,
+ // and stops at 0 so it never becomes negative
+ for (int srcPosn = kmer.getKmerLength() - initialKmerSize; srcPosn >= 0; srcPosn--) {
+ byte compLetter = GeneCode.getPairedCodeFromSymbol(kmer.getGeneCodeAtPosition(srcPosn));
+ int destIndex = offset + size - 1 - (destPosn / 4);
+ int shift = (destPosn % 4) << 1;
+ // clear the two target bits, then OR in the complemented letter
+ bytes[destIndex] = (byte) ((bytes[destIndex] & ~(0x03 << shift)) | ((compLetter & 0x03) << shift));
+ destPosn++;
+ }
+
+ clearLeadBit();
+ }
/**
+ * Merge Kmer with the previous connected Kmer, when that kmer needs to be reverse-complemented
+ * e.g. AACAACC merge with TTCTGCC, if the initial kmerSize = 3
+ * then it will return GGCAGAACAACC
+ *
+ * @param initialKmerSize
+ * : the initial kmerSize
+ * @param preKmer
+ * : the previous kmer
+ */
+ public void mergeWithRFKmer(int initialKmerSize, KmerBytesWritable preKmer) {
+ int preKmerLength = kmerlength;
+ int preSize = size;
+ this.kmerlength += preKmer.kmerlength - initialKmerSize + 1;
+ setSize(KmerUtil.getByteNumFromK(kmerlength));
+ // save my first four letters now: the byte holding them may be overwritten below
+ byte cacheByte = getOneByteFromKmerAtPosition(0, bytes, offset, preSize);
+
+ // copy reverse complement of prekmer into letter positions 0 .. newLetters - 1.
+ // Only the letters that do not overlap my own first (initialKmerSize - 1) letters are
+ // written; the current kmer is appended right after them below.
+ // Assumes letter p lives in byte (offset + size - 1 - p / 4) at bit offset (p % 4) * 2,
+ // matching the shift logic of getOneByteFromKmerAtPosition -- TODO confirm with a test.
+ // NOTE(review): getPairedCodeFromSymbol receives a gene code here, not a symbol -- confirm
+ // that it complements 2-bit codes.
+ int newLetters = preKmer.getKmerLength() - initialKmerSize + 1;
+ for (int destPosn = 0; destPosn < newLetters; destPosn++) {
+ // srcPosn starts at the end of kmer
+ int srcPosn = preKmer.getKmerLength() - destPosn - 1;
+ byte compLetter = GeneCode.getPairedCodeFromSymbol(preKmer.getGeneCodeAtPosition(srcPosn));
+ int destIndex = offset + size - 1 - (destPosn / 4);
+ int shift = (destPosn % 4) << 1;
+ // clear the two target bits, then OR in the complemented letter
+ bytes[destIndex] = (byte) ((bytes[destIndex] & ~(0x03 << shift)) | ((compLetter & 0x03) << shift));
+ }
+
+ // copy current kmer, one byte (four letters) at a time; cacheByte always holds the
+ // four letters read on the previous step so they are written after the next read
+ int k = 4;
+ for (; k < preKmerLength; k += 4) {
+ byte onebyte = getOneByteFromKmerAtPosition(k, bytes, offset, preSize);
+ appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, size);
+ cacheByte = onebyte;
+ }
+ appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, size);
+ clearLeadBit();
+ }
+
+
+ /**
* Merge Kmer with the previous connected Kmer
* e.g. AACAACC merge with AAGCTAA, if the initial kmerSize = 3
* then it will return AAGCTAACAACC
@@ -383,7 +475,7 @@
* @param preKmer
* : the previous kmer
*/
- public void mergePreKmer(int initialKmerSize, KmerBytesWritable preKmer) {
+ public void mergeWithRRKmer(int initialKmerSize, KmerBytesWritable preKmer) {
int preKmerLength = kmerlength;
int preSize = size;
this.kmerlength += preKmer.kmerlength - initialKmerSize + 1;
@@ -417,7 +509,7 @@
buffer[position] = (byte) ((buffer[position] & mask) | ((0xff & onebyte) << shift));
if (position > start && shift != 0) {
- buffer[position - 1] = (byte) ((buffer[position - 1] & (0xff - mask)) | ((byte) ((0xff & onebyte) >> (8 - shift))));
+ buffer[position - 1] = (byte) ((buffer[position - 1] & (0xff - mask)) | ((byte) ((0xff & onebyte) >>> (8 - shift))));
}
}
@@ -427,7 +519,7 @@
throw new IllegalArgumentException("Buffer of kmer storage is invalid");
}
int shift = (k % 4) << 1;
- byte data = (byte) (((0xff) & buffer[position]) >> shift);
+ byte data = (byte) (((0xff) & buffer[position]) >>> shift);
if (shift != 0 && position > start) {
data |= 0xff & (buffer[position - 1] << (8 - shift));
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 8955aab..8aab267 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -121,13 +121,13 @@
public void mergeForwardNext(NodeWritable nextNode, int initialKmerSize) {
this.forwardForwardList.set(nextNode.forwardForwardList);
this.forwardReverseList.set(nextNode.forwardReverseList);
- kmer.mergeNextKmer(initialKmerSize, nextNode.getKmer());
+ kmer.mergeWithFFKmer(initialKmerSize, nextNode.getKmer());
}
public void mergeForwardPre(NodeWritable preNode, int initialKmerSize) {
this.reverseForwardList.set(preNode.reverseForwardList);
this.reverseReverseList.set(preNode.reverseReverseList);
- kmer.mergePreKmer(initialKmerSize, preNode.getKmer());
+ kmer.mergeWithRRKmer(initialKmerSize, preNode.getKmer());
}
public void set(NodeWritable node) {
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
index 5a59a87..8438a44 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
@@ -127,12 +127,12 @@
Assert.assertEquals(text2, kmer2.toString());
KmerBytesWritable merge = new KmerBytesWritable(kmer1);
int kmerSize = 8;
- merge.mergeNextKmer(kmerSize, kmer2);
+ merge.mergeWithFFKmer(kmerSize, kmer2);
Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
for (int i = 1; i < 8; i++) {
merge.set(kmer1);
- merge.mergeNextKmer(i, kmer2);
+ merge.mergeWithFFKmer(i, kmer2);
Assert.assertEquals(text1 + text2.substring(i - 1), merge.toString());
}
@@ -148,7 +148,7 @@
Assert.assertEquals(text2, kmer2.toString());
for (int x = 1; x < jk; x++) {
merge.set(kmer1);
- merge.mergeNextKmer(x, kmer2);
+ merge.mergeWithFFKmer(x, kmer2);
Assert.assertEquals(text1 + text2.substring(x - 1), merge.toString());
}
}
@@ -168,12 +168,12 @@
Assert.assertEquals(text2, kmer2.toString());
KmerBytesWritable merge = new KmerBytesWritable(kmer2);
int kmerSize = 8;
- merge.mergePreKmer(kmerSize, kmer1);
+ merge.mergeWithRRKmer(kmerSize, kmer1);
Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
for (int i = 1; i < 8; i++) {
merge.set(kmer2);
- merge.mergePreKmer(i, kmer1);
+ merge.mergeWithRRKmer(i, kmer1);
Assert.assertEquals(text1.substring(0, text1.length() - i + 1) + text2, merge.toString());
}
@@ -189,7 +189,7 @@
Assert.assertEquals(text2, kmer2.toString());
for (int x = 1; x < ik; x++) {
merge.set(kmer2);
- merge.mergePreKmer(x, kmer1);
+ merge.mergeWithRRKmer(x, kmer1);
Assert.assertEquals(text1.substring(0, text1.length() - x + 1) + text2, merge.toString());
}
}