Implement mergePreKmer, rewrite mergeNextKmer, and add tests for both
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index 40d8a23..c84658a 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -303,11 +303,8 @@
* The k of new kmer will increase by 1
* e.g. AAGCT merge with A => AAGCTA
*
- * @param k
- * :input k of kmer
* @param nextCode
* : next neighbor in gene-code format
- * @return the merged Kmer, this K of this Kmer is k+1
*/
public void mergeNextCode(byte nextCode) {
this.kmerlength += 1;
@@ -322,11 +319,6 @@
}
clearLeadBit();
}
-
- public void mergePreCode(byte preCode) {
- //TODO
- return;
- }
/**
* Merge Kmer with the next connected Kmer
@@ -337,41 +329,70 @@
* : the initial kmerSize
* @param kmer
*/
- @SuppressWarnings("unchecked")
public void mergeNextKmer(int initialKmerSize, KmerBytesWritable kmer) {
- if (kmer.getKmerLength() == initialKmerSize){
- mergeNextCode(kmer.getGeneCodeAtPosition(kmer.getKmerLength()-1));
- return;
- }
int preKmerLength = kmerlength;
int preSize = size;
this.kmerlength += kmer.kmerlength - initialKmerSize + 1;
setSize(KmerUtil.getByteNumFromK(kmerlength));
- int i = 1;
- for (; i <= preSize; i++) {
+ for (int i = 1; i <= preSize; i++) { // relocate the old preSize bytes so they end at the new tail (offset + size - 1)
bytes[offset + size - i] = bytes[offset + preSize - i];
}
- if (i > 1) {
- i--;
- }
- if (preKmerLength % 4 == 0) {
- for (int j = 1; j <= kmer.getLength() && offset + size >= i + j; j++) {
- bytes[offset + size - i - j] = kmer.getBytes()[kmer.getOffset() + kmer.getLength() - j];
- }
- } else {
- int posNeedToMove = ((preKmerLength % 4) << 1);
- bytes[offset + size - i] |= kmer.getBytes()[kmer.getOffset() + kmer.getLength() - 1] << posNeedToMove;
- for (int j = 1; j <= kmer.getLength() && offset + size - i - j >= 0; j++) {
- bytes[offset + size - i - j] = (byte) (((kmer.getBytes()[kmer.getOffset() + kmer.getLength() - j] & 0xff) >> (8 - posNeedToMove)) | (kmer
- .getBytes()[kmer.getOffset() + kmer.getLength() - j - 1] << posNeedToMove));
- }
+ for (int k = initialKmerSize - 1; k < kmer.getKmerLength(); k += 4) { // walk kmer from base index initialKmerSize - 1, four bases (8 bits) per step
+ byte onebyte = getOneByteFromKmerAtPosition(k, kmer.getBytes(), kmer.getOffset(), kmer.getLength()); // 8 bits of kmer starting at base k
+ appendOneByteAtPosition(preKmerLength + k - initialKmerSize + 1, onebyte, bytes, offset, size); // store them after this kmer's original preKmerLength bases
}
clearLeadBit();
}
-
- public void mergePreKmer(int initialKmerSize, KmerBytesWritable kmer){
- //TODO
- return;
+
+ public void mergePreKmer(int initialKmerSize, KmerBytesWritable preKmer) { // prepend the first (preKmer.kmerlength - initialKmerSize + 1) bases of preKmer to this kmer
+ int preKmerLength = kmerlength; // length in bases before the merge
+ int preSize = size; // byte count before the merge; old data is still read using this length below
+ this.kmerlength += preKmer.kmerlength - initialKmerSize + 1; // number of bases contributed by preKmer
+ setSize(KmerUtil.getByteNumFromK(kmerlength)); // resize storage for the new length
+ byte cacheByte = getOneByteFromKmerAtPosition(0, bytes, offset, preSize); // save this kmer's bits at base 0 before the preKmer copy writes into bytes
+
+ // copy prekmer
+ for (int k = 0; k < preKmer.kmerlength - initialKmerSize + 1; k += 4) { // four bases (8 bits) per step, written at the same base index k
+ byte onebyte = getOneByteFromKmerAtPosition(k, preKmer.bytes, preKmer.offset, preKmer.size);
+ appendOneByteAtPosition(k, onebyte, bytes, offset, size); // writes a full 8 bits; any bits past the prefix are rewritten by the copy below
+ }
+
+ // copy current kmer
+ int k = 4; // base 0 is already held in cacheByte, so reading resumes at base 4
+ for (; k < preKmerLength; k += 4) {
+ byte onebyte = getOneByteFromKmerAtPosition(k, bytes, offset, preSize); // read the next old byte first: source and destination are the same array
+ appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, size); // place the previously read byte right after the preKmer prefix
+ cacheByte = onebyte;
+ }
+ appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, size); // flush the last cached byte
+ clearLeadBit(); // NOTE(review): body not visible in this diff; presumably clears unused bits of the leading byte - confirm
+ }
+
+ public static void appendOneByteAtPosition(int k, byte onebyte, byte[] buffer, int start, int length) { // write the 8 bits of onebyte into buffer[start..start+length-1] beginning at base index k (2 bits per base)
+ int position = start + length - 1 - k / 4; // byte holding base k; base 0 lives in the last byte of the region
+ if (position < start) { // base k lies outside the region
+ throw new IllegalArgumentException("Buffer for kmer storage is invalid");
+ }
+ int shift = ((k) % 4) << 1; // bit offset of base k inside its byte: 0, 2, 4 or 6
+ int mask = shift == 0 ? 0 : ((1 << shift) - 1); // low `shift` bits, which hold earlier bases and must be preserved
+
+ buffer[position] = (byte) ((buffer[position] & mask) | ((0xff & onebyte) << shift)); // keep the low bits, store the low (8 - shift) bits of onebyte above them; the cast drops the overflow
+ if (position > start && shift != 0) { // when position == start the overflow bits are silently dropped
+ buffer[position - 1] = (byte) ((buffer[position - 1] & (0xff - mask)) | ((byte) ((0xff & onebyte) >> (8 - shift)))); // high `shift` bits of onebyte go into the low bits of the preceding byte
+ }
+ }
+
+ public static byte getOneByteFromKmerAtPosition(int k, byte[] buffer, int start, int length) { // read 8 bits from buffer[start..start+length-1] beginning at base index k (2 bits per base)
+ int position = start + length - 1 - k / 4; // byte holding base k; base 0 lives in the last byte of the region
+ if (position < start) { // base k lies outside the region
+ throw new IllegalArgumentException("Buffer for kmer storage is invalid");
+ }
+ int shift = (k % 4) << 1; // bit offset of base k inside its byte: 0, 2, 4 or 6
+ byte data = (byte) (((0xff) & buffer[position]) >> shift); // mask to unsigned before shifting so base k lands in bits 0-1
+ if (shift != 0 && position > start) { // without a preceding byte the high bits stay zero
+ data |= 0xff & (buffer[position - 1] << (8 - shift)); // fill the high `shift` bits from the low bits of the preceding byte
+ }
+ return data;
}
protected void clearLeadBit() {
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
index 04fc3bb..9480aa7 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
@@ -106,29 +106,111 @@
}
}
}
-
+
@Test
- public void TestMergeNextKmer(){
+ public void TestGetOneByteFromKmer() { // round-trip check of getOneByteFromKmerAtPosition and appendOneByteAtPosition for kmer lengths 3..10
byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- KmerBytesWritable kmer1 = new KmerBytesWritable(9);
+ String string = "AGCTGACCGT"; // same bases as array, used to build expected strings
+ for (int k = 3; k <= 10; k++) {
+ KmerBytesWritable kmer = new KmerBytesWritable(k);
+ KmerBytesWritable kmerAppend = new KmerBytesWritable(k); // rebuilt below from the bytes read out of kmer
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(string.substring(0, k), kmer.toString());
+ for (int b = 0; b < k; b++) {
+ byte byteActual = KmerBytesWritable.getOneByteFromKmerAtPosition(b, kmer.getBytes(), kmer.getOffset(),
+ kmer.getLength());
+ byte byteExpect = GeneCode.getCodeFromSymbol(array[b]); // base b occupies the two lowest bits
+ for (int i = 1; i < 4 && b + i < k; i++) { // pack up to three following bases, stopping at the end of the kmer
+ byteExpect += GeneCode.getCodeFromSymbol(array[b + i]) << (i * 2);
+ }
+ Assert.assertEquals(byteExpect, byteActual); // JUnit order is (expected, actual) so a failure message labels the values correctly
+ KmerBytesWritable.appendOneByteAtPosition(b, byteActual, kmerAppend.getBytes(), kmerAppend.getOffset(),
+ kmerAppend.getLength()); // replay the byte at the same base index
+ }
+ Assert.assertEquals(kmer.toString(), kmerAppend.toString()); // the replayed copy must equal the source kmer
+ }
+ }
+
+ @Test
+ public void TestMergeNextKmer() { // mergeNextKmer: append kmer2 to kmer1 for an overlapping pair, then for all length combinations up to 10
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+ String text = "AGCTGACCGT"; // same bases as array, used to build expected strings
+ KmerBytesWritable kmer1 = new KmerBytesWritable(8);
kmer1.setByRead(array, 0);
- String text1 = "AGCTGACCG";
- KmerBytesWritable kmer2 = new KmerBytesWritable(9);
+ String text1 = "AGCTGACC"; // expected content of kmer1 (read from index 0)
+ KmerBytesWritable kmer2 = new KmerBytesWritable(8);
kmer2.setByRead(array, 1);
- String text2 = "GCTGACCGT";
- Assert.assertEquals(text1, kmer1.toString());
+ String text2 = "GCTGACCG"; // expected content of kmer2 (read from index 1)
Assert.assertEquals(text2, kmer2.toString());
+ KmerBytesWritable merge = new KmerBytesWritable(kmer1);
+ int kmerSize = 8;
+ merge.mergeNextKmer(kmerSize, kmer2); // with initialKmerSize 8 only the last base of kmer2 is appended
+ Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
- KmerBytesWritable merged = new KmerBytesWritable(kmer1);
- merged.mergeNextKmer(9, kmer2);
- Assert.assertEquals("AGCTGACCGT", merged);
+ for (int i = 1; i < 8; i++) { // smaller initialKmerSize values append a longer suffix of kmer2
+ merge.set(kmer1); // reset to kmer1 before each case
+ merge.mergeNextKmer(i, kmer2);
+ Assert.assertEquals(text1 + text2.substring(i - 1), merge.toString()); // suffix of kmer2 from index i - 1
+ }
- KmerBytesWritable kmer3 = new KmerBytesWritable(3);
- kmer3.setByRead(array, 1);
- String text3 = "GCT";
- Assert.assertEquals(text3, kmer3.toString());
- merged.mergeNextKmer(1, kmer3);
- Assert.assertEquals(text1 + text3, merged.toString());
+ for (int ik = 1; ik <= 10; ik++) { // every pair of kmer lengths 1..10
+ for (int jk = 1; jk <= 10; jk++) {
+ kmer1 = new KmerBytesWritable(ik);
+ kmer2 = new KmerBytesWritable(jk);
+ kmer1.setByRead(array, 0); // both kmers are read from the start of array
+ kmer2.setByRead(array, 0);
+ text1 = text.substring(0, ik);
+ text2 = text.substring(0, jk);
+ Assert.assertEquals(text1, kmer1.toString());
+ Assert.assertEquals(text2, kmer2.toString());
+ for (int x = 1; x < jk; x++) { // every initialKmerSize below the length of kmer2
+ merge.set(kmer1);
+ merge.mergeNextKmer(x, kmer2);
+ Assert.assertEquals(text1 + text2.substring(x - 1), merge.toString());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void TestMergePreKmer() { // mergePreKmer: prepend kmer1 to kmer2 for an overlapping pair, then for all length combinations up to 10
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+ String text = "AGCTGACCGT"; // same bases as array, used to build expected strings
+ KmerBytesWritable kmer1 = new KmerBytesWritable(8);
+ kmer1.setByRead(array, 0);
+ String text1 = "AGCTGACC"; // expected content of kmer1 (read from index 0)
+ KmerBytesWritable kmer2 = new KmerBytesWritable(8);
+ kmer2.setByRead(array, 1);
+ String text2 = "GCTGACCG"; // expected content of kmer2 (read from index 1)
+ Assert.assertEquals(text2, kmer2.toString());
+ KmerBytesWritable merge = new KmerBytesWritable(kmer2); // start from kmer2; kmer1 is merged in front of it
+ int kmerSize = 8;
+ merge.mergePreKmer(kmerSize, kmer1);
+ Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString()); // "AGCTGACCG": the 7-base overlap appears once
+
+ for (int i = 1; i < 8; i++) { // smaller initialKmerSize values keep a longer prefix of kmer1
+ merge.set(kmer2); // reset to kmer2 before each case
+ merge.mergePreKmer(i, kmer1);
+ Assert.assertEquals(text1.substring(0, text1.length() - i + 1) + text2, merge.toString()); // first (8 - i + 1) bases of kmer1, then all of kmer2
+ }
+
+ for (int ik = 1; ik <= 10; ik++) { // every pair of kmer lengths 1..10
+ for (int jk = 1; jk <= 10; jk++) {
+ kmer1 = new KmerBytesWritable(ik);
+ kmer2 = new KmerBytesWritable(jk);
+ kmer1.setByRead(array, 0); // both kmers are read from the start of array
+ kmer2.setByRead(array, 0);
+ text1 = text.substring(0, ik);
+ text2 = text.substring(0, jk);
+ Assert.assertEquals(text1, kmer1.toString());
+ Assert.assertEquals(text2, kmer2.toString());
+ for (int x = 1; x < ik; x++) { // every initialKmerSize below the length of kmer1
+ merge.set(kmer2);
+ merge.mergePreKmer(x, kmer1);
+ Assert.assertEquals(text1.substring(0, text1.length() - x + 1) + text2, merge.toString());
+ }
+ }
+ }
}
}