Add getSubKmerFromChain function to VKmerBytesWritableFactory
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritableFactory.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritableFactory.java
index dfc0ee3..ab1e633 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritableFactory.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritableFactory.java
@@ -100,6 +100,35 @@
return kmer;
}
+    /**
+     * Extracts the sub-kmer of {@code kSize} letters that starts at letter index
+     * {@code startK} of {@code kmerChain}.
+     * <p>
+     * The result is written into this factory's {@code kmer} instance and that same
+     * instance is returned, not a copy. Letters are packed two bits each, and the
+     * copy walks both byte arrays from the highest index down to index 0.
+     *
+     * @param startK    zero-based index of the first letter to copy
+     * @param kSize     number of letters to copy
+     * @param kmerChain kmer to copy the letters from
+     * @return the factory's {@code kmer} holding the requested letters, or {@code null}
+     *         when {@code startK} or {@code kSize} is negative or the range reaches
+     *         past the end of {@code kmerChain}
+     */
+    public VKmerBytesWritable getSubKmerFromChain(int startK, int kSize, final KmerBytesWritable kmerChain) {
+        final int chainK = kmerChain.getKmerLength();
+        // Negative arguments used to slip through and produce a negative shift count or
+        // an out-of-range byte index. The subtraction form also avoids the int overflow
+        // that startK + kSize can hit.
+        if (startK < 0 || kSize < 0 || kSize > chainK - startK) {
+            return null;
+        }
+        // The whole chain was requested: no shifting needed.
+        if (startK == 0 && kSize == chainK) {
+            kmer.set(kmerChain);
+            return kmer;
+        }
+        kmer.reset(kSize);
+
+        final byte[] src = kmerChain.getBytes();
+        final byte[] dst = kmer.getBytes();
+        // Bit offset of letter startK inside its chain byte (two bits per letter).
+        final int shift = (startK % 4) << 1;
+
+        /** from end to start */
+        int srcIdx = kmerChain.getLength() - 1 - startK / 4;
+        int dstIdx = kmer.getLength() - 1;
+        for (; dstIdx >= 0 && srcIdx > 0; dstIdx--, srcIdx--) {
+            // Low part comes from the current chain byte, high part from the next lower
+            // one. When shift is 0 the borrowed bits land above bit 7 and the byte cast
+            // discards them.
+            dst[dstIdx] = (byte) (((src[srcIdx] & 0xff) >> shift) | (src[srcIdx - 1] << (8 - shift)));
+        }
+
+        /** last kmer byte */
+        // The chain reached byte 0 before the kmer did, so there is no lower byte to borrow from.
+        if (dstIdx == 0) {
+            dst[0] = (byte) ((src[0] & 0xff) >> shift);
+        }
+        // Keep only the bits of the kSize % 4 letters stored in byte 0.
+        if (kSize % 4 != 0) {
+            dst[0] &= (1 << ((kSize % 4) << 1)) - 1;
+        }
+        return kmer;
+    }
+
/**
* Merge kmer with next neighbor in gene-code format.
* The k of new kmer will increase by 1
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java
index c40729c..6611752 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java
@@ -31,11 +31,50 @@
for (int i = 8; i > 0; i--) {
lastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
Assert.assertEquals("AGCTGACCG".substring(9 - i), lastKmer.toString());
+ lastKmer = kmerFactory.getSubKmerFromChain(9-i, i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(9 - i), lastKmer.toString());
}
VKmerBytesWritable vlastKmer;
for (int i = 8; i > 0; i--) {
vlastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
Assert.assertEquals("AGCTGACCG".substring(9 - i), vlastKmer.toString());
+ vlastKmer = kmerFactory.getSubKmerFromChain(9-i, i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(9 - i), vlastKmer.toString());
+ }
+ }
+
+    /**
+     * Checks that, for every length from 8 down to 1, both getFirstKmerFromChain and
+     * getSubKmerFromChain with start 0 return the matching prefix of the 9-letter
+     * chain "AGCTGACCG".
+     */
+    @Test
+    public void TestGetFirstKmer() {
+        final String read = "AGCTGACCG";
+        KmerBytesWritable chain = new KmerBytesWritable(9);
+        chain.setByRead(array, 0);
+        Assert.assertEquals(read, chain.toString());
+
+        // First pass: hold the results through the KmerBytesWritable type.
+        for (int len = 8; len >= 1; len--) {
+            final String expected = read.substring(0, len);
+            KmerBytesWritable prefix = kmerFactory.getFirstKmerFromChain(len, chain);
+            Assert.assertEquals(expected, prefix.toString());
+            prefix = kmerFactory.getSubKmerFromChain(0, len, chain);
+            Assert.assertEquals(expected, prefix.toString());
+        }
+
+        // Second pass: same checks with the results held as VKmerBytesWritable.
+        for (int len = 8; len >= 1; len--) {
+            final String expected = read.substring(0, len);
+            VKmerBytesWritable vPrefix = kmerFactory.getFirstKmerFromChain(len, chain);
+            Assert.assertEquals(expected, vPrefix.toString());
+            vPrefix = kmerFactory.getSubKmerFromChain(0, len, chain);
+            Assert.assertEquals(expected, vPrefix.toString());
+        }
+    }
+
+    /**
+     * Checks getSubKmerFromChain against String.substring for every window
+     * (start index, size) that fits inside the 9-letter chain "AGCTGACCG".
+     */
+    @Test
+    public void TestGetSubKmer() {
+        final String read = "AGCTGACCG";
+        KmerBytesWritable kmer = new KmerBytesWritable(9);
+        kmer.setByRead(array, 0);
+        Assert.assertEquals(read, kmer.toString());
+        final int chainK = kmer.getKmerLength();
+        VKmerBytesWritable subKmer;
+        // Run the start index up to the last letter as well, so the one-letter window at
+        // the end of the chain is checked too; the previous bound stopped one short.
+        for (int istart = 0; istart < chainK; istart++) {
+            for (int isize = 1; isize + istart <= chainK; isize++) {
+                subKmer = kmerFactory.getSubKmerFromChain(istart, isize, kmer);
+                Assert.assertEquals(read.substring(istart, istart + isize), subKmer.toString());
+            }
         }
     }