make genomix-hadoop genomix-hyracks compilable
diff --git a/genomix/genomix-data/src/main/assembly/binary-assembly.xml b/genomix/genomix-data/src/main/assembly/binary-assembly.xml
index 0500499..68d424a 100644
--- a/genomix/genomix-data/src/main/assembly/binary-assembly.xml
+++ b/genomix/genomix-data/src/main/assembly/binary-assembly.xml
@@ -1,19 +1,19 @@
<assembly>
- <id>binary-assembly</id>
- <formats>
- <format>zip</format>
- <format>dir</format>
- </formats>
- <includeBaseDirectory>false</includeBaseDirectory>
- <fileSets>
- <fileSet>
- <directory>target/appassembler/bin</directory>
- <outputDirectory>bin</outputDirectory>
- <fileMode>0755</fileMode>
- </fileSet>
- <fileSet>
- <directory>target/appassembler/lib</directory>
- <outputDirectory>lib</outputDirectory>
- </fileSet>
- </fileSets>
+ <id>binary-assembly</id>
+ <formats>
+ <format>zip</format>
+ <format>dir</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>target/appassembler/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/appassembler/lib</directory>
+ <outputDirectory>lib</outputDirectory>
+ </fileSet>
+ </fileSets>
</assembly>
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
index ef5a62b..1ed6f80 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
@@ -1,126 +1,126 @@
package edu.uci.ics.genomix.type;
public class GeneCode {
- public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
- /**
- * make sure this 4 ids equal to the sequence id of char in {@GENE_SYMBOL
+ public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
+ /**
+ * make sure these 4 ids equal the index of the matching char in {@link #GENE_SYMBOL
* }
- */
- public static final byte A = 0;
- public static final byte C = 1;
- public static final byte G = 2;
- public static final byte T = 3;
+ */
+ public static final byte A = 0;
+ public static final byte C = 1;
+ public static final byte G = 2;
+ public static final byte T = 3;
- public static byte getCodeFromSymbol(byte ch) {
- byte r = 0;
- switch (ch) {
- case 'A':
- case 'a':
- r = A;
- break;
- case 'C':
- case 'c':
- r = C;
- break;
- case 'G':
- case 'g':
- r = G;
- break;
- case 'T':
- case 't':
- r = T;
- break;
- }
- return r;
- }
+ public static byte getCodeFromSymbol(byte ch) {
+ byte r = 0;
+ switch (ch) {
+ case 'A':
+ case 'a':
+ r = A;
+ break;
+ case 'C':
+ case 'c':
+ r = C;
+ break;
+ case 'G':
+ case 'g':
+ r = G;
+ break;
+ case 'T':
+ case 't':
+ r = T;
+ break;
+ }
+ return r;
+ }
- public static byte getSymbolFromCode(byte code) {
- if (code > 3) {
- return '!';
- }
- return GENE_SYMBOL[code];
- }
+ public static byte getSymbolFromCode(byte code) {
+ if (code < 0 || code > 3) { // any code outside A..T (negative ones included) has no symbol
+ return '!';
+ }
+ return GENE_SYMBOL[code];
+ }
- public static byte getAdjBit(byte t) {
- byte r = 0;
- switch (t) {
- case 'A':
- case 'a':
- r = 1 << A;
- break;
- case 'C':
- case 'c':
- r = 1 << C;
- break;
- case 'G':
- case 'g':
- r = 1 << G;
- break;
- case 'T':
- case 't':
- r = 1 << T;
- break;
- }
- return r;
- }
+ public static byte getAdjBit(byte t) {
+ byte r = 0;
+ switch (t) {
+ case 'A':
+ case 'a':
+ r = 1 << A;
+ break;
+ case 'C':
+ case 'c':
+ r = 1 << C;
+ break;
+ case 'G':
+ case 'g':
+ r = 1 << G;
+ break;
+ case 'T':
+ case 't':
+ r = 1 << T;
+ break;
+ }
+ return r;
+ }
- /**
- * It works for path merge. Merge the kmer by his next, we need to make sure
- * the @{t} is a single neighbor.
- *
- * @param t
- * the neighbor code in BitMap
- * @return the genecode
- */
- public static byte getGeneCodeFromBitMap(byte t) {
- switch (t) {
- case 1 << A:
- return A;
- case 1 << C:
- return C;
- case 1 << G:
- return G;
- case 1 << T:
- return T;
- }
- return -1;
- }
-
- public static int countNumberOfBitSet(int i) {
- int c = 0;
- for (; i != 0; c++) {
- i &= i - 1;
- }
- return c;
- }
-
- public static int inDegree(byte bitmap) {
- return countNumberOfBitSet((bitmap >> 4) & 0x0f);
- }
+ /**
+ * It works for path merge. Merge the kmer by his next, we need to make sure
+ * the @{t} is a single neighbor.
+ *
+ * @param t
+ * the neighbor code in BitMap
+ * @return the genecode
+ */
+ public static byte getGeneCodeFromBitMap(byte t) {
+ switch (t) {
+ case 1 << A:
+ return A;
+ case 1 << C:
+ return C;
+ case 1 << G:
+ return G;
+ case 1 << T:
+ return T;
+ }
+ return -1;
+ }
- public static int outDegree(byte bitmap) {
- return countNumberOfBitSet(bitmap & 0x0f);
- }
+ public static int countNumberOfBitSet(int i) {
+ int c = 0;
+ for (; i != 0; c++) {
+ i &= i - 1;
+ }
+ return c;
+ }
- public static byte mergePreNextAdj(byte pre, byte next) {
- return (byte) (pre << 4 | (next & 0x0f));
- }
+ public static int inDegree(byte bitmap) {
+ return countNumberOfBitSet((bitmap >> 4) & 0x0f);
+ }
- public static String getSymbolFromBitMap(byte code) {
- int left = (code >> 4) & 0x0F;
- int right = code & 0x0F;
- StringBuilder str = new StringBuilder();
- for (int i = A; i <= T; i++) {
- if ((left & (1 << i)) != 0) {
- str.append((char) GENE_SYMBOL[i]);
- }
- }
- str.append('|');
- for (int i = A; i <= T; i++) {
- if ((right & (1 << i)) != 0) {
- str.append((char) GENE_SYMBOL[i]);
- }
- }
- return str.toString();
- }
+ public static int outDegree(byte bitmap) {
+ return countNumberOfBitSet(bitmap & 0x0f);
+ }
+
+ public static byte mergePreNextAdj(byte pre, byte next) {
+ return (byte) (pre << 4 | (next & 0x0f));
+ }
+
+ public static String getSymbolFromBitMap(byte code) {
+ int left = (code >> 4) & 0x0F;
+ int right = code & 0x0F;
+ StringBuilder str = new StringBuilder();
+ for (int i = A; i <= T; i++) {
+ if ((left & (1 << i)) != 0) {
+ str.append((char) GENE_SYMBOL[i]);
+ }
+ }
+ str.append('|');
+ for (int i = A; i <= T; i++) {
+ if ((right & (1 << i)) != 0) {
+ str.append((char) GENE_SYMBOL[i]);
+ }
+ }
+ return str.toString();
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index e8d3e67..ded9f36 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -26,225 +26,228 @@
* It was used to generate the graph in which phase the kmer length doesn't change.
* Thus the size of bytes doesn't change either.
*/
-public class KmerBytesWritable extends BinaryComparable implements
- WritableComparable<BinaryComparable> {
- protected int size;
- protected byte[] bytes;
- protected int kmerlength;
+public class KmerBytesWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
+ protected int size;
+ protected byte[] bytes;
+ protected int kmerlength;
- /**
- * Initial Kmer space by kmerlength
- *
- * @param k
- * kmerlength
- */
- public KmerBytesWritable(int k) {
- this.kmerlength = k;
- this.size = KmerUtil.getByteNumFromK(kmerlength);
- this.bytes = new byte[this.size];
- }
+ /**
+ * Initial Kmer space by kmerlength
+ *
+ * @param k
+ * kmerlength
+ */
+ public KmerBytesWritable(int k) {
+ this.kmerlength = k;
+ this.size = KmerUtil.getByteNumFromK(kmerlength);
+ this.bytes = new byte[this.size];
+ }
- public KmerBytesWritable(KmerBytesWritable right) {
- this.kmerlength = right.kmerlength;
- this.size = right.size;
- this.bytes = new byte[right.size];
- set(right);
- }
+ public KmerBytesWritable(KmerBytesWritable right) {
+ this.kmerlength = right.kmerlength;
+ this.size = right.size;
+ this.bytes = new byte[right.size];
+ set(right);
+ }
- public byte getGeneCodeAtPosition(int pos) {
- if (pos >= kmerlength) {
- return -1;
- }
- int posByte = pos / 4;
- int shift = (pos % 4) << 1;
- return (byte) ((bytes[size - 1 - posByte] >> shift) & 0x3);
- }
+ public byte getGeneCodeAtPosition(int pos) {
+ if (pos >= kmerlength) {
+ return -1;
+ }
+ int posByte = pos / 4;
+ int shift = (pos % 4) << 1;
+ return (byte) ((bytes[size - 1 - posByte] >> shift) & 0x3);
+ }
- public int getKmerLength() {
- return this.kmerlength;
- }
+ public int getKmerLength() {
+ return this.kmerlength;
+ }
- @Override
- public byte[] getBytes() {
- return bytes;
- }
+ @Override
+ public byte[] getBytes() {
+ return bytes;
+ }
- @Override
- public int getLength() {
- return size;
- }
+ @Override
+ public int getLength() {
+ return size;
+ }
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param array
- * @param start
- */
- public void setByRead(byte[] array, int start) {
- byte l = 0;
- int bytecount = 0;
- int bcount = this.size - 1;
- for (int i = start; i < start + kmerlength; i++) {
- byte code = GeneCode.getCodeFromSymbol(array[i]);
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[0] = l;
- }
- }
+ /**
+ * Read Kmer from read text into bytes array e.g. AATAG will compress as
+ * [0x000G, 0xATAA]
+ * Symbols are packed two bits each; the last byte of the array is filled first.
+ *
+ * @param array read text; each byte is mapped by GeneCode.getCodeFromSymbol
+ * @param start index in array of the first symbol of the kmer
+ */
+ public void setByRead(byte[] array, int start) {
+ byte l = 0; // byte currently being packed
+ int bytecount = 0; // bit offset of the next symbol inside l
+ int bcount = this.size - 1; // next byte to fill, walking toward index 0
+ for (int i = start; i < start + kmerlength; i++) {
+ byte code = GeneCode.getCodeFromSymbol(array[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) { // four symbols packed: flush the full byte
+ bytes[bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ if (bcount >= 0) { // whatever is left in l goes into the leading byte
+ bytes[0] = l;
+ }
+ }
- /**
- * Compress Reversed Kmer into bytes array AATAG will compress as
- * [0x000A,0xATAG]
- *
- * @param input
- * array
- * @param start
- * position
- */
- public void setByReadReverse(byte[] array, int start) {
- byte l = 0;
- int bytecount = 0;
- int bcount = size - 1;
- for (int i = start + kmerlength - 1; i >= 0; i--) {
- byte code = GeneCode.getCodeFromSymbol(array[i]);
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[0] = l;
- }
- }
+ /**
+ * Compress Reversed Kmer into bytes array AATAG will compress as
+ * [0x000A,0xATAG]
+ * Only the kmerlength symbols starting at array[start] are read, last one first.
+ *
+ * @param array input read text
+ * @param start
+ * index in array of the first symbol of the kmer
+ */
+ public void setByReadReverse(byte[] array, int start) {
+ byte l = 0;
+ int bytecount = 0;
+ int bcount = size - 1;
+ for (int i = start + kmerlength - 1; i >= start; i--) { // stop at start, not at index 0
+ byte code = GeneCode.getCodeFromSymbol(array[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ bytes[bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ if (bcount >= 0) {
+ bytes[0] = l;
+ }
+ }
- /**
- * Shift Kmer to accept new char input
- *
- * @param c
- * Input new gene character
- * @return the shift out gene, in gene code format
- */
- public byte shiftKmerWithNextChar(byte c) {
- return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
- }
+ /**
+ * Shift Kmer to accept new char input
+ *
+ * @param c
+ * Input new gene character
+ * @return the shift out gene, in gene code format
+ */
+ public byte shiftKmerWithNextChar(byte c) {
+ return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
+ }
- /**
- * Shift Kmer to accept new gene code
- *
- * @param c
- * Input new gene code
- * @return the shift out gene, in gene code format
- */
- public byte shiftKmerWithNextCode(byte c) {
- byte output = (byte) (bytes[size - 1] & 0x03);
- for (int i = size - 1; i > 0; i--) {
- byte in = (byte) (bytes[i - 1] & 0x03);
- bytes[i] = (byte) (((bytes[i] >>> 2) & 0x3f) | (in << 6));
- }
- int pos = ((kmerlength - 1) % 4) << 1;
- byte code = (byte) (c << pos);
- bytes[0] = (byte) (((bytes[0] >>> 2) & 0x3f) | code);
- return (byte) (1 << output);
- }
+ /**
+ * Shift Kmer to accept new gene code
+ *
+ * @param c
+ * Input new gene code
+ * @return the shift out gene, in gene code format
+ */
+ public byte shiftKmerWithNextCode(byte c) {
+ byte output = (byte) (bytes[size - 1] & 0x03);
+ for (int i = size - 1; i > 0; i--) {
+ byte in = (byte) (bytes[i - 1] & 0x03);
+ bytes[i] = (byte) (((bytes[i] >>> 2) & 0x3f) | (in << 6));
+ }
+ int pos = ((kmerlength - 1) % 4) << 1;
+ byte code = (byte) (c << pos);
+ bytes[0] = (byte) (((bytes[0] >>> 2) & 0x3f) | code);
+ return (byte) (1 << output);
+ }
- /**
- * Shift Kmer to accept new input char
- *
- * @param c
- * Input new gene character
- * @return the shiftout gene, in gene code format
- */
- public byte shiftKmerWithPreChar(byte c) {
- return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
- }
+ /**
+ * Shift Kmer to accept new input char
+ *
+ * @param c
+ * Input new gene character
+ * @return the shiftout gene, in gene code format
+ */
+ public byte shiftKmerWithPreChar(byte c) {
+ return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
+ }
- /**
- * Shift Kmer to accept new gene code
- *
- * @param c
- * Input new gene code
- * @return the shiftout gene, in gene code format
- */
- public byte shiftKmerWithPreCode(byte c) {
- int pos = ((kmerlength - 1) % 4) << 1;
- byte output = (byte) ((bytes[0] >> pos) & 0x03);
- for (int i = 0; i < size - 1; i++) {
- byte in = (byte) ((bytes[i + 1] >> 6) & 0x03);
- bytes[i] = (byte) ((bytes[i] << 2) | in);
- }
- // (k%4) * 2
- if (kmerlength % 4 != 0) {
- bytes[0] &= (1 << ((kmerlength % 4) << 1)) - 1;
- }
- bytes[size - 1] = (byte) ((bytes[size - 1] << 2) | c);
- return (byte) (1 << output);
- }
+ /**
+ * Shift Kmer to accept new gene code
+ *
+ * @param c
+ * Input new gene code
+ * @return the shiftout gene, in gene code format
+ */
+ public byte shiftKmerWithPreCode(byte c) {
+ int pos = ((kmerlength - 1) % 4) << 1;
+ byte output = (byte) ((bytes[0] >> pos) & 0x03);
+ for (int i = 0; i < size - 1; i++) {
+ byte in = (byte) ((bytes[i + 1] >> 6) & 0x03);
+ bytes[i] = (byte) ((bytes[i] << 2) | in);
+ }
+ // (k%4) * 2
+ if (kmerlength % 4 != 0) {
+ bytes[0] &= (1 << ((kmerlength % 4) << 1)) - 1;
+ }
+ bytes[size - 1] = (byte) ((bytes[size - 1] << 2) | c);
+ return (byte) (1 << output);
+ }
- public void set(KmerBytesWritable newData) {
- set(newData.bytes, 0, newData.size);
- }
+ public void set(KmerBytesWritable newData) {
+ set(newData.bytes, 0, newData.size);
+ }
- public void set(byte[] newData, int offset, int length) {
- System.arraycopy(newData, offset, bytes, 0, size);
- }
+ public void set(byte[] newData, int offset, int length) {
+ System.arraycopy(newData, offset, bytes, 0, size);
+ }
- /**
- * Don't read the kmerlength from datastream,
- * Read it from configuration
- */
- @Override
- public void readFields(DataInput in) throws IOException {
- in.readFully(bytes, 0, size);
- }
+ /**
+ * Read the kmerlength from the data stream, then the packed kmer bytes,
+ * in the order write() emits them; the array grows only when it is too small.
+ */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ this.kmerlength = in.readInt();
+ this.size = KmerUtil.getByteNumFromK(kmerlength);
+ if ( this.bytes.length < this.size){
+ this.bytes = new byte[this.size];
+ }
+ in.readFully(bytes, 0, size);
+ }
- @Override
- public void write(DataOutput out) throws IOException {
- out.write(bytes, 0, size);
- }
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(kmerlength);
+ out.write(bytes, 0, size);
+ }
- @Override
- public int hashCode() {
- return super.hashCode() * this.kmerlength;
- }
+ @Override
+ public int hashCode() {
+ return super.hashCode() * this.kmerlength;
+ }
- @Override
- public boolean equals(Object right_obj) {
- if (right_obj instanceof KmerBytesWritable)
- return this.kmerlength == ((KmerBytesWritable) right_obj).kmerlength
- && super.equals(right_obj);
- return false;
- }
+ @Override
+ public boolean equals(Object right_obj) {
+ if (right_obj instanceof KmerBytesWritable)
+ return this.kmerlength == ((KmerBytesWritable) right_obj).kmerlength && super.equals(right_obj);
+ return false;
+ }
- @Override
- public String toString() {
- return KmerUtil.recoverKmerFrom(this.kmerlength, this.getBytes(), 0,
- this.getLength());
- }
+ @Override
+ public String toString() {
+ return KmerUtil.recoverKmerFrom(this.kmerlength, this.getBytes(), 0, this.getLength());
+ }
- public static class Comparator extends WritableComparator {
- public Comparator() {
- super(KmerBytesWritable.class);
- }
+ public static class Comparator extends WritableComparator {
+ public Comparator() {
+ super(KmerBytesWritable.class);
+ }
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- return compareBytes(b1, s1, l1, b2, s2, l2);
- }
- }
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ return compareBytes(b1, s1, l1, b2, s2, l2);
+ }
+ }
- static { // register this comparator
- WritableComparator.define(KmerBytesWritable.class, new Comparator());
- }
+ static { // register this comparator
+ WritableComparator.define(KmerBytesWritable.class, new Comparator());
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerCountValue.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerCountValue.java
index 60ad5a3..fab7001 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerCountValue.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerCountValue.java
@@ -21,50 +21,49 @@
import org.apache.hadoop.io.Writable;
public class KmerCountValue implements Writable {
- private byte adjBitMap;
- private byte count;
+ private byte adjBitMap;
+ private byte count;
- public KmerCountValue(byte bitmap, byte count) {
- set(bitmap, count);
- }
+ public KmerCountValue(byte bitmap, byte count) {
+ set(bitmap, count);
+ }
- public KmerCountValue() {
- adjBitMap = 0;
- count = 0;
- }
+ public KmerCountValue() {
+ adjBitMap = 0;
+ count = 0;
+ }
- @Override
- public void readFields(DataInput arg0) throws IOException {
- adjBitMap = arg0.readByte();
- count = arg0.readByte();
- }
+ @Override
+ public void readFields(DataInput arg0) throws IOException {
+ adjBitMap = arg0.readByte();
+ count = arg0.readByte();
+ }
- @Override
- public void write(DataOutput arg0) throws IOException {
- arg0.writeByte(adjBitMap);
- arg0.writeByte(count);
- }
+ @Override
+ public void write(DataOutput arg0) throws IOException {
+ arg0.writeByte(adjBitMap);
+ arg0.writeByte(count);
+ }
- @Override
- public String toString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t'
- + String.valueOf(count);
- }
+ @Override
+ public String toString() {
+ return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(count);
+ }
- public void set(byte bitmap, byte count) {
- this.adjBitMap = bitmap;
- this.count = count;
- }
+ public void set(byte bitmap, byte count) {
+ this.adjBitMap = bitmap;
+ this.count = count;
+ }
- public byte getAdjBitMap() {
- return adjBitMap;
- }
+ public byte getAdjBitMap() {
+ return adjBitMap;
+ }
- public void setAdjBitMap(byte adjBitMap) {
- this.adjBitMap = adjBitMap;
- }
+ public void setAdjBitMap(byte adjBitMap) {
+ this.adjBitMap = adjBitMap;
+ }
- public byte getCount() {
- return count;
- }
+ public byte getCount() {
+ return count;
+ }
}
\ No newline at end of file
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
index 82fe1a1..9dc1dde 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
@@ -1,33 +1,31 @@
package edu.uci.ics.genomix.type;
-
public class KmerUtil {
- public static int getByteNumFromK(int k) {
- int x = k / 4;
- if (k % 4 != 0) {
- x += 1;
- }
- return x;
- }
-
- public static byte reverseKmerByte(byte k) {
- int x = (((k >> 2) & 0x33) | ((k << 2) & 0xcc));
- return (byte) (((x >> 4) & 0x0f) | ((x << 4) & 0xf0));
- }
-
- public static String recoverKmerFrom(int k, byte[] keyData, int keyStart,
- int keyLength) {
- StringBuilder strKmer = new StringBuilder();
- int byteId = keyStart + keyLength - 1;
- byte currentbyte = keyData[byteId];
- for (int geneCount = 0; geneCount < k; geneCount++) {
- if (geneCount % 4 == 0 && geneCount > 0) {
- currentbyte = keyData[--byteId];
- }
- strKmer.append((char) GeneCode.GENE_SYMBOL[(currentbyte >> ((geneCount % 4) * 2)) & 0x03]);
- }
- return strKmer.toString();
- }
-
+ public static int getByteNumFromK(int k) {
+ int x = k / 4;
+ if (k % 4 != 0) {
+ x += 1;
+ }
+ return x;
+ }
+
+ public static byte reverseKmerByte(byte k) {
+ int x = (((k >> 2) & 0x33) | ((k << 2) & 0xcc));
+ return (byte) (((x >> 4) & 0x0f) | ((x << 4) & 0xf0));
+ }
+
+ public static String recoverKmerFrom(int k, byte[] keyData, int keyStart, int keyLength) {
+ StringBuilder strKmer = new StringBuilder();
+ int byteId = keyStart + keyLength - 1;
+ byte currentbyte = keyData[byteId];
+ for (int geneCount = 0; geneCount < k; geneCount++) {
+ if (geneCount % 4 == 0 && geneCount > 0) {
+ currentbyte = keyData[--byteId];
+ }
+ strKmer.append((char) GeneCode.GENE_SYMBOL[(currentbyte >> ((geneCount % 4) * 2)) & 0x03]);
+ }
+ return strKmer.toString();
+ }
+
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index 67de889..b2af5ee 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -6,119 +6,110 @@
import org.apache.hadoop.io.WritableComparator;
-public class VKmerBytesWritable extends KmerBytesWritable{
+public class VKmerBytesWritable extends KmerBytesWritable {
+ public static final int DEFAULT_KMER_LENGTH = 21;
+
+ public VKmerBytesWritable(){
+ this(DEFAULT_KMER_LENGTH);
+ }
+
+ public VKmerBytesWritable(int k) {
+ super(k);
+ }
- public VKmerBytesWritable(int k) {
- super(k);
- }
-
- public VKmerBytesWritable(KmerBytesWritable other){
- super(other);
- }
+ public VKmerBytesWritable(KmerBytesWritable other) {
+ super(other);
+ }
- public void setSize(int size) {
- if (size > getCapacity()) {
- setCapacity( (size * 3 / 2));
- }
- this.size = size;
- }
+ public void setSize(int size) {
+ if (size > getCapacity()) {
+ setCapacity((size * 3 / 2));
+ }
+ this.size = size;
+ }
- public int getCapacity() {
- return bytes.length;
- }
+ public int getCapacity() {
+ return bytes.length;
+ }
- public void setCapacity(int new_cap) {
- if (new_cap != getCapacity()) {
- byte[] new_data = new byte[new_cap];
- if (new_cap < size) {
- size = new_cap;
- }
- if (size != 0) {
- System.arraycopy(bytes, 0, new_data, 0, size);
- }
- bytes = new_data;
- }
- }
-
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param array
- * @param start
- */
- public void setByRead(int k, byte[] array, int start) {
- reset(k);
- super.setByRead(array, start);
- }
+ public void setCapacity(int new_cap) {
+ if (new_cap != getCapacity()) {
+ byte[] new_data = new byte[new_cap];
+ if (new_cap < size) {
+ size = new_cap;
+ }
+ if (size != 0) {
+ System.arraycopy(bytes, 0, new_data, 0, size);
+ }
+ bytes = new_data;
+ }
+ }
- /**
- * Compress Reversed Kmer into bytes array AATAG will compress as
- * [0x000A,0xATAG]
- *
- * @param input
- * array
- * @param start
- * position
- */
- public void setByReadReverse(int k, byte[] array, int start) {
- reset(k);
- super.setByReadReverse(array, start);
- }
+ /**
+ * Read Kmer from read text into bytes array e.g. AATAG will compress as
+ * [0x000G, 0xATAA]
+ *
+ * @param k
+ * @param array
+ * @param start
+ */
+ public void setByRead(int k, byte[] array, int start) {
+ reset(k);
+ super.setByRead(array, start);
+ }
- public void set(KmerBytesWritable newData) {
- set(newData.kmerlength, newData.bytes, 0, newData.size);
- }
+ /**
+ * Compress Reversed Kmer into bytes array AATAG will compress as
+ * [0x000A,0xATAG]
+ *
+ * @param input
+ * array
+ * @param start
+ * position
+ */
+ public void setByReadReverse(int k, byte[] array, int start) {
+ reset(k);
+ super.setByReadReverse(array, start);
+ }
- public void set(int k, byte[] newData, int offset, int length) {
- reset(k);
- System.arraycopy(newData, offset, bytes, 0, size);
- }
+ public void set(KmerBytesWritable newData) {
+ set(newData.kmerlength, newData.bytes, 0, newData.size);
+ }
- /**
- * Reset array by kmerlength
- * @param k
- */
- public void reset(int k) {
- this.kmerlength = k;
- setSize( 0);
- setSize( KmerUtil.getByteNumFromK(k));
- }
+ public void set(int k, byte[] newData, int offset, int length) {
+ reset(k);
+ System.arraycopy(newData, offset, bytes, 0, size);
+ }
- @Override
- public void readFields(DataInput in) throws IOException {
- reset(in.readInt());
- in.readFully(bytes, 0, size);
- }
+ /**
+ * Reset array by kmerlength
+ *
+ * @param k
+ */
+ public void reset(int k) {
+ this.kmerlength = k;
+ setSize(0);
+ setSize(KmerUtil.getByteNumFromK(k));
+ }
- /**
- * Write the kmer to output
- * we don't need to output size, since size is related to kmerlength
- */
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(this.kmerlength);
- out.write(bytes, 0, size);
- }
+ public static class Comparator extends WritableComparator {
+ public final int LEAD_BYTES = 4;
- public static class Comparator extends WritableComparator {
- public final int LEAD_BYTES = 4;
- public Comparator() {
- super(KmerBytesWritable.class);
- }
+ public Comparator() {
+ super(KmerBytesWritable.class);
+ }
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- int kmerlength1 = readInt(b1,s1);
- int kmerlength2 = readInt(b2,s2);
- if (kmerlength1 == kmerlength2){
- compareBytes(b1, s1 + LEAD_BYTES, l1-LEAD_BYTES, b2, s2+LEAD_BYTES, l2-LEAD_BYTES);
- }
- return kmerlength1 - kmerlength2 ;
- }
- }
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ int kmerlength1 = readInt(b1, s1); // each record starts with its kmerlength as a 4-byte int
+ int kmerlength2 = readInt(b2, s2);
+ if (kmerlength1 == kmerlength2) {
+ return compareBytes(b1, s1 + LEAD_BYTES, l1 - LEAD_BYTES, b2, s2 + LEAD_BYTES, l2 - LEAD_BYTES); // same length: order by kmer bytes
+ }
+ return kmerlength1 - kmerlength2; // different lengths: the shorter kmer sorts first
+ }
+ }
- static { // register this comparator
- WritableComparator.define(KmerBytesWritable.class, new Comparator());
- }
+ static { // register this comparator
+ WritableComparator.define(KmerBytesWritable.class, new Comparator());
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritableFactory.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritableFactory.java
index a7bcc8b..dfc0ee3 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritableFactory.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritableFactory.java
@@ -1,241 +1,263 @@
package edu.uci.ics.genomix.type;
public class VKmerBytesWritableFactory {
- private VKmerBytesWritable kmer;
-
- public VKmerBytesWritableFactory(int k){
- kmer = new VKmerBytesWritable(k);
- }
+ private VKmerBytesWritable kmer;
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param array
- * @param start
- */
- public VKmerBytesWritable getKmerByRead(int k, byte[] array, int start) {
- kmer.setByRead(k, array, start);
- return kmer;
- }
+ public VKmerBytesWritableFactory(int k) { // allocates the single kmer instance that the factory methods fill and return
+ kmer = new VKmerBytesWritable(k);
+ }
- /**
- * Compress Reversed Kmer into bytes array AATAG will compress as
- * [0x000A,0xATAG]
- *
- * @param array
- * @param start
- */
- public VKmerBytesWritable getKmerByReadReverse(int k, byte[] array, int start) {
- kmer.setByReadReverse(k, array, start);
- return kmer;
- }
-
- /**
- * Get last kmer from kmer-chain.
- * e.g. kmerChain is AAGCTA, if k =5, it will
- * return AGCTA
- * @param k
- * @param kInChain
- * @param kmerChain
- * @return LastKmer bytes array
- */
- public VKmerBytesWritable getLastKmerFromChain(int lastK, final KmerBytesWritable kmerChain) {
- if (lastK > kmerChain.getKmerLength()) {
- return null;
- }
- if (lastK == kmerChain.getKmerLength()) {
- kmer.set(kmerChain);
- return kmer;
- }
- kmer.reset(lastK);
+ /**
+ * Read Kmer from read text into bytes array e.g. AATAG will compress as
+ * [0x000G, 0xATAA]
+ * The result is this factory's shared kmer; the next factory call overwrites it.
+ * @param k length of the kmer to read
+ * @param array read text, one gene symbol ('A', 'C', 'G', 'T') per byte
+ * @param start index in {@code array} of the first symbol to read
+ */
+ public VKmerBytesWritable getKmerByRead(int k, byte[] array, int start) {
+ kmer.setByRead(k, array, start);
+ return kmer;
+ }
- /** from end to start */
- int byteInChain = kmerChain.getLength() - 1 - (kmerChain.getKmerLength() - lastK) / 4;
- int posInByteOfChain = ((kmerChain.getKmerLength() - lastK) % 4) << 1; // *2
- int byteInKmer = kmer.getLength() - 1;
- for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
- }
+ /**
+ * Compress Reversed Kmer into bytes array AATAG will compress as
+ * [0x000A,0xATAG]. The result is this factory's shared kmer; the next factory call overwrites it.
+ * @param k length of the kmer to read
+ * @param array read text, one gene symbol per byte
+ * @param start index in {@code array} handed to setByReadReverse
+ */
+ public VKmerBytesWritable getKmerByReadReverse(int k, byte[] array, int start) {
+ kmer.setByReadReverse(k, array, start);
+ return kmer;
+ }
- /** last kmer byte */
- if (byteInKmer == 0) {
- kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
- }
- return kmer;
- }
-
- /**
- * Get first kmer from kmer-chain e.g. kmerChain is AAGCTA, if k=5, it will
- * return AAGCT
- *
- * @param k
- * @param kInChain
- * @param kmerChain
- * @return FirstKmer bytes array
- */
- public VKmerBytesWritable getFirstKmerFromChain(int firstK, final KmerBytesWritable kmerChain) {
- if (firstK > kmerChain.getKmerLength()) {
- return null;
- }
- if (firstK == kmerChain.getKmerLength()) {
- kmer.set(kmerChain);
- return kmer;
- }
- kmer.reset(firstK);
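+ /**
+ * Get the last kmer from a kmer-chain.
+ * e.g. if kmerChain is AAGCTA and lastK = 5, it will
+ * return AGCTA
+ * The result is this factory's shared kmer; the next factory call overwrites it.
+ *
+ * @param lastK length of the kmer to take from the end of the chain
+ * @param kmerChain the chain to read from
+ * @return the shared kmer holding the last lastK genes, or null if lastK exceeds the chain's kmer length
+ */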
+ public VKmerBytesWritable getLastKmerFromChain(int lastK, final KmerBytesWritable kmerChain) {
+ if (lastK > kmerChain.getKmerLength()) {
+ return null;
+ }
+ if (lastK == kmerChain.getKmerLength()) {
+ kmer.set(kmerChain);
+ return kmer;
+ }
+ kmer.reset(lastK);
- int i = 1;
- for (; i < kmer.getLength(); i++) {
- kmer.getBytes()[kmer.getLength() - i] = kmerChain.getBytes()[kmerChain.getLength() - i];
- }
- int posInByteOfChain = (firstK % 4) << 1; // *2
- if (posInByteOfChain == 0) {
- kmer.getBytes()[0] = kmerChain.getBytes()[kmerChain.getLength() - i];
- } else {
- kmer.getBytes()[0] = (byte) (kmerChain.getBytes()[kmerChain.getLength() - i] & ((1 << posInByteOfChain) - 1));
- }
- return kmer;
- }
-
- /**
- * Merge kmer with next neighbor in gene-code format.
- * The k of new kmer will increase by 1
- * e.g. AAGCT merge with A => AAGCTA
- * @param k :input k of kmer
- * @param kmer : input bytes of kmer
- * @param nextCode: next neighbor in gene-code format
- * @return the merged Kmer, this K of this Kmer is k+1
- */
- public VKmerBytesWritable mergeKmerWithNextCode(final KmerBytesWritable kmer, byte nextCode) {
- this.kmer.reset(kmer.getKmerLength()+1);
- for (int i = 1; i <= kmer.getLength(); i++) {
- this.kmer.getBytes()[this.kmer.getLength() - i] = kmer.getBytes()[kmer.getLength() - i];
- }
- if (this.kmer.getLength() > kmer.getLength()) {
- this.kmer.getBytes()[0] = (byte) (nextCode & 0x3);
- } else {
- this.kmer.getBytes()[0] = (byte) (kmer.getBytes()[0] | ((nextCode & 0x3) << ((kmer.getKmerLength() % 4) << 1)));
- }
- return this.kmer;
- }
-
- /**
- * Merge kmer with previous neighbor in gene-code format.
- * The k of new kmer will increase by 1
- * e.g. AAGCT merge with A => AAAGCT
- * @param k :input k of kmer
- * @param kmer : input bytes of kmer
- * @param preCode: next neighbor in gene-code format
- * @return the merged Kmer,this K of this Kmer is k+1
- */
- public VKmerBytesWritable mergeKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
- this.kmer.reset(kmer.getKmerLength()+1);
- int byteInMergedKmer = 0;
- if (kmer.getKmerLength() % 4 == 0) {
- this.kmer.getBytes()[0] = (byte) ((kmer.getBytes()[0] >> 6) & 0x3);
- byteInMergedKmer++;
- }
- for (int i = 0; i < kmer.getLength() - 1; i++, byteInMergedKmer++) {
- this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[i] << 2) | ((kmer.getBytes()[ i + 1] >> 6) & 0x3));
- }
- this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[kmer.getLength() - 1] << 2) | (preCode & 0x3));
- return this.kmer;
- }
-
- /**
- * Merge two kmer to one kmer
- * e.g. ACTA + ACCGT => ACTAACCGT
- * @param preK : previous k of kmer
- * @param kmerPre : bytes array of previous kmer
- * @param nextK : next k of kmer
- * @param kmerNext : bytes array of next kmer
- * @return merged kmer, the new k is @preK + @nextK
- */
- public VKmerBytesWritable mergeTwoKmer(final KmerBytesWritable preKmer, final KmerBytesWritable nextKmer) {
- kmer.reset(preKmer.getKmerLength() + nextKmer.getKmerLength());
- int i = 1;
- for (; i <= preKmer.getLength(); i++) {
- kmer.getBytes()[kmer.getLength() - i] = preKmer.getBytes()[preKmer.getLength() - i];
- }
- if ( i > 1){
- i--;
- }
- if (preKmer.getKmerLength() % 4 == 0) {
- for (int j = 1; j <= nextKmer.getLength(); j++) {
- kmer.getBytes()[kmer.getLength() - i - j] = nextKmer.getBytes()[nextKmer.getLength() - j];
- }
- } else {
- int posNeedToMove = ((preKmer.getKmerLength() % 4) << 1);
- kmer.getBytes()[kmer.getLength() - i] |= nextKmer.getBytes()[ nextKmer.getLength() - 1] << posNeedToMove;
- for (int j = 1; j < nextKmer.getLength(); j++) {
- kmer.getBytes()[kmer.getLength() - i - j] = (byte) (((nextKmer.getBytes()[ nextKmer.getLength()
- - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer.getBytes()[nextKmer.getLength()
- - j - 1] << posNeedToMove));
- }
- if ( nextKmer.getKmerLength() % 4 == 0 || (nextKmer.getKmerLength() % 4) * 2 + posNeedToMove > 8) {
- kmer.getBytes()[0] = (byte) ((0xff & nextKmer.getBytes()[0] )>> (8 - posNeedToMove));
- }
- }
- return kmer;
- }
-
- /**
- * Safely shifted the kmer forward without change the input kmer
- * e.g. AGCGC shift with T => GCGCT
- * @param k: kmer length
- * @param kmer: input kmer
- * @param afterCode: input genecode
- * @return new created kmer that shifted by afterCode, the K will not change
- */
- public VKmerBytesWritable shiftKmerWithNextCode(final KmerBytesWritable kmer, byte afterCode){
- this.kmer.set(kmer);
- this.kmer.shiftKmerWithNextCode(afterCode);
- return this.kmer;
- }
-
- /**
- * Safely shifted the kmer backward without change the input kmer
- * e.g. AGCGC shift with T => TAGCG
- * @param k: kmer length
- * @param kmer: input kmer
- * @param preCode: input genecode
- * @return new created kmer that shifted by preCode, the K will not change
- */
- public VKmerBytesWritable shiftKmerWithPreCode(final KmerBytesWritable kmer, byte preCode){
- this.kmer.set(kmer);
- this.kmer.shiftKmerWithPreCode(preCode);
- return this.kmer;
- }
-
- /**
- * get the reverse sequence of given kmer
- * @param kmer
- */
- public VKmerBytesWritable reverse(final KmerBytesWritable kmer) {
- this.kmer.reset(kmer.getKmerLength());
+ /** from end to start */
+ int byteInChain = kmerChain.getLength() - 1 - (kmerChain.getKmerLength() - lastK) / 4;
+ int posInByteOfChain = ((kmerChain.getKmerLength() - lastK) % 4) << 1; // *2
+ int byteInKmer = kmer.getLength() - 1;
+ for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
+ kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
+ kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
+ }
- int curPosAtKmer = ((kmer.getKmerLength() - 1) % 4) << 1;
- int curByteAtKmer = 0;
+ /** last kmer byte */
+ if (byteInKmer == 0) {
+ kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
+ }
+ return kmer;
+ }
- int curPosAtReverse = 0;
- int curByteAtReverse = this.kmer.getLength() - 1;
- this.kmer.getBytes()[curByteAtReverse] = 0;
- for (int i = 0; i < kmer.getKmerLength(); i++) {
- byte gene = (byte) ((kmer.getBytes()[curByteAtKmer] >> curPosAtKmer) & 0x03);
- this.kmer.getBytes()[curByteAtReverse] |= gene << curPosAtReverse;
- curPosAtReverse += 2;
- if (curPosAtReverse >= 8) {
- curPosAtReverse = 0;
- this.kmer.getBytes()[--curByteAtReverse] = 0;
- }
- curPosAtKmer -= 2;
- if (curPosAtKmer < 0) {
- curPosAtKmer = 6;
- curByteAtKmer++;
- }
- }
- return this.kmer;
- }
+ /**
+ * Get the first kmer from a kmer-chain, e.g. if kmerChain is AAGCTA and firstK=5, it will
+ * return AAGCT. The result is this factory's shared kmer; the next factory call overwrites it.
+ *
+ * @param firstK length of the kmer to take from the start of the chain
+ * @param kmerChain the chain to read from
+ * @return the shared kmer holding the first firstK genes,
+ *         or null if firstK exceeds the chain's kmer length
+ */
+ public VKmerBytesWritable getFirstKmerFromChain(int firstK, final KmerBytesWritable kmerChain) {
+ if (firstK > kmerChain.getKmerLength()) {
+ return null;
+ }
+ if (firstK == kmerChain.getKmerLength()) { // whole chain requested: plain copy
+ kmer.set(kmerChain);
+ return kmer;
+ }
+ kmer.reset(firstK);
+
+ int i = 1;
+ for (; i < kmer.getLength(); i++) { // copy all but byte 0, aligned at the end of both arrays
+ kmer.getBytes()[kmer.getLength() - i] = kmerChain.getBytes()[kmerChain.getLength() - i];
+ }
+ int posInByteOfChain = (firstK % 4) << 1; // *2
+ if (posInByteOfChain == 0) { // firstK is a multiple of 4: byte 0 is taken whole
+ kmer.getBytes()[0] = kmerChain.getBytes()[kmerChain.getLength() - i];
+ } else { // otherwise keep only the low posInByteOfChain bits of the chain byte
+ kmer.getBytes()[0] = (byte) (kmerChain.getBytes()[kmerChain.getLength() - i] & ((1 << posInByteOfChain) - 1));
+ }
+ return kmer;
+ }
+
+ /**
+ * Merge kmer with next neighbor in gene-code format.
+ * The k of new kmer will increase by 1
+ * e.g. AAGCT merge with A => AAGCTA
+ * The result is this factory's shared kmer, which is reset before the input is
+ * read (so do not pass it back in) and is overwritten by the next factory call.
+ *
+ * @param kmer
+ * : input kmer
+ * @param nextCode
+ * : next neighbor in gene-code format; only its low 2 bits are used
+ * @return the merged Kmer, this K of this Kmer is k+1
+ */
+ public VKmerBytesWritable mergeKmerWithNextCode(final KmerBytesWritable kmer, byte nextCode) {
+ this.kmer.reset(kmer.getKmerLength() + 1);
+ for (int i = 1; i <= kmer.getLength(); i++) { // copy the input bytes, aligned at the end of both arrays
+ this.kmer.getBytes()[this.kmer.getLength() - i] = kmer.getBytes()[kmer.getLength() - i];
+ }
+ if (this.kmer.getLength() > kmer.getLength()) { // the extra gene needed a new leading byte: it holds only nextCode
+ this.kmer.getBytes()[0] = (byte) (nextCode & 0x3);
+ } else { // same byte count: OR nextCode into byte 0 at bit offset (k % 4) * 2
+ this.kmer.getBytes()[0] = (byte) (kmer.getBytes()[0] | ((nextCode & 0x3) << ((kmer.getKmerLength() % 4) << 1)));
+ }
+ return this.kmer;
+ }
+
+ /**
+ * Merge kmer with previous neighbor in gene-code format.
+ * The k of new kmer will increase by 1
+ * e.g. AAGCT merge with A => AAAGCT
+ * The result is this factory's shared kmer, which is reset before the input is
+ * read (so do not pass it back in) and is overwritten by the next factory call.
+ *
+ * @param kmer
+ * : input kmer
+ * @param preCode
+ * : previous neighbor in gene-code format; only its low 2 bits are used
+ * @return the merged Kmer,this K of this Kmer is k+1
+ */
+ public VKmerBytesWritable mergeKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
+ this.kmer.reset(kmer.getKmerLength() + 1);
+ int byteInMergedKmer = 0;
+ if (kmer.getKmerLength() % 4 == 0) { // k is a multiple of 4: the top 2 bits of input byte 0 go into a new leading byte
+ this.kmer.getBytes()[0] = (byte) ((kmer.getBytes()[0] >> 6) & 0x3);
+ byteInMergedKmer++;
+ }
+ for (int i = 0; i < kmer.getLength() - 1; i++, byteInMergedKmer++) { // shift each byte up 2 bits, pulling in the top 2 bits of the following byte
+ this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[i] << 2) | ((kmer.getBytes()[i + 1] >> 6) & 0x3));
+ }
+ this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[kmer.getLength() - 1] << 2) | (preCode & 0x3)); // last byte: shift up and place preCode in the lowest 2 bits
+ return this.kmer;
+ }
+
+ /**
+ * Merge two kmer to one kmer
+ * e.g. ACTA + ACCGT => ACTAACCGT
+ *
+ * The result is written into this factory's shared kmer, which the next
+ * factory call overwrites. That kmer is reset before the inputs are read,
+ * so do not pass the factory's own returned kmer as an input.
+ *
+ * @param preKmer
+ * : the kmer whose genes come first in the result
+ * @param nextKmer
+ * : the kmer whose genes follow those of preKmer
+ * @return merged kmer, the new k is the sum of the two input kmer lengths
+ */
+ public VKmerBytesWritable mergeTwoKmer(final KmerBytesWritable preKmer, final KmerBytesWritable nextKmer) {
+ kmer.reset(preKmer.getKmerLength() + nextKmer.getKmerLength());
+ int i = 1;
+ for (; i <= preKmer.getLength(); i++) { // copy preKmer's bytes, aligned at the end of both arrays
+ kmer.getBytes()[kmer.getLength() - i] = preKmer.getBytes()[preKmer.getLength() - i];
+ }
+ if (i > 1) { // step back so i is the number of bytes just copied
+ i--;
+ }
+ if (preKmer.getKmerLength() % 4 == 0) { // preKmer ends on a byte boundary: nextKmer's bytes are copied unshifted
+ for (int j = 1; j <= nextKmer.getLength(); j++) {
+ kmer.getBytes()[kmer.getLength() - i - j] = nextKmer.getBytes()[nextKmer.getLength() - j];
+ }
+ } else { // otherwise every nextKmer byte is shifted by posNeedToMove bits and split across two result bytes
+ int posNeedToMove = ((preKmer.getKmerLength() % 4) << 1);
+ kmer.getBytes()[kmer.getLength() - i] |= nextKmer.getBytes()[nextKmer.getLength() - 1] << posNeedToMove;
+ for (int j = 1; j < nextKmer.getLength(); j++) {
+ kmer.getBytes()[kmer.getLength() - i - j] = (byte) (((nextKmer.getBytes()[nextKmer.getLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
+ .getBytes()[nextKmer.getLength() - j - 1] << posNeedToMove));
+ }
+ if (nextKmer.getKmerLength() % 4 == 0 || (nextKmer.getKmerLength() % 4) * 2 + posNeedToMove > 8) { // the bits shifted out of nextKmer's byte 0 need a result byte of their own
+ kmer.getBytes()[0] = (byte) ((0xff & nextKmer.getBytes()[0]) >> (8 - posNeedToMove));
+ }
+ }
+ return kmer;
+ }
+
+ /**
+ * Safely shifts the kmer forward without changing the input kmer
+ * e.g. AGCGC shift with T => GCGCT
+ * The input is first copied into this factory's shared kmer
+ * and the shift is applied to that copy.
+ *
+ * @param kmer
+ * : input kmer
+ * @param afterCode
+ * : input genecode
+ * @return the factory's shared kmer shifted by afterCode (overwritten by the next factory call), the K will not change
+ */
+ public VKmerBytesWritable shiftKmerWithNextCode(final KmerBytesWritable kmer, byte afterCode) {
+ this.kmer.set(kmer);
+ this.kmer.shiftKmerWithNextCode(afterCode);
+ return this.kmer;
+ }
+
+ /**
+ * Safely shifts the kmer backward without changing the input kmer
+ * e.g. AGCGC shift with T => TAGCG
+ * The input is first copied into this factory's shared kmer
+ * and the shift is applied to that copy.
+ *
+ * @param kmer
+ * : input kmer
+ * @param preCode
+ * : input genecode
+ * @return the factory's shared kmer shifted by preCode (overwritten by the next factory call), the K will not change
+ */
+ public VKmerBytesWritable shiftKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
+ this.kmer.set(kmer);
+ this.kmer.shiftKmerWithPreCode(preCode);
+ return this.kmer;
+ }
+
+ /**
+ * Get the reverse sequence of the given kmer, e.g. AGCTGAC => CAGTCGA (gene order reversed, genes not complemented).
+ * @param kmer input kmer; it is only read here
+ * @return the factory's shared kmer holding the reversed genes; overwritten by the next factory call
+ */
+ public VKmerBytesWritable reverse(final KmerBytesWritable kmer) {
+ this.kmer.reset(kmer.getKmerLength());
+
+ int curPosAtKmer = ((kmer.getKmerLength() - 1) % 4) << 1; // bit offset of the input's last gene inside its byte 0
+ int curByteAtKmer = 0;
+
+ int curPosAtReverse = 0; // the output is filled from the low bits of its last byte
+ int curByteAtReverse = this.kmer.getLength() - 1;
+ this.kmer.getBytes()[curByteAtReverse] = 0;
+ for (int i = 0; i < kmer.getKmerLength(); i++) {
+ byte gene = (byte) ((kmer.getBytes()[curByteAtKmer] >> curPosAtKmer) & 0x03);
+ this.kmer.getBytes()[curByteAtReverse] |= gene << curPosAtReverse;
+ curPosAtReverse += 2;
+ if (curPosAtReverse >= 8 && curByteAtReverse > 0) { // move to the previous output byte only if one exists; without the bound, a length that is a multiple of 4 indexed -1 after its last gene
+ curPosAtReverse = 0;
+ this.kmer.getBytes()[--curByteAtReverse] = 0;
+ }
+ curPosAtKmer -= 2;
+ if (curPosAtKmer < 0) { // input byte exhausted: continue at the top gene of the next input byte
+ curPosAtKmer = 6;
+ curByteAtKmer++;
+ }
+ }
+ return this.kmer;
+ }
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableTest.java
index ea1d0c2..f21da91 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerBytesWritableTest.java
@@ -8,73 +8,71 @@
import edu.uci.ics.genomix.type.KmerBytesWritable;
public class KmerBytesWritableTest {
- static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' };
- static int k = 7;
-
- @Test
- public void TestCompressKmer() {
- KmerBytesWritable kmer = new KmerBytesWritable(k);
- kmer.setByRead( array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
-
- kmer.setByRead( array, 1);
- Assert.assertEquals(kmer.toString(), "ATAGAAG");
- }
-
- @Test
- public void TestMoveKmer(){
- KmerBytesWritable kmer = new KmerBytesWritable(k);
- kmer.setByRead( array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
-
- for (int i = k; i < array.length-1; i++) {
- kmer.shiftKmerWithNextCode(array[i]);
- Assert.assertTrue(false);
- }
+ static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' };
+ static int k = 7;
- byte out = kmer.shiftKmerWithNextChar( array[array.length - 1]);
- Assert.assertEquals(out, GeneCode.getAdjBit((byte) 'A'));
- Assert.assertEquals(kmer.toString(), "ATAGAAG");
- }
-
-
- @Test
- public void TestCompressKmerReverse() {
- KmerBytesWritable kmer = new KmerBytesWritable(k);
- kmer.setByRead( array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
-
- kmer.setByReadReverse( array, 1);
- Assert.assertEquals(kmer.toString(), "GAAGATA");
- }
-
- @Test
- public void TestMoveKmerReverse(){
- KmerBytesWritable kmer = new KmerBytesWritable(k);
- kmer.setByRead( array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
-
- for (int i = k; i < array.length-1; i++) {
- kmer.shiftKmerWithPreChar( array[i]);
- Assert.assertTrue(false);
- }
+ @Test
+ public void TestCompressKmer() {
+ KmerBytesWritable kmer = new KmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AATAGAA");
- byte out = kmer.shiftKmerWithPreChar(array[array.length - 1]);
- Assert.assertEquals(out, GeneCode.getAdjBit((byte) 'A'));
- Assert.assertEquals(kmer.toString(), "GAATAGA");
- }
+ kmer.setByRead(array, 1);
+ Assert.assertEquals(kmer.toString(), "ATAGAAG");
+ }
- @Test
- public void TestGetGene(){
- KmerBytesWritable kmer = new KmerBytesWritable(9);
- String text = "AGCTGACCG";
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C','G' };
- kmer.setByRead( array, 0);
-
- for(int i =0; i < 9; i++){
- Assert.assertEquals(text.charAt(i),
- (char)(GeneCode.getSymbolFromCode(kmer.getGeneCodeAtPosition(i))));
- }
- }
+ @Test
+ public void TestMoveKmer() {
+ KmerBytesWritable kmer = new KmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AATAGAA");
+
+ for (int i = k; i < array.length - 1; i++) { // with k = 7 and the 8-symbol array this body never runs; assertTrue(false) would flag it if it did
+ kmer.shiftKmerWithNextCode(array[i]);
+ Assert.assertTrue(false);
+ }
+
+ byte out = kmer.shiftKmerWithNextChar(array[array.length - 1]); // shift in the last symbol 'G'
+ Assert.assertEquals(out, GeneCode.getAdjBit((byte) 'A')); // NOTE(review): the return value apparently encodes the symbol shifted out ('A'); confirm
+ Assert.assertEquals(kmer.toString(), "ATAGAAG");
+ }
+
+ @Test
+ public void TestCompressKmerReverse() {
+ KmerBytesWritable kmer = new KmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AATAGAA");
+
+ kmer.setByReadReverse(array, 1); // the 7 symbols from index 1 are ATAGAAG; reversed they read GAAGATA
+ Assert.assertEquals(kmer.toString(), "GAAGATA");
+ }
+
+ @Test
+ public void TestMoveKmerReverse() {
+ KmerBytesWritable kmer = new KmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AATAGAA");
+
+ for (int i = k; i < array.length - 1; i++) { // with k = 7 and the 8-symbol array this body never runs; assertTrue(false) would flag it if it did
+ kmer.shiftKmerWithPreChar(array[i]);
+ Assert.assertTrue(false);
+ }
+
+ byte out = kmer.shiftKmerWithPreChar(array[array.length - 1]); // push 'G' in at the front
+ Assert.assertEquals(out, GeneCode.getAdjBit((byte) 'A')); // NOTE(review): the return value apparently encodes the symbol shifted out ('A'); confirm
+ Assert.assertEquals(kmer.toString(), "GAATAGA");
+ }
+
+ @Test
+ public void TestGetGene() {
+ KmerBytesWritable kmer = new KmerBytesWritable(9);
+ String text = "AGCTGACCG";
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' }; // local array shadows the class-level one; same symbols as text
+ kmer.setByRead(array, 0);
+
+ for (int i = 0; i < 9; i++) { // every position must decode back to the symbol it was read from
+ Assert.assertEquals(text.charAt(i), (char) (GeneCode.getSymbolFromCode(kmer.getGeneCodeAtPosition(i))));
+ }
+ }
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java
index a0b8845..c40729c 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/VKmerBytesWritableFactoryTest.java
@@ -9,150 +9,150 @@
import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
public class VKmerBytesWritableFactoryTest {
- static byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C','G','T'};
-
- VKmerBytesWritableFactory kmerFactory = new VKmerBytesWritableFactory(8);
-
- @Test
- public void TestDegree(){
- Assert.assertTrue(GeneCode.inDegree((byte) 0xff) == 4);
- Assert.assertTrue(GeneCode.outDegree((byte) 0xff) == 4);
- Assert.assertTrue(GeneCode.inDegree((byte) 0x3f) == 2);
- Assert.assertTrue(GeneCode.outDegree((byte) 0x01) == 1);
- Assert.assertTrue(GeneCode.inDegree((byte) 0x01) == 0);
- }
-
- @Test
- public void TestGetLastKmer(){
- KmerBytesWritable kmer = new KmerBytesWritable(9);
- kmer.setByRead( array, 0);
- Assert.assertEquals("AGCTGACCG", kmer.toString());
- KmerBytesWritable lastKmer ;
- for(int i = 8; i>0 ; i--){
- lastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
- Assert.assertEquals("AGCTGACCG".substring(9-i), lastKmer.toString());
- }
- VKmerBytesWritable vlastKmer ;
- for(int i = 8; i>0 ; i--){
- vlastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
- Assert.assertEquals("AGCTGACCG".substring(9-i), vlastKmer.toString());
- }
- }
-
- @Test
- public void TestMergeNext(){
- KmerBytesWritable kmer = new KmerBytesWritable(9);
- kmer.setByRead(array, 0);
- Assert.assertEquals("AGCTGACCG", kmer.toString());
-
- String text = "AGCTGACCG";
- for(byte x = GeneCode.A; x<= GeneCode.T ; x++){
- KmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
- text = text + (char)GeneCode.GENE_SYMBOL[x];
- Assert.assertEquals(text, newkmer.toString());
- kmer = new KmerBytesWritable(newkmer);
- }
- for(byte x = GeneCode.A; x<= GeneCode.T ; x++){
- KmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
- text = text + (char)GeneCode.GENE_SYMBOL[x];
- Assert.assertEquals(text, newkmer.toString());
- kmer = new KmerBytesWritable(newkmer);
- }
- }
-
- @Test
- public void TestMergePre(){
- KmerBytesWritable kmer = new KmerBytesWritable(9);
- kmer.setByRead(array, 0);
- Assert.assertEquals("AGCTGACCG", kmer.toString());
- String text = "AGCTGACCG";
- for(byte x = GeneCode.A; x<= GeneCode.T ; x++){
- KmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
- text = (char)GeneCode.GENE_SYMBOL[x] + text;
- Assert.assertEquals(text , newkmer.toString());
- kmer = new KmerBytesWritable(newkmer);
- }
- for(byte x = GeneCode.A; x<= GeneCode.T ; x++){
- KmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
- text = (char)GeneCode.GENE_SYMBOL[x] + text;
- Assert.assertEquals(text , newkmer.toString());
- kmer = new KmerBytesWritable(newkmer);
- }
- }
-
- @Test
- public void TestMergeTwoKmer(){
- KmerBytesWritable kmer1 = new KmerBytesWritable(9);
- kmer1.setByRead( array, 0);
- String text1 = "AGCTGACCG";
- KmerBytesWritable kmer2 = new KmerBytesWritable(9);
- kmer2.setByRead(array, 1);
- String text2 = "GCTGACCGT";
- Assert.assertEquals(text1, kmer1.toString());
- Assert.assertEquals(text2, kmer2.toString());
-
- KmerBytesWritable merged = kmerFactory.mergeTwoKmer(kmer1, kmer2);
- Assert.assertEquals(text1+text2, merged.toString());
-
- KmerBytesWritable kmer3 = new KmerBytesWritable(3);
- kmer3.setByRead(array, 1);
- String text3 = "GCT";
- Assert.assertEquals(text3, kmer3.toString());
-
- merged = kmerFactory.mergeTwoKmer(kmer1, kmer3);
- Assert.assertEquals(text1+text3, merged.toString());
- merged = kmerFactory.mergeTwoKmer( kmer3, kmer1);
- Assert.assertEquals(text3+text1, merged.toString());
-
- KmerBytesWritable kmer4 = new KmerBytesWritable(8);
- kmer4.setByRead( array, 0);
- String text4 = "AGCTGACC";
- Assert.assertEquals(text4, kmer4.toString());
- merged = kmerFactory.mergeTwoKmer(kmer4, kmer3);
- Assert.assertEquals(text4+text3, merged.toString());
-
- KmerBytesWritable kmer5 = new KmerBytesWritable(7);
- kmer5.setByRead( array, 0);
- String text5 = "AGCTGAC";
- VKmerBytesWritable kmer6 = new VKmerBytesWritable(9);
- kmer6.setByRead(9, array, 1);
- String text6 = "GCTGACCGT";
- merged = kmerFactory.mergeTwoKmer(kmer5, kmer6);
- Assert.assertEquals(text5+text6, merged.toString());
-
- kmer6.setByRead(6, array, 1);
- String text7 = "GCTGAC";
- merged = kmerFactory.mergeTwoKmer(kmer5, kmer6);
- Assert.assertEquals(text5+text7, merged.toString());
-
- kmer6.setByRead(4, array, 1);
- String text8 = "GCTG";
- merged = kmerFactory.mergeTwoKmer( kmer5, kmer6);
- Assert.assertEquals(text5+text8, merged.toString());
+ static byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- }
- @Test
- public void TestShift(){
- VKmerBytesWritable kmer = new VKmerBytesWritable(kmerFactory.getKmerByRead(9, array, 0));
- String text = "AGCTGACCG";
- Assert.assertEquals(text, kmer.toString());
-
- VKmerBytesWritable kmerForward = kmerFactory.shiftKmerWithNextCode(kmer,GeneCode.A);
- Assert.assertEquals(text, kmer.toString());
- Assert.assertEquals("GCTGACCGA", kmerForward.toString());
- VKmerBytesWritable kmerBackward = kmerFactory.shiftKmerWithPreCode(kmer,GeneCode.C);
- Assert.assertEquals(text, kmer.toString());
- Assert.assertEquals("CAGCTGACC", kmerBackward.toString());
-
- }
+ VKmerBytesWritableFactory kmerFactory = new VKmerBytesWritableFactory(8);
-
- @Test
- public void TestReverseKmer(){
- KmerBytesWritable kmer = new KmerBytesWritable(7);
- kmer.setByRead( array, 0);
- Assert.assertEquals(kmer.toString(), "AGCTGAC");
- KmerBytesWritable reversed = kmerFactory.reverse(kmer);
- Assert.assertEquals(reversed.toString(), "CAGTCGA");
- }
+ @Test
+ public void TestDegree() { // NOTE(review): expectations suggest inDegree counts set bits in the high nibble and outDegree in the low nibble; confirm in GeneCode
+ Assert.assertTrue(GeneCode.inDegree((byte) 0xff) == 4);
+ Assert.assertTrue(GeneCode.outDegree((byte) 0xff) == 4);
+ Assert.assertTrue(GeneCode.inDegree((byte) 0x3f) == 2);
+ Assert.assertTrue(GeneCode.outDegree((byte) 0x01) == 1);
+ Assert.assertTrue(GeneCode.inDegree((byte) 0x01) == 0);
+ }
+
+ @Test
+ public void TestGetLastKmer() {
+ KmerBytesWritable kmer = new KmerBytesWritable(9);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+ KmerBytesWritable lastKmer;
+ for (int i = 8; i > 0; i--) { // every suffix length from 8 down to 1
+ lastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(9 - i), lastKmer.toString());
+ }
+ VKmerBytesWritable vlastKmer;
+ for (int i = 8; i > 0; i--) { // same checks again through a VKmerBytesWritable-typed variable
+ vlastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
+ Assert.assertEquals("AGCTGACCG".substring(9 - i), vlastKmer.toString());
+ }
+ }
+
+ @Test
+ public void TestMergeNext() {
+ KmerBytesWritable kmer = new KmerBytesWritable(9);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+
+ String text = "AGCTGACCG";
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) { // first pass: append each code from A through T in turn
+ KmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
+ text = text + (char) GeneCode.GENE_SYMBOL[x];
+ Assert.assertEquals(text, newkmer.toString());
+ kmer = new KmerBytesWritable(newkmer); // copy the result before feeding it back to the factory
+ }
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) { // second pass: the same appends on the already-grown kmer
+ KmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
+ text = text + (char) GeneCode.GENE_SYMBOL[x];
+ Assert.assertEquals(text, newkmer.toString());
+ kmer = new KmerBytesWritable(newkmer);
+ }
+ }
+
+ @Test
+ public void TestMergePre() {
+ KmerBytesWritable kmer = new KmerBytesWritable(9);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals("AGCTGACCG", kmer.toString());
+ String text = "AGCTGACCG";
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) { // first pass: prepend each code from A through T in turn
+ KmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
+ text = (char) GeneCode.GENE_SYMBOL[x] + text;
+ Assert.assertEquals(text, newkmer.toString());
+ kmer = new KmerBytesWritable(newkmer); // copy the result before feeding it back to the factory
+ }
+ for (byte x = GeneCode.A; x <= GeneCode.T; x++) { // second pass: the same prepends on the already-grown kmer
+ KmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
+ text = (char) GeneCode.GENE_SYMBOL[x] + text;
+ Assert.assertEquals(text, newkmer.toString());
+ kmer = new KmerBytesWritable(newkmer);
+ }
+ }
+
+ @Test
+ public void TestMergeTwoKmer() { // merges kmers of lengths 9, 3, 8, 7, 6 and 4 in several pairings
+ KmerBytesWritable kmer1 = new KmerBytesWritable(9);
+ kmer1.setByRead(array, 0);
+ String text1 = "AGCTGACCG";
+ KmerBytesWritable kmer2 = new KmerBytesWritable(9);
+ kmer2.setByRead(array, 1);
+ String text2 = "GCTGACCGT";
+ Assert.assertEquals(text1, kmer1.toString());
+ Assert.assertEquals(text2, kmer2.toString());
+
+ KmerBytesWritable merged = kmerFactory.mergeTwoKmer(kmer1, kmer2); // 9 + 9
+ Assert.assertEquals(text1 + text2, merged.toString());
+
+ KmerBytesWritable kmer3 = new KmerBytesWritable(3);
+ kmer3.setByRead(array, 1);
+ String text3 = "GCT";
+ Assert.assertEquals(text3, kmer3.toString());
+
+ merged = kmerFactory.mergeTwoKmer(kmer1, kmer3); // 9 + 3
+ Assert.assertEquals(text1 + text3, merged.toString());
+ merged = kmerFactory.mergeTwoKmer(kmer3, kmer1); // 3 + 9
+ Assert.assertEquals(text3 + text1, merged.toString());
+
+ KmerBytesWritable kmer4 = new KmerBytesWritable(8);
+ kmer4.setByRead(array, 0);
+ String text4 = "AGCTGACC";
+ Assert.assertEquals(text4, kmer4.toString());
+ merged = kmerFactory.mergeTwoKmer(kmer4, kmer3); // 8 + 3: first kmer ends on a byte boundary
+ Assert.assertEquals(text4 + text3, merged.toString());
+
+ KmerBytesWritable kmer5 = new KmerBytesWritable(7);
+ kmer5.setByRead(array, 0);
+ String text5 = "AGCTGAC";
+ VKmerBytesWritable kmer6 = new VKmerBytesWritable(9); // variable-length kmer, re-read below at lengths 9, 6 and 4
+ kmer6.setByRead(9, array, 1);
+ String text6 = "GCTGACCGT";
+ merged = kmerFactory.mergeTwoKmer(kmer5, kmer6); // 7 + 9
+ Assert.assertEquals(text5 + text6, merged.toString());
+
+ kmer6.setByRead(6, array, 1);
+ String text7 = "GCTGAC";
+ merged = kmerFactory.mergeTwoKmer(kmer5, kmer6); // 7 + 6
+ Assert.assertEquals(text5 + text7, merged.toString());
+
+ kmer6.setByRead(4, array, 1);
+ String text8 = "GCTG";
+ merged = kmerFactory.mergeTwoKmer(kmer5, kmer6); // 7 + 4
+ Assert.assertEquals(text5 + text8, merged.toString());
+
+ }
+
+ @Test
+ public void TestShift() {
+ VKmerBytesWritable kmer = new VKmerBytesWritable(kmerFactory.getKmerByRead(9, array, 0)); // copy of the factory's kmer
+ String text = "AGCTGACCG";
+ Assert.assertEquals(text, kmer.toString());
+
+ VKmerBytesWritable kmerForward = kmerFactory.shiftKmerWithNextCode(kmer, GeneCode.A);
+ Assert.assertEquals(text, kmer.toString()); // the input must be left unchanged
+ Assert.assertEquals("GCTGACCGA", kmerForward.toString());
+ VKmerBytesWritable kmerBackward = kmerFactory.shiftKmerWithPreCode(kmer, GeneCode.C);
+ Assert.assertEquals(text, kmer.toString()); // the input must be left unchanged
+ Assert.assertEquals("CAGCTGACC", kmerBackward.toString());
+
+ }
+
+ @Test
+ public void TestReverseKmer() { // only length 7 is exercised here; no length that is a multiple of 4
+ KmerBytesWritable kmer = new KmerBytesWritable(7);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(kmer.toString(), "AGCTGAC");
+ KmerBytesWritable reversed = kmerFactory.reverse(kmer);
+ Assert.assertEquals(reversed.toString(), "CAGTCGA");
+ }
}
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml
index 47dfac5..3e5bacb 100644
--- a/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/core-site.xml
@@ -5,14 +5,14 @@
<configuration>
-<property>
- <name>fs.default.name</name>
- <value>hdfs://127.0.0.1:31888</value>
-</property>
-<property>
- <name>hadoop.tmp.dir</name>
- <value>/tmp/hadoop</value>
-</property>
+ <property>
+ <name>fs.default.name</name>
+ <value>hdfs://127.0.0.1:31888</value>
+ </property>
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>/tmp/hadoop</value>
+ </property>
</configuration>
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml
index 8d29b1d..b1b1902 100644
--- a/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/hdfs-site.xml
@@ -5,14 +5,14 @@
<configuration>
-<property>
- <name>dfs.replication</name>
- <value>1</value>
-</property>
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
-<property>
- <name>dfs.block.size</name>
- <value>65536</value>
-</property>
+ <property>
+ <name>dfs.block.size</name>
+ <value>65536</value>
+ </property>
</configuration>
diff --git a/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml b/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml
index 39b6505..525e7d5 100644
--- a/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml
+++ b/genomix/genomix-data/src/test/resources/hadoop/conf/mapred-site.xml
@@ -5,21 +5,21 @@
<configuration>
- <property>
- <name>mapred.job.tracker</name>
- <value>localhost:29007</value>
- </property>
- <property>
- <name>mapred.tasktracker.map.tasks.maximum</name>
- <value>20</value>
- </property>
- <property>
- <name>mapred.tasktracker.reduce.tasks.maximum</name>
- <value>20</value>
- </property>
- <property>
- <name>mapred.max.split.size</name>
- <value>2048</value>
- </property>
+ <property>
+ <name>mapred.job.tracker</name>
+ <value>localhost:29007</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.map.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.tasktracker.reduce.tasks.maximum</name>
+ <value>20</value>
+ </property>
+ <property>
+ <name>mapred.max.split.size</name>
+ <value>2048</value>
+ </property>
</configuration>