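Update genomix-data: make KmerBytesWritable fixed-length (global k) and switch NodeWritable and KmerBytesWritableFactory to VKmerBytesWritable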
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/NodeWritable.java
index 9fc1829..7a49512 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/NodeWritable.java
@@ -23,6 +23,7 @@
import org.apache.hadoop.io.WritableComparable;
import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
public class NodeWritable implements WritableComparable<NodeWritable>, Serializable {
/**
@@ -45,7 +46,7 @@
private PositionListWritable forwardReverseList;
private PositionListWritable reverseForwardList;
private PositionListWritable reverseReverseList;
- private KmerBytesWritable kmer;
+ private VKmerBytesWritable kmer;
public NodeWritable() {
this(21);
@@ -57,7 +58,7 @@
forwardReverseList = new PositionListWritable();
reverseForwardList = new PositionListWritable();
reverseReverseList = new PositionListWritable();
- kmer = new KmerBytesWritable(kmerSize);
+ kmer = new VKmerBytesWritable();
}
public NodeWritable(PositionWritable nodeID, PositionListWritable FFList, PositionListWritable FRList,
@@ -68,7 +69,7 @@
forwardReverseList.set(FRList);
reverseForwardList.set(RFList);
reverseReverseList.set(RRList);
- kmer.set(kmer);
+ kmer.setAsCopy(kmer);
}
public void set(PositionWritable nodeID, PositionListWritable FFList, PositionListWritable FRList,
@@ -78,7 +79,7 @@
this.forwardReverseList.set(FRList);
this.reverseForwardList.set(RFList);
this.reverseReverseList.set(RRList);
- this.kmer.set(kmer);
+ this.kmer.setAsCopy(kmer);
}
public void setNodeID(PositionWritable ref) {
@@ -90,7 +91,7 @@
}
public void setKmer(KmerBytesWritable right) {
- this.kmer.set(right);
+ this.kmer.setAsCopy(right);
}
public void reset(int kmerSize) {
@@ -163,7 +164,7 @@
this.forwardReverseList.set(node.forwardReverseList);
this.reverseForwardList.set(node.reverseForwardList);
this.reverseReverseList.set(node.reverseReverseList);
- this.kmer.set(node.kmer);
+ this.kmer.setAsCopy(node.kmer);
}
@Override
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index 284c2e7..e042840 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -26,583 +26,349 @@
import edu.uci.ics.genomix.data.KmerUtil;
import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.genomix.oldtype.NodeWritable.DirectionFlag;
/**
- * Variable kmer length byteswritable It was used to generate the graph in which
- * phase the kmer length doesn't change. Thus the kmerByteSize of bytes doesn't
- * change either.
+ * Fixed, static-length Kmer used as the key and edge values of each
+ * NodeWritable. Kmer length should be set once during configuration and should
+ * never change.
*/
-public class KmerBytesWritable extends BinaryComparable implements
- Serializable, WritableComparable<BinaryComparable> {
+public class KmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
- private static final long serialVersionUID = 1L;
- protected static final byte[] EMPTY_BYTES = {};
+ private static final long serialVersionUID = 1L;
+ protected static final byte[] EMPTY_BYTES = {};
- protected int lettersInKmer;
- protected int bytesUsed;
- protected byte[] bytes;
- protected int offset;
+ protected static int lettersInKmer;
+ protected static int bytesUsed;
+ protected byte[] bytes;
+ protected int offset;
- /**
- * Initialize as empty kmer
- */
- public KmerBytesWritable() {
- this(0, EMPTY_BYTES, 0);
- }
+ /**
+ * set the *GLOBAL* kmer length to the given k value.
+ * NOTE: this will invalidate ALL previously created kmers. This function
+ * should be called before any kmers are created
+ */
+ public static void setGlobalKmerLength(int k) {
+ bytesUsed = KmerUtil.getByteNumFromK(k);
+ lettersInKmer = k;
+ }
- /**
- * Copy contents of kmer string
- */
- public KmerBytesWritable(String kmer) {
- setAsCopy(kmer);
- }
+ /**
+ * Initialize as empty kmer
+ */
+ public KmerBytesWritable() {
+ bytes = new byte[bytesUsed];
+ offset = 0;
+ }
- /**
- * Set as reference to given data
- */
- public KmerBytesWritable(int k, byte[] storage, int offset) {
- setAsReference(k, storage, offset);
- }
+ /**
+ * Copy contents of kmer string
+ */
+ public KmerBytesWritable(String kmer) {
+ this();
+ setByRead(kmer.getBytes(), 0);
+ }
- /**
- * Reserve space for k letters
- */
- public KmerBytesWritable(int k) {
- if (k > 0) {
- this.bytes = new byte[KmerUtil.getByteNumFromK(k)];
- } else {
- this.bytes = EMPTY_BYTES;
- }
- this.offset = 0;
- setKmerLength(k);
- }
+ /**
+ * Set as reference to existing data
+ */
+ public KmerBytesWritable(byte[] storage, int offset) {
+ setAsReference(storage, offset);
+ }
- /**
- * copy kmer in other
- *
- * @param other
- */
- public KmerBytesWritable(KmerBytesWritable other) {
- this(other.lettersInKmer);
- setAsCopy(other);
- }
+ /**
+ * copy kmer in other
+ *
+ * @param other
+ */
+ public KmerBytesWritable(KmerBytesWritable other) {
+ this();
+ setAsCopy(other);
+ }
- /**
- * Deep copy of the given kmer
- *
- * @param other
- */
- public void setAsCopy(KmerBytesWritable other) {
- reset(other.lettersInKmer);
- if (lettersInKmer > 0) {
- System.arraycopy(other.bytes, other.offset, bytes, this.offset,
- bytesUsed);
- }
- }
+ /**
+ * Deep copy of the given kmer
+ *
+ * @param other
+ */
+ public void setAsCopy(KmerBytesWritable other) {
+ if (lettersInKmer > 0) {
+ System.arraycopy(other.bytes, other.offset, bytes, this.offset, bytesUsed);
+ }
+ }
- /**
- * set from String kmer
- */
- public void setAsCopy(String kmer) {
- setKmerLength(kmer.length());
- bytes = kmer.getBytes();
- offset = 0;
- }
+ /**
+ * Deep copy of the given bytes data
+ *
+ * @param newData
+ * @param offset
+ */
+ public void setAsCopy(byte[] newData, int offset) {
+ if (newData.length - offset < bytesUsed) {
+ throw new IllegalArgumentException("Requested " + bytesUsed + " bytes (k=" + lettersInKmer
+ + ") but buffer has only " + (newData.length - offset) + " bytes");
+ }
+ System.arraycopy(newData, offset, bytes, this.offset, bytesUsed);
+ }
- /**
- * Deep copy of the given bytes data
- *
- * @param newData
- * @param offset
- */
- public void setAsCopy(int k, byte[] newData, int offset) {
- reset(k);
- System.arraycopy(newData, offset, bytes, this.offset, k);
- }
+ /**
+ * Point this datablock to the given bytes array. It works like a pointer
+ * to the new datablock.
+ *
+ * @param newData
+ * @param offset
+ */
+ public void setAsReference(byte[] newData, int offset) {
+ if (newData.length - offset < bytesUsed) {
+ throw new IllegalArgumentException("Requested " + bytesUsed + " bytes (k=" + lettersInKmer
+ + ") but buffer has only " + (newData.length - offset) + " bytes");
+ }
+ bytes = newData;
+ this.offset = offset;
+ }
- /**
- * Reset array by kmerlength
- *
- * @param k
- */
- public void reset(int k) {
- setKmerLength(k);
- setSize(bytesUsed);
- clearLeadBit();
- }
+ /**
+ * Get one genecode (A|G|C|T) at the given kmer index, e.g. getting the 4th
+ * gene of the kmer ACGTA will return T.
+ *
+ * @param pos
+ * @return
+ */
+ public byte getGeneCodeAtPosition(int pos) {
+ if (pos >= lettersInKmer || pos < 0) {
+ throw new ArrayIndexOutOfBoundsException("Gene position (" + pos + ") out of bounds for k=" + lettersInKmer);
+ }
+ return geneCodeAtPosition(pos);
+ }
- /**
- * Point this datablock to the given bytes array It works like the pointer
- * to new datablock.
- *
- * @param newData
- * @param offset
- */
- public void setAsReference(int k, byte[] newData, int offset) {
- this.bytes = newData;
- this.offset = offset;
- // my java skills are lacking. In inherited classes with a header, this
- // will use the header version...
- // setKmerLength(k);
- bytesUsed = KmerUtil.getByteNumFromK(k);
- lettersInKmer = k;
- if (newData.length - offset < bytesUsed) {
- throw new IllegalArgumentException("Requested " + bytesUsed
- + " bytes (k=" + k + ") but buffer has only "
- + (newData.length - offset) + " bytes");
- }
- }
+ /**
+ * unchecked version of getGeneCodeAtPosition. Used when kmerlength is
+ * inaccurate (mid-merge)
+ */
+ private byte geneCodeAtPosition(int pos) {
+ int posByte = pos / 4;
+ int shift = (pos % 4) << 1;
+ return (byte) ((bytes[offset + bytesUsed - 1 - posByte] >> shift) & 0x3);
+ }
- /**
- * Ensures that there is space for at least `size` bytes of kmer (not
- * including any header)
- *
- */
- protected void setSize(int size) {
- if (size > getCapacity()) {
- setCapacity((size * 3 / 2));
- }
- this.bytesUsed = size;
- }
+ public static int getKmerLength() {
+ return lettersInKmer;
+ }
+
+ public static int getBytesPerKmer() {
+ return bytesUsed;
+ }
- /**
- * return the number of bytes in use for the kmer (not including any header)
- */
- protected int getCapacity() {
- return bytes.length;
- }
+ @Override
+ public byte[] getBytes() {
+ return bytes;
+ }
- /**
- * shrinks/expands the storage area to allow new_cap bytes for the kmer (no
- * header included)
- */
- protected void setCapacity(int new_cap) {
- if (new_cap != getCapacity()) {
- byte[] new_data = new byte[new_cap];
- if (new_cap < bytesUsed) {
- bytesUsed = new_cap;
- }
- if (bytesUsed != 0) {
- System.arraycopy(bytes, offset, new_data, 0, bytesUsed);
- }
- bytes = new_data;
- offset = 0;
- }
- }
+ public int getOffset() {
+ return offset;
+ }
- /**
- * Get one genecode (A|G|C|T) from the given kmer index e.g. Get the 4th
- * gene of the kmer ACGTA will return T
- *
- * @param pos
- * @return
- */
- public byte getGeneCodeAtPosition(int pos) {
- if (pos >= lettersInKmer) {
- throw new IllegalArgumentException("gene position out of bound");
- }
- return geneCodeAtPosition(pos);
- }
+ @Override
+ public int getLength() {
+ return bytesUsed;
+ }
- // unchecked version of above. Used when kmerlength is inaccurate
- // (mid-merge)
- private byte geneCodeAtPosition(int pos) {
- int posByte = pos / 4;
- int shift = (pos % 4) << 1;
- return (byte) ((bytes[offset + bytesUsed - 1 - posByte] >> shift) & 0x3);
- }
+ /**
+ * Read Kmer from read text into the bytes array, e.g. AATAG will be
+ * compressed as [0x000G, 0xATAA]
+ *
+ * @param stringBytes
+ * @param start
+ */
+ public void setByRead(byte[] stringBytes, int start) {
+ byte l = 0;
+ int bytecount = 0;
+ int bcount = this.bytesUsed - 1;
+ for (int i = start; i < start + lettersInKmer && i < stringBytes.length; i++) {
+ byte code = GeneCode.getCodeFromSymbol(stringBytes[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ bytes[offset + bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ if (bcount >= 0) {
+ bytes[offset] = l;
+ }
+ }
- public void setKmerLength(int k) {
- this.bytesUsed = KmerUtil.getByteNumFromK(k);
- this.lettersInKmer = k;
- }
+ /**
+ * Compress the reversed read into the bytes array, e.g. AATAG will be paired
+ * to CTATT, and then compressed as [0x000T,0xTATC]
+ *
+ * @param input
+ * array
+ * @param start
+ * position
+ */
+ public void setByReadReverse(byte[] array, int start) {
+ byte l = 0;
+ int bytecount = 0;
+ int bcount = bytesUsed - 1;
+ // for (int i = start + kmerlength - 1; i >= 0 && i < array.length; i--)
+ // {
+ for (int i = start + lettersInKmer - 1; i >= start && i < array.length; i--) {
+ byte code = GeneCode.getPairedCodeFromSymbol(array[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ bytes[offset + bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ if (bcount >= 0) {
+ bytes[offset] = l;
+ }
+ }
- public int getKmerLength() {
- return lettersInKmer;
- }
+ /**
+ * Shift Kmer to accept new char input
+ *
+ * @param c
+ * Input new gene character
+ * @return the shift out gene, in gene code format
+ */
+ public byte shiftKmerWithNextChar(byte c) {
+ return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
+ }
- @Override
- public byte[] getBytes() {
- return bytes;
- }
+ /**
+ * Shift Kmer to accept new gene code
+ *
+ * @param c
+ * Input new gene code
+ * @return the shift out gene, in gene code format
+ */
+ public byte shiftKmerWithNextCode(byte c) {
+ byte output = (byte) (bytes[offset + bytesUsed - 1] & 0x03);
+ for (int i = bytesUsed - 1; i > 0; i--) {
+ byte in = (byte) (bytes[offset + i - 1] & 0x03);
+ bytes[offset + i] = (byte) (((bytes[offset + i] >>> 2) & 0x3f) | (in << 6));
+ }
+ int pos = ((lettersInKmer - 1) % 4) << 1;
+ byte code = (byte) (c << pos);
+ bytes[offset] = (byte) (((bytes[offset] >>> 2) & 0x3f) | code);
+ clearLeadBit();
+ return output;
+ }
- public int getOffset() {
- return offset;
- }
+ /**
+ * Shift Kmer to accept new input char
+ *
+ * @param c
+ * Input new gene character
+ * @return the shiftout gene, in gene code format
+ */
+ public byte shiftKmerWithPreChar(byte c) {
+ return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
+ }
- @Override
- public int getLength() {
- return bytesUsed;
- }
+ /**
+ * Shift Kmer to accept new gene code
+ *
+ * @param c
+ * Input new gene code
+ * @return the shiftout gene, in gene code format
+ */
+ public byte shiftKmerWithPreCode(byte c) {
+ int pos = ((lettersInKmer - 1) % 4) << 1;
+ byte output = (byte) ((bytes[offset] >> pos) & 0x03);
+ for (int i = 0; i < bytesUsed - 1; i++) {
+ byte in = (byte) ((bytes[offset + i + 1] >> 6) & 0x03);
+ bytes[offset + i] = (byte) ((bytes[offset + i] << 2) | in);
+ }
+ bytes[offset + bytesUsed - 1] = (byte) ((bytes[offset + bytesUsed - 1] << 2) | c);
+ clearLeadBit();
+ return output;
+ }
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param stringBytes
- * : byte array from a _string_. Meaning there's no header
- * @param start
- */
- public void setByRead(byte[] stringBytes, int start) {
- byte l = 0;
- int bytecount = 0;
- int bcount = this.bytesUsed - 1;
- for (int i = start; i < start + lettersInKmer && i < stringBytes.length; i++) {
- byte code = GeneCode.getCodeFromSymbol(stringBytes[i]);
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[offset + bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[offset] = l;
- }
- }
+ public static void appendOneByteAtPosition(int k, byte onebyte, byte[] buffer, int start, int length) {
+ int position = start + length - 1 - k / 4;
+ if (position < start) {
+ throw new IllegalArgumentException("Buffer for kmer storage is invalid");
+ }
+ int shift = ((k) % 4) << 1;
+ int mask = shift == 0 ? 0 : ((1 << shift) - 1);
- public void setByRead(int k, byte[] array, int start) {
- reset(k);
- setByRead(array, start);
- }
+ buffer[position] = (byte) ((buffer[position] & mask) | ((0xff & onebyte) << shift));
+ if (position > start && shift != 0) {
+ buffer[position - 1] = (byte) ((buffer[position - 1] & (0xff - mask)) | ((byte) ((0xff & onebyte) >>> (8 - shift))));
+ }
+ }
- /**
- * Compress Reversed read into bytes array e.g. AATAG will paired to CTATT,
- * and then compress as [0x000T,0xTATC]
- *
- * @param input
- * array
- * @param start
- * position
- */
- public void setByReadReverse(byte[] array, int start) {
- byte l = 0;
- int bytecount = 0;
- int bcount = bytesUsed - 1;
- // for (int i = start + kmerlength - 1; i >= 0 && i < array.length; i--)
- // {
- for (int i = start + lettersInKmer - 1; i >= start && i < array.length; i--) {
- byte code = GeneCode.getPairedCodeFromSymbol(array[i]);
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[offset + bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[offset] = l;
- }
- }
+ public static byte getOneByteFromKmerAtPosition(int k, byte[] buffer, int start, int length) {
+ int position = start + length - 1 - k / 4;
+ if (position < start) {
+ throw new IllegalArgumentException("Buffer of kmer storage is invalid");
+ }
+ int shift = (k % 4) << 1;
+ byte data = (byte) (((0xff) & buffer[position]) >>> shift);
+ if (shift != 0 && position > start) {
+ data |= 0xff & (buffer[position - 1] << (8 - shift));
+ }
+ return data;
+ }
- public void setByReadReverse(int k, byte[] array, int start) {
- reset(k);
- setByReadReverse(array, start);
- }
+ protected void clearLeadBit() {
+ if (lettersInKmer % 4 != 0) {
+ bytes[offset] &= (1 << ((lettersInKmer % 4) << 1)) - 1;
+ }
+ }
- /**
- * Shift Kmer to accept new char input
- *
- * @param c
- * Input new gene character
- * @return the shift out gene, in gene code format
- */
- public byte shiftKmerWithNextChar(byte c) {
- return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
- }
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ in.readFully(bytes, offset, bytesUsed);
+ }
- /**
- * Shift Kmer to accept new gene code
- *
- * @param c
- * Input new gene code
- * @return the shift out gene, in gene code format
- */
- public byte shiftKmerWithNextCode(byte c) {
- byte output = (byte) (bytes[offset + bytesUsed - 1] & 0x03);
- for (int i = bytesUsed - 1; i > 0; i--) {
- byte in = (byte) (bytes[offset + i - 1] & 0x03);
- bytes[offset + i] = (byte) (((bytes[offset + i] >>> 2) & 0x3f) | (in << 6));
- }
- int pos = ((lettersInKmer - 1) % 4) << 1;
- byte code = (byte) (c << pos);
- bytes[offset] = (byte) (((bytes[offset] >>> 2) & 0x3f) | code);
- clearLeadBit();
- return output;
- }
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.write(bytes, offset, bytesUsed);
+ }
- /**
- * Shift Kmer to accept new input char
- *
- * @param c
- * Input new gene character
- * @return the shiftout gene, in gene code format
- */
- public byte shiftKmerWithPreChar(byte c) {
- return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
- }
+ @Override
+ public int hashCode() {
+ return Marshal.hashBytes(bytes, offset, bytesUsed);
+ }
- /**
- * Shift Kmer to accept new gene code
- *
- * @param c
- * Input new gene code
- * @return the shiftout gene, in gene code format
- */
- public byte shiftKmerWithPreCode(byte c) {
- int pos = ((lettersInKmer - 1) % 4) << 1;
- byte output = (byte) ((bytes[offset] >> pos) & 0x03);
- for (int i = 0; i < bytesUsed - 1; i++) {
- byte in = (byte) ((bytes[offset + i + 1] >> 6) & 0x03);
- bytes[offset + i] = (byte) ((bytes[offset + i] << 2) | in);
- }
- bytes[offset + bytesUsed - 1] = (byte) ((bytes[offset + bytesUsed - 1] << 2) | c);
- clearLeadBit();
- return output;
- }
+ @Override
+ public boolean equals(Object right_obj) {
+ if (right_obj instanceof KmerBytesWritable) {
+ // since these may be backed by storage of different sizes, we have to manually check each byte
+ KmerBytesWritable right = (KmerBytesWritable) right_obj;
+ for (int i=0; i < bytesUsed; i++) {
+ if (bytes[offset + i] != right.bytes[right.offset + i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+ }
- /**
- * Merge Kmer with the next connected Kmer e.g. AAGCTAA merge with AACAACC,
- * if the initial kmerSize = 3 then it will return AAGCTAACAACC
- *
- * @param initialKmerSize
- * : the initial kmerSize
- * @param kmer
- * : the next kmer
- */
- public void mergeWithFFKmer(int initialKmerSize, KmerBytesWritable kmer) {
- int preKmerLength = lettersInKmer;
- int preSize = bytesUsed;
- lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
- setSize(KmerUtil.getByteNumFromK(lettersInKmer));
- for (int i = 1; i <= preSize; i++) {
- bytes[offset + bytesUsed - i] = bytes[offset + preSize - i];
- }
- for (int k = initialKmerSize - 1; k < kmer.getKmerLength(); k += 4) {
- byte onebyte = getOneByteFromKmerAtPosition(k, kmer.getBytes(),
- kmer.getOffset(), kmer.getLength());
- appendOneByteAtPosition(preKmerLength + k - initialKmerSize + 1,
- onebyte, bytes, offset, bytesUsed);
- }
- clearLeadBit();
- }
+ @Override
+ public String toString() {
+ return KmerUtil.recoverKmerFrom(lettersInKmer, bytes, offset, bytesUsed);
+ }
- /**
- * Merge Kmer with the next connected Kmer, when that Kmer needs to be
- * reverse-complemented e.g. AAGCTAA merge with GGTTGTT, if the initial
- * kmerSize = 3 then it will return AAGCTAACAACC A merge B => A B~
- *
- * @param initialKmerSize
- * : the initial kmerSize
- * @param kmer
- * : the next kmer
- */
- public void mergeWithFRKmer(int initialKmerSize, KmerBytesWritable kmer) {
- int preSize = bytesUsed;
- int preKmerLength = lettersInKmer;
- lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
- setSize(KmerUtil.getByteNumFromK(lettersInKmer));
- // copy prefix into right-side of buffer
- for (int i = 1; i <= preSize; i++) {
- bytes[offset + bytesUsed - i] = bytes[offset + preSize - i];
- }
+ public static class Comparator extends WritableComparator {
+ public Comparator() {
+ super(KmerBytesWritable.class);
+ }
- int bytecount = (preKmerLength % 4) * 2;
- int bcount = bytesUsed - preSize - bytecount / 8; // may overlap
- // previous kmer
- byte l = bcount == bytesUsed - preSize ? bytes[offset + bcount] : 0x00;
- bytecount %= 8;
- for (int i = kmer.lettersInKmer - initialKmerSize; i >= 0; i--) {
- byte code = GeneCode.getPairedGeneCode(kmer
- .getGeneCodeAtPosition(i));
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[offset + bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[offset] = l;
- }
- }
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ return compareBytes(b1, s1, l1, b2, s2, l2);
+ }
+ }
- /**
- * Merge Kmer with the previous connected Kmer, when that kmer needs to be
- * reverse-complemented e.g. AACAACC merge with TTCTGCC, if the initial
- * kmerSize = 3 then it will return GGCAGAACAACC
- *
- * @param initialKmerSize
- * : the initial kmerSize
- * @param preKmer
- * : the previous kmer
- */
- public void mergeWithRFKmer(int initialKmerSize, KmerBytesWritable preKmer) {
- KmerBytesWritable reversed = new KmerBytesWritable(preKmer.lettersInKmer);
- reversed.setByReadReverse(preKmer.toString().getBytes(), 0);
- mergeWithRRKmer(initialKmerSize, reversed);
- }
-
- /**
- * Merge Kmer with the previous connected Kmer e.g. AACAACC merge with
- * AAGCTAA, if the initial kmerSize = 3 then it will return AAGCTAACAACC
- *
- * @param initialKmerSize
- * : the initial kmerSize
- * @param preKmer
- * : the previous kmer
- */
- public void mergeWithRRKmer(int initialKmerSize, KmerBytesWritable preKmer) {
- int preKmerLength = lettersInKmer;
- int preSize = bytesUsed;
- lettersInKmer += preKmer.lettersInKmer - initialKmerSize + 1;
- setSize(KmerUtil.getByteNumFromK(lettersInKmer));
- byte cacheByte = getOneByteFromKmerAtPosition(0, bytes, offset, preSize);
-
- // copy prekmer
- for (int k = 0; k < preKmer.lettersInKmer - initialKmerSize + 1; k += 4) {
- byte onebyte = getOneByteFromKmerAtPosition(k, preKmer.bytes,
- preKmer.offset, preKmer.bytesUsed);
- appendOneByteAtPosition(k, onebyte, bytes, offset, bytesUsed);
- }
-
- // copy current kmer
- int k = 4;
- for (; k < preKmerLength; k += 4) {
- byte onebyte = getOneByteFromKmerAtPosition(k, bytes, offset,
- preSize);
- appendOneByteAtPosition(preKmer.lettersInKmer - initialKmerSize + k
- - 4 + 1, cacheByte, bytes, offset, bytesUsed);
- cacheByte = onebyte;
- }
- appendOneByteAtPosition(preKmer.lettersInKmer - initialKmerSize + k - 4
- + 1, cacheByte, bytes, offset, bytesUsed);
- clearLeadBit();
- }
-
- public void mergeWithKmerInDir(byte dir, int initialKmerSize,
- KmerBytesWritable kmer) {
- switch (dir & DirectionFlag.DIR_MASK) {
- case DirectionFlag.DIR_FF:
- mergeWithFFKmer(initialKmerSize, kmer);
- break;
- case DirectionFlag.DIR_FR:
- mergeWithFRKmer(initialKmerSize, kmer);
- break;
- case DirectionFlag.DIR_RF:
- mergeWithRFKmer(initialKmerSize, kmer);
- break;
- case DirectionFlag.DIR_RR:
- mergeWithRRKmer(initialKmerSize, kmer);
- break;
- default:
- throw new RuntimeException("Direction not recognized: " + dir);
- }
- }
-
- public static void appendOneByteAtPosition(int k, byte onebyte,
- byte[] buffer, int start, int length) {
- int position = start + length - 1 - k / 4;
- if (position < start) {
- throw new IllegalArgumentException(
- "Buffer for kmer storage is invalid");
- }
- int shift = ((k) % 4) << 1;
- int mask = shift == 0 ? 0 : ((1 << shift) - 1);
-
- buffer[position] = (byte) ((buffer[position] & mask) | ((0xff & onebyte) << shift));
- if (position > start && shift != 0) {
- buffer[position - 1] = (byte) ((buffer[position - 1] & (0xff - mask)) | ((byte) ((0xff & onebyte) >>> (8 - shift))));
- }
- }
-
- public static byte getOneByteFromKmerAtPosition(int k, byte[] buffer,
- int start, int length) {
- int position = start + length - 1 - k / 4;
- if (position < start) {
- throw new IllegalArgumentException(
- "Buffer of kmer storage is invalid");
- }
- int shift = (k % 4) << 1;
- byte data = (byte) (((0xff) & buffer[position]) >>> shift);
- if (shift != 0 && position > start) {
- data |= 0xff & (buffer[position - 1] << (8 - shift));
- }
- return data;
- }
-
- protected void clearLeadBit() {
- if (lettersInKmer % 4 != 0) {
- bytes[offset] &= (1 << ((lettersInKmer % 4) << 1)) - 1;
- }
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- lettersInKmer = in.readInt();
- bytesUsed = KmerUtil.getByteNumFromK(lettersInKmer);
- if (lettersInKmer > 0) {
- if (this.bytes.length < this.bytesUsed) {
- this.bytes = new byte[this.bytesUsed];
- this.offset = 0;
-
- }
- in.readFully(bytes, offset, bytesUsed);
- }
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(lettersInKmer);
- if (lettersInKmer > 0) {
- out.write(bytes, offset, bytesUsed);
- }
- }
-
- @Override
- public int hashCode() {
- return super.hashCode() * 31 + this.lettersInKmer;
- }
-
- @Override
- public boolean equals(Object right_obj) {
- if (right_obj instanceof KmerBytesWritable)
- return this.lettersInKmer == ((KmerBytesWritable) right_obj).lettersInKmer
- && super.equals(right_obj);
- return false;
- }
-
- @Override
- public String toString() {
- return KmerUtil.recoverKmerFrom(this.lettersInKmer, this.getBytes(),
- offset, this.getLength());
- }
-
- public static class Comparator extends WritableComparator {
- private static final int LEADING_BYTES = 4;
-
- public Comparator() {
- super(KmerBytesWritable.class);
- }
-
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- int kmerlength1 = Marshal.getInt(b1, s1);
- int kmerlength2 = Marshal.getInt(b2, s2);
- if (kmerlength1 == kmerlength2) {
- return compareBytes(b1, s1 + LEADING_BYTES, l1 - LEADING_BYTES,
- b2, s2 + LEADING_BYTES, l2 - LEADING_BYTES);
- }
- return kmerlength1 - kmerlength2;
- }
- }
-
- static { // register this comparator
- WritableComparator.define(KmerBytesWritable.class, new Comparator());
- }
+ static { // register this comparator
+ WritableComparator.define(KmerBytesWritable.class, new Comparator());
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
index f805610..16df821 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
@@ -16,10 +16,10 @@
package edu.uci.ics.genomix.type;
public class KmerBytesWritableFactory {
- private KmerBytesWritable kmer;
+ private VKmerBytesWritable kmer;
public KmerBytesWritableFactory(int k) {
- kmer = new KmerBytesWritable(k);
+ kmer = new VKmerBytesWritable(k);
}
/**
@@ -30,7 +30,7 @@
* @param array
* @param start
*/
- public KmerBytesWritable getKmerByRead(int k, byte[] array, int start) {
+ public VKmerBytesWritable getKmerByRead(int k, byte[] array, int start) {
kmer.reset(k);
kmer.setByRead(array, start);
return kmer;
@@ -43,7 +43,7 @@
* @param array
* @param start
*/
- public KmerBytesWritable getKmerByReadReverse(int k, byte[] array, int start) {
+ public VKmerBytesWritable getKmerByReadReverse(int k, byte[] array, int start) {
kmer.reset(k);
kmer.setByReadReverse(array, start);
return kmer;
@@ -59,28 +59,28 @@
* @param kmerChain
* @return LastKmer bytes array
*/
- public KmerBytesWritable getLastKmerFromChain(int lastK, final KmerBytesWritable kmerChain) {
- if (lastK > kmerChain.getKmerLength()) {
+ public VKmerBytesWritable getLastKmerFromChain(int lastK, final VKmerBytesWritable kmerChain) {
+ if (lastK > kmerChain.getKmerLetterLength()) {
return null;
}
- if (lastK == kmerChain.getKmerLength()) {
+ if (lastK == kmerChain.getKmerLetterLength()) {
kmer.setAsCopy(kmerChain);
return kmer;
}
kmer.reset(lastK);
/** from end to start */
- int byteInChain = kmerChain.getLength() - 1 - (kmerChain.getKmerLength() - lastK) / 4;
- int posInByteOfChain = ((kmerChain.getKmerLength() - lastK) % 4) << 1; // *2
- int byteInKmer = kmer.getLength() - 1;
+ int byteInChain = kmerChain.getKmerByteLength() - 1 - (kmerChain.getKmerLetterLength() - lastK) / 4;
+ int posInByteOfChain = ((kmerChain.getKmerLetterLength() - lastK) % 4) << 1; // *2
+ int byteInKmer = kmer.getKmerByteLength() - 1;
for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
+ kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] = (byte) ((0xff & kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset()]) >> posInByteOfChain);
+ kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] |= ((kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset() - 1] << (8 - posInByteOfChain)));
}
/** last kmer byte */
if (byteInKmer == 0) {
- kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
+ kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmerChain.getBytes()[0 + kmerChain.getKmerOffset()] & 0xff) >> posInByteOfChain);
}
kmer.clearLeadBit();
return kmer;
@@ -95,52 +95,52 @@
* @param kmerChain
* @return FirstKmer bytes array
*/
- public KmerBytesWritable getFirstKmerFromChain(int firstK, final KmerBytesWritable kmerChain) {
- if (firstK > kmerChain.getKmerLength()) {
+ public VKmerBytesWritable getFirstKmerFromChain(int firstK, final VKmerBytesWritable kmerChain) {
+ if (firstK > kmerChain.getKmerLetterLength()) {
return null;
}
- if (firstK == kmerChain.getKmerLength()) {
+ if (firstK == kmerChain.getKmerLetterLength()) {
kmer.setAsCopy(kmerChain);
return kmer;
}
kmer.reset(firstK);
int i = 1;
- for (; i < kmer.getLength(); i++) {
- kmer.getBytes()[kmer.getLength() - i] = kmerChain.getBytes()[kmerChain.getLength() - i];
+ for (; i < kmer.getKmerByteLength(); i++) {
+ kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] = kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i];
}
int posInByteOfChain = (firstK % 4) << 1; // *2
if (posInByteOfChain == 0) {
- kmer.getBytes()[0] = kmerChain.getBytes()[kmerChain.getLength() - i];
+ kmer.getBytes()[0 + kmer.getKmerOffset()] = kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i];
} else {
- kmer.getBytes()[0] = (byte) (kmerChain.getBytes()[kmerChain.getLength() - i] & ((1 << posInByteOfChain) - 1));
+ kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) (kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i] & ((1 << posInByteOfChain) - 1));
}
kmer.clearLeadBit();
return kmer;
}
- public KmerBytesWritable getSubKmerFromChain(int startK, int kSize, final KmerBytesWritable kmerChain) {
- if (startK + kSize > kmerChain.getKmerLength()) {
+ public VKmerBytesWritable getSubKmerFromChain(int startK, int kSize, final VKmerBytesWritable kmerChain) {
+ if (startK + kSize > kmerChain.getKmerLetterLength()) {
return null;
}
- if (startK == 0 && kSize == kmerChain.getKmerLength()) {
+ if (startK == 0 && kSize == kmerChain.getKmerLetterLength()) {
kmer.setAsCopy(kmerChain);
return kmer;
}
kmer.reset(kSize);
/** from end to start */
- int byteInChain = kmerChain.getLength() - 1 - startK / 4;
+ int byteInChain = kmerChain.getKmerByteLength() - 1 - startK / 4;
int posInByteOfChain = startK % 4 << 1; // *2
- int byteInKmer = kmer.getLength() - 1;
+ int byteInKmer = kmer.getKmerByteLength() - 1;
for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
+ kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] = (byte) ((0xff & kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset()]) >> posInByteOfChain);
+ kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] |= ((kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset() - 1] << (8 - posInByteOfChain)));
}
/** last kmer byte */
if (byteInKmer == 0) {
- kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
+ kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmerChain.getBytes()[0 + kmerChain.getKmerOffset()] & 0xff) >> posInByteOfChain);
}
kmer.clearLeadBit();
return kmer;
@@ -159,15 +159,15 @@
* : next neighbor in gene-code format
* @return the merged Kmer, this K of this Kmer is k+1
*/
- public KmerBytesWritable mergeKmerWithNextCode(final KmerBytesWritable kmer, byte nextCode) {
- this.kmer.reset(kmer.getKmerLength() + 1);
- for (int i = 1; i <= kmer.getLength(); i++) {
- this.kmer.getBytes()[this.kmer.getLength() - i] = kmer.getBytes()[kmer.getLength() - i];
+ public VKmerBytesWritable mergeKmerWithNextCode(final VKmerBytesWritable kmer, byte nextCode) {
+ this.kmer.reset(kmer.getKmerLetterLength() + 1); // result holds one more letter than the input
+ for (int i = 1; i <= kmer.getKmerByteLength(); i++) { // copy the input bytes, aligned at the tail of both buffers
+ this.kmer.getBytes()[this.kmer.getKmerOffset() + this.kmer.getKmerByteLength() - i] = kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i];
}
- if (this.kmer.getLength() > kmer.getLength()) {
- this.kmer.getBytes()[0] = (byte) (nextCode & 0x3);
+ if (this.kmer.getKmerByteLength() > kmer.getKmerByteLength()) { // the extra letter needs a fresh leading byte
+ this.kmer.getBytes()[0 + this.kmer.getKmerOffset()] = (byte) (nextCode & 0x3); // destination is indexed with this.kmer's own offset, not the argument's
} else {
- this.kmer.getBytes()[0] = (byte) (kmer.getBytes()[0] | ((nextCode & 0x3) << ((kmer.getKmerLength() % 4) << 1)));
+ this.kmer.getBytes()[0 + this.kmer.getKmerOffset()] = (byte) (kmer.getBytes()[0 + kmer.getKmerOffset()] | ((nextCode & 0x3) << ((kmer.getKmerLetterLength() % 4) << 1))); // source keeps the argument's offset
}
this.kmer.clearLeadBit();
return this.kmer;
@@ -186,17 +186,17 @@
* : next neighbor in gene-code format
* @return the merged Kmer,this K of this Kmer is k+1
*/
- public KmerBytesWritable mergeKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
- this.kmer.reset(kmer.getKmerLength() + 1);
+ public VKmerBytesWritable mergeKmerWithPreCode(final VKmerBytesWritable kmer, byte preCode) {
+ this.kmer.reset(kmer.getKmerLetterLength() + 1); // result holds one more letter than the input
int byteInMergedKmer = 0;
- if (kmer.getKmerLength() % 4 == 0) {
- this.kmer.getBytes()[0] = (byte) ((kmer.getBytes()[0] >> 6) & 0x3);
+ if (kmer.getKmerLetterLength() % 4 == 0) { // input fills its bytes exactly, so its top letter spills into a new leading byte
+ this.kmer.getBytes()[0 + this.kmer.getKmerOffset()] = (byte) ((kmer.getBytes()[0 + kmer.getKmerOffset()] >> 6) & 0x3); // destination uses this.kmer's own offset
byteInMergedKmer++;
}
- for (int i = 0; i < kmer.getLength() - 1; i++, byteInMergedKmer++) {
- this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[i] << 2) | ((kmer.getBytes()[i + 1] >> 6) & 0x3));
+ for (int i = 0; i < kmer.getKmerByteLength() - 1; i++, byteInMergedKmer++) { // shift every byte left by one letter (2 bits), carrying in the next byte's top letter
+ this.kmer.getBytes()[byteInMergedKmer + this.kmer.getKmerOffset()] = (byte) ((kmer.getBytes()[i + kmer.getKmerOffset()] << 2) | ((kmer.getBytes()[i + kmer.getKmerOffset() + 1] >> 6) & 0x3));
}
- this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[kmer.getLength() - 1] << 2) | (preCode & 0x3));
+ this.kmer.getBytes()[byteInMergedKmer + this.kmer.getKmerOffset()] = (byte) ((kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - 1] << 2) | (preCode & 0x3)); // preCode lands in the lowest two bits of the last byte
this.kmer.clearLeadBit();
return this.kmer;
}
@@ -215,28 +215,28 @@
* : bytes array of next kmer
* @return merged kmer, the new k is @preK + @nextK
*/
- public KmerBytesWritable mergeTwoKmer(final KmerBytesWritable preKmer, final KmerBytesWritable nextKmer) {
- kmer.reset(preKmer.getKmerLength() + nextKmer.getKmerLength());
+ public VKmerBytesWritable mergeTwoKmer(final VKmerBytesWritable preKmer, final VKmerBytesWritable nextKmer) {
+ kmer.reset(preKmer.getKmerLetterLength() + nextKmer.getKmerLetterLength());
int i = 1;
- for (; i <= preKmer.getLength(); i++) {
- kmer.getBytes()[kmer.getLength() - i] = preKmer.getBytes()[preKmer.getLength() - i];
+ for (; i <= preKmer.getKmerByteLength(); i++) {
+ kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] = preKmer.getBytes()[preKmer.getKmerOffset() + preKmer.getKmerByteLength() - i];
}
if (i > 1) {
i--;
}
- if (preKmer.getKmerLength() % 4 == 0) {
- for (int j = 1; j <= nextKmer.getLength(); j++) {
- kmer.getBytes()[kmer.getLength() - i - j] = nextKmer.getBytes()[nextKmer.getLength() - j];
+ if (preKmer.getKmerLetterLength() % 4 == 0) {
+ for (int j = 1; j <= nextKmer.getKmerByteLength(); j++) {
+ kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i - j] = nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j];
}
} else {
- int posNeedToMove = ((preKmer.getKmerLength() % 4) << 1);
- kmer.getBytes()[kmer.getLength() - i] |= nextKmer.getBytes()[nextKmer.getLength() - 1] << posNeedToMove;
- for (int j = 1; j < nextKmer.getLength(); j++) {
- kmer.getBytes()[kmer.getLength() - i - j] = (byte) (((nextKmer.getBytes()[nextKmer.getLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
- .getBytes()[nextKmer.getLength() - j - 1] << posNeedToMove));
+ int posNeedToMove = ((preKmer.getKmerLetterLength() % 4) << 1);
+ kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] |= nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - 1] << posNeedToMove;
+ for (int j = 1; j < nextKmer.getKmerByteLength(); j++) {
+ kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i - j] = (byte) (((nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
+ .getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j - 1] << posNeedToMove));
}
- if (nextKmer.getKmerLength() % 4 == 0 || (nextKmer.getKmerLength() % 4) * 2 + posNeedToMove > 8) {
- kmer.getBytes()[0] = (byte) ((0xff & nextKmer.getBytes()[0]) >> (8 - posNeedToMove));
+ if (nextKmer.getKmerLetterLength() % 4 == 0 || (nextKmer.getKmerLetterLength() % 4) * 2 + posNeedToMove > 8) {
+ kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((0xff & nextKmer.getBytes()[0 + nextKmer.getKmerOffset()]) >> (8 - posNeedToMove));
}
}
kmer.clearLeadBit();
@@ -255,7 +255,7 @@
* : input genecode
* @return new created kmer that shifted by afterCode, the K will not change
*/
- public KmerBytesWritable shiftKmerWithNextCode(final KmerBytesWritable kmer, byte afterCode) {
+ public VKmerBytesWritable shiftKmerWithNextCode(final VKmerBytesWritable kmer, byte afterCode) {
this.kmer.setAsCopy(kmer);
this.kmer.shiftKmerWithNextCode(afterCode);
return this.kmer;
@@ -273,7 +273,7 @@
* : input genecode
* @return new created kmer that shifted by preCode, the K will not change
*/
- public KmerBytesWritable shiftKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
+ public VKmerBytesWritable shiftKmerWithPreCode(final VKmerBytesWritable kmer, byte preCode) {
this.kmer.setAsCopy(kmer);
this.kmer.shiftKmerWithPreCode(preCode);
return this.kmer;
@@ -284,22 +284,22 @@
*
* @param kmer
*/
- public KmerBytesWritable reverse(final KmerBytesWritable kmer) {
- this.kmer.reset(kmer.getKmerLength());
+ public VKmerBytesWritable reverse(final VKmerBytesWritable kmer) {
+ this.kmer.reset(kmer.getKmerLetterLength());
- int curPosAtKmer = ((kmer.getKmerLength() - 1) % 4) << 1;
+ int curPosAtKmer = ((kmer.getKmerLetterLength() - 1) % 4) << 1;
int curByteAtKmer = 0;
int curPosAtReverse = 0;
- int curByteAtReverse = this.kmer.getLength() - 1;
- this.kmer.getBytes()[curByteAtReverse] = 0;
- for (int i = 0; i < kmer.getKmerLength(); i++) {
- byte gene = (byte) ((kmer.getBytes()[curByteAtKmer] >> curPosAtKmer) & 0x03);
- this.kmer.getBytes()[curByteAtReverse] |= gene << curPosAtReverse;
+ int curByteAtReverse = this.kmer.getKmerByteLength() - 1;
+ this.kmer.getBytes()[curByteAtReverse + this.kmer.getKmerOffset()] = 0;
+ for (int i = 0; i < kmer.getKmerLetterLength(); i++) {
+ byte gene = (byte) ((kmer.getBytes()[curByteAtKmer + kmer.getKmerOffset()] >> curPosAtKmer) & 0x03);
+ this.kmer.getBytes()[curByteAtReverse + this.kmer.getKmerOffset()] |= gene << curPosAtReverse;
curPosAtReverse += 2;
if (curPosAtReverse >= 8) {
curPosAtReverse = 0;
- this.kmer.getBytes()[--curByteAtReverse] = 0;
+ this.kmer.getBytes()[--curByteAtReverse + this.kmer.getKmerOffset()] = 0;
}
curPosAtKmer -= 2;
if (curPosAtKmer < 0) {
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
index 6bf8dac..2aee32d 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
@@ -10,259 +10,250 @@
import org.apache.hadoop.io.Writable;
-import edu.uci.ics.genomix.data.KmerUtil;
import edu.uci.ics.genomix.data.Marshal;
-
/**
- * A list of fixed-length kmers. The length of this list is stored internally
- *
+ * A list of fixed-length kmers. The length of this list is stored internally.
*/
-public class KmerListWritable implements Writable, Iterable<KmerBytesWritable>,
- Serializable {
- private static final long serialVersionUID = 1L;
- protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 };
- protected static final int HEADER_SIZE = 4;
+public class KmerListWritable implements Writable, Iterable<KmerBytesWritable>, Serializable {
+ private static final long serialVersionUID = 1L;
+ protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 };
+ protected static final int HEADER_SIZE = 4;
- protected byte[] storage;
- protected int offset;
- protected int valueCount;
+ protected byte[] storage;
+ protected int offset;
+ protected int valueCount;
+ protected int storageMaxSize; // since we may be a reference inside a larger datablock, we must track our maximum size
- protected int bytesPerKmer = 0;
- protected int lettersPerKmer = 0;
- private KmerBytesWritable posIter = new KmerBytesWritable();
+ private KmerBytesWritable posIter = new KmerBytesWritable();
- public KmerListWritable() {
- this.storage = EMPTY_BYTES;
- this.valueCount = 0;
- this.offset = 0;
- }
+ public KmerListWritable() {
+ storage = EMPTY_BYTES;
+ valueCount = 0;
+ offset = 0;
+ storageMaxSize = storage.length;
+ }
- public KmerListWritable(int kmerlength) {
- this();
- this.lettersPerKmer = kmerlength;
- this.bytesPerKmer = KmerUtil.getByteNumFromK(kmerlength);
- }
+ public KmerListWritable(byte[] data, int offset) {
+ setNewReference(data, offset);
+ }
- public KmerListWritable(int kmerlength, byte[] data, int offset) {
- this.lettersPerKmer = kmerlength;
- this.bytesPerKmer = KmerUtil.getByteNumFromK(kmerlength);
- setNewReference(data, offset);
- }
+ public KmerListWritable(List<KmerBytesWritable> kmers) {
+ this();
+ setSize(kmers.size() * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE); // reserve space for all elements
+ for (KmerBytesWritable kmer : kmers) {
+ append(kmer);
+ }
+ }
- public KmerListWritable(List<KmerBytesWritable> kmers) {
- this();
- setSize(kmers.size()); // reserve space for all elements
- for (KmerBytesWritable kmer : kmers) {
- if (kmer.getKmerLength() != lettersPerKmer)
- throw new IllegalArgumentException("Kmer " + kmer.toString()
- + " is of incorrect length (l=" + kmer.getKmerLength()
- + ") for this list (should be " + lettersPerKmer + ").");
- append(kmer);
- }
- }
+ public void setNewReference(byte[] data, int offset) { // points this list at an existing serialized list inside `data`, without copying
+ valueCount = Marshal.getInt(data, offset); // element count is read from the header at `offset`
+ if (valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE > data.length - offset) { // header and payload must both fit in the remaining bytes
+ throw new IllegalArgumentException("Specified data buffer (len=" + (data.length - offset)
+ + ") is not large enough to store requested number of elements (" + valueCount + ")!");
+ }
+ this.storage = data;
+ this.offset = offset;
+ this.storageMaxSize = valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE; // only the bytes this list occupies count as ours
+ }
- public void setNewReference(byte[] data, int offset) {
- this.valueCount = Marshal.getInt(data, offset);
- this.storage = data;
- this.offset = offset;
- }
+ public void append(KmerBytesWritable kmer) {
+ setSize((1 + valueCount) * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+ System.arraycopy(kmer.getBytes(), 0, storage,
+ offset + HEADER_SIZE + valueCount * KmerBytesWritable.getBytesPerKmer(),
+ KmerBytesWritable.getBytesPerKmer());
+ valueCount += 1;
+ Marshal.putInt(valueCount, storage, offset);
+ }
- public void append(KmerBytesWritable kmer) {
- setSize((1 + valueCount) * bytesPerKmer);
- System.arraycopy(kmer.getBytes(), 0, storage, offset + valueCount
- * bytesPerKmer, bytesPerKmer);
- valueCount += 1;
- }
+ /*
+ * Append the otherList to the end of myList
+ */
+ public void appendList(KmerListWritable otherList) {
+ if (otherList.valueCount > 0) {
+ setSize((valueCount + otherList.valueCount) * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+ // copy contents of otherList into the end of my storage
+ System.arraycopy(otherList.storage, otherList.offset + HEADER_SIZE, storage, offset + HEADER_SIZE
+ + valueCount * KmerBytesWritable.getBytesPerKmer(),
+ otherList.valueCount * KmerBytesWritable.getBytesPerKmer());
+ valueCount += otherList.valueCount;
+ Marshal.putInt(valueCount, storage, offset);
+ }
+ }
- /*
- * Append the otherList to the end of myList
- */
- public void appendList(KmerListWritable otherList) {
- if (otherList.valueCount > 0) {
- setSize((valueCount + otherList.valueCount) * bytesPerKmer);
- // copy contents of otherList into the end of my storage
- System.arraycopy(otherList.storage, otherList.offset, storage,
- offset + valueCount * bytesPerKmer, otherList.valueCount
- * bytesPerKmer);
- valueCount += otherList.valueCount;
- }
- }
+ /**
+ * Replace my contents with the union of my list and otherList, using a
+ * temporary HashSet to drop duplicates; the header count is rewritten at the end.
+ */
+ public void unionUpdate(KmerListWritable otherList) {
+ int newSize = valueCount + otherList.valueCount; // upper bound on the union's size
+ HashSet<KmerBytesWritable> uniqueElements = new HashSet<KmerBytesWritable>(newSize);
+ for (KmerBytesWritable kmer : this) {
+ uniqueElements.add(kmer); // NOTE(review): the iterator returns the shared posIter object each time, so the set holds a mutable reference, not a copy - confirm this dedups as intended
+ }
+ for (KmerBytesWritable kmer : otherList) {
+ uniqueElements.add(kmer); // NOTE(review): same reused-reference concern, via otherList's posIter
+ }
+ valueCount = 0; // restart from an empty list; append() below rebuilds it
+ setSize(newSize * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+ for (KmerBytesWritable kmer : uniqueElements) {
+ append(kmer);
+ }
+ Marshal.putInt(valueCount, storage, offset); // covers the case where nothing was appended and the header was never rewritten
+ }
- /**
- * Save the union of my list and otherList. Uses a temporary HashSet for
- * uniquefication
- */
- public void unionUpdate(KmerListWritable otherList) {
- int newSize = valueCount + otherList.valueCount;
- HashSet<KmerBytesWritable> uniqueElements = new HashSet<KmerBytesWritable>(
- newSize);
- for (KmerBytesWritable kmer : this) {
- uniqueElements.add(kmer);
- }
- for (KmerBytesWritable kmer : otherList) {
- uniqueElements.add(kmer);
- }
- valueCount = 0;
- setSize(newSize);
- for (KmerBytesWritable kmer : uniqueElements) {
- append(kmer);
- }
- }
+ protected void setSize(int size) {
+ if (size > getCapacity()) {
+ setCapacity((size * 3 / 2));
+ }
+ }
- protected void setSize(int size) {
- if (size > getCapacity()) {
- setCapacity((size * 3 / 2));
- }
- }
+ protected int getCapacity() {
+ return storageMaxSize - offset;
+ }
- protected int getCapacity() {
- return storage.length - offset;
- }
+ protected void setCapacity(int new_cap) {
+ if (new_cap > getCapacity()) {
+ byte[] new_data = new byte[new_cap];
+ if (valueCount > 0) {
+ System.arraycopy(storage, offset, new_data, 0, valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+ }
+ storage = new_data;
+ offset = 0;
+ storageMaxSize = storage.length;
+ }
+ }
- protected void setCapacity(int new_cap) {
- if (new_cap > getCapacity()) {
- byte[] new_data = new byte[new_cap];
- if (storage.length - offset > 0) {
- System.arraycopy(storage, offset, new_data, 0, storage.length
- - offset);
- }
- storage = new_data;
- offset = 0;
- }
- }
+ public void reset() {
+ valueCount = 0;
+ }
- public void reset(int kmerSize) {
- lettersPerKmer = kmerSize;
- bytesPerKmer = KmerUtil.getByteNumFromK(lettersPerKmer);
- storage = EMPTY_BYTES;
- valueCount = 0;
- offset = 0;
- }
+ public KmerBytesWritable getPosition(int i) {
+ if (i >= valueCount) {
+ throw new ArrayIndexOutOfBoundsException("No such positions");
+ }
+ posIter.setAsReference(storage, offset + HEADER_SIZE + i * KmerBytesWritable.getBytesPerKmer());
+ return posIter;
+ }
- public KmerBytesWritable getPosition(int i) {
- if (i >= valueCount) {
- throw new ArrayIndexOutOfBoundsException("No such positions");
- }
- posIter.setAsReference(lettersPerKmer, storage, offset + i
- * bytesPerKmer);
- return posIter;
- }
+ public void setCopy(KmerListWritable otherList) {
+ setCopy(otherList.storage, otherList.offset);
+ }
- public void set(KmerListWritable otherList) {
- this.lettersPerKmer = otherList.lettersPerKmer;
- this.bytesPerKmer = otherList.bytesPerKmer;
- set(otherList.valueCount, otherList.storage, otherList.offset);
- }
+ /**
+ * read a KmerListWritable from newData, which should include the header
+ */
+ public void setCopy(byte[] newData, int offset) {
+ int newValueCount = Marshal.getInt(newData, offset);
+ setSize(newValueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+ if (newValueCount > 0) {
+ System.arraycopy(newData, offset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, newValueCount
+ * KmerBytesWritable.getBytesPerKmer());
+ }
+ valueCount = newValueCount;
+ Marshal.putInt(valueCount, storage, this.offset);
+ }
- public void set(int valueCount, byte[] newData, int offset) {
- this.valueCount = valueCount;
- setSize(valueCount * bytesPerKmer);
- if (valueCount > 0) {
- System.arraycopy(newData, offset, storage, this.offset, valueCount
- * bytesPerKmer);
- }
- }
+ @Override
+ public Iterator<KmerBytesWritable> iterator() {
+ Iterator<KmerBytesWritable> it = new Iterator<KmerBytesWritable>() {
- @Override
- public Iterator<KmerBytesWritable> iterator() {
- Iterator<KmerBytesWritable> it = new Iterator<KmerBytesWritable>() {
+ private int currentIndex = 0;
- private int currentIndex = 0;
+ @Override
+ public boolean hasNext() {
+ return currentIndex < valueCount;
+ }
- @Override
- public boolean hasNext() {
- return currentIndex < valueCount;
- }
+ @Override
+ public KmerBytesWritable next() {
+ return getPosition(currentIndex++);
+ }
- @Override
- public KmerBytesWritable next() {
- return getPosition(currentIndex++);
- }
+ @Override
+ public void remove() { // removes the element most recently returned by next()
+ if (currentIndex < valueCount) { // elements start after the header, so both copy positions include HEADER_SIZE
+ System.arraycopy(storage, offset + HEADER_SIZE + currentIndex * KmerBytesWritable.getBytesPerKmer(), storage,
+ offset + HEADER_SIZE + (currentIndex - 1) * KmerBytesWritable.getBytesPerKmer(), (valueCount - currentIndex) * KmerBytesWritable.getBytesPerKmer());
+ }
+ valueCount--;
+ currentIndex--; // the following element now sits at the removed element's index
+ Marshal.putInt(valueCount, storage, offset); // keep the stored header count in sync
+ }
+ };
+ return it;
+ }
- @Override
- public void remove() {
- if (currentIndex < valueCount)
- System.arraycopy(storage, offset + currentIndex
- * bytesPerKmer, storage, offset
- + (currentIndex - 1) * bytesPerKmer,
- (valueCount - currentIndex) * bytesPerKmer);
- valueCount--;
- currentIndex--;
- }
- };
- return it;
- }
+ /*
+ * remove the first instance of `toRemove`. Uses a linear scan. Throws an
+ * exception if not in this list.
+ */
+ public void remove(KmerBytesWritable toRemove, boolean ignoreMissing) {
+ Iterator<KmerBytesWritable> posIterator = this.iterator();
+ while (posIterator.hasNext()) {
+ if (toRemove.equals(posIterator.next())) {
+ posIterator.remove();
+ return; // break as soon as the element is found
+ }
+ }
+ // element was not found
+ if (!ignoreMissing) {
+ throw new ArrayIndexOutOfBoundsException("the KmerBytesWritable `" + toRemove.toString()
+ + "` was not found in this list.");
+ }
+ }
- /*
- * remove the first instance of `toRemove`. Uses a linear scan. Throws an
- * exception if not in this list.
- */
- public void remove(KmerBytesWritable toRemove, boolean ignoreMissing) {
- Iterator<KmerBytesWritable> posIterator = this.iterator();
- while (posIterator.hasNext()) {
- if (toRemove.equals(posIterator.next())) {
- posIterator.remove();
- return;
- }
- }
- if (!ignoreMissing) {
- throw new ArrayIndexOutOfBoundsException("the KmerBytesWritable `"
- + toRemove.toString() + "` was not found in this list.");
- }
- }
+ public void remove(KmerBytesWritable toRemove) {
+ remove(toRemove, false);
+ }
- public void remove(KmerBytesWritable toRemove) {
- remove(toRemove, false);
- }
+ @Override
+ public void readFields(DataInput in) throws IOException { // reads a count followed by that many kmers, as written by write()
+ int newValueCount = in.readInt(); // valueCount stays untouched until storage is resized, since setCapacity copies valueCount elements from the old storage
+ setSize(newValueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+ in.readFully(storage, offset + HEADER_SIZE, newValueCount * KmerBytesWritable.getBytesPerKmer()); // payload only; the header int was already consumed above
+ valueCount = newValueCount; Marshal.putInt(valueCount, storage, offset); // two statements on one line keep the hunk's line count; header is re-stored so write() emits it
+ }
- @Override
- public void readFields(DataInput in) throws IOException {
- this.valueCount = in.readInt();
- setSize(valueCount * bytesPerKmer);// kmerByteSize
- in.readFully(storage, offset, valueCount * bytesPerKmer);// kmerByteSize
- }
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.write(storage, offset, valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE);
+ }
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(valueCount);
- out.write(storage, offset, valueCount * bytesPerKmer);
- }
+ public int getCountOfPosition() {
+ return valueCount;
+ }
- public int getCountOfPosition() {
- return valueCount;
- }
+ public byte[] getByteArray() {
+ return storage;
+ }
- public byte[] getByteArray() {
- return storage;
- }
+ public int getStartOffset() {
+ return offset;
+ }
- public int getStartOffset() {
- return offset;
- }
+ public int getLength() {
+ return valueCount * KmerBytesWritable.getBytesPerKmer() + HEADER_SIZE;
+ }
- public int getLength() {
- return valueCount * bytesPerKmer;
- }
+ /**
+ * Formats this list as "[k0,k1,...]", or "[]" when it holds no kmers.
+ */
+ @Override
+ public String toString() {
+ StringBuilder text = new StringBuilder("[");
+ for (int idx = 0; idx < valueCount; idx++) {
+ if (idx > 0) {
+ text.append(','); // separator goes between elements only
+ }
+ text.append(getPosition(idx).toString());
+ }
+ text.append(']');
+ return text.toString();
+ }
- @Override
- public String toString() {
- StringBuilder sbuilder = new StringBuilder();
- sbuilder.append('[');
- for (int i = 0; i < valueCount; i++) {
- sbuilder.append(getPosition(i).toString());
- sbuilder.append(',');
- }
- if (valueCount > 0) {
- sbuilder.setCharAt(sbuilder.length() - 1, ']');
- } else {
- sbuilder.append(']');
- }
- return sbuilder.toString();
- }
-
- @Override
- public int hashCode() {
- return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
- }
+ @Override
+ public int hashCode() {
+ return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 98e37dc..362c12e 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -1,25 +1,27 @@
package edu.uci.ics.genomix.type;
+import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
+import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
+import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.WritableComparable;
public class NodeWritable implements WritableComparable<NodeWritable>, Serializable{
private static final long serialVersionUID = 1L;
- public static final NodeWritable EMPTY_NODE = new NodeWritable(0);
+ public static final NodeWritable EMPTY_NODE = new NodeWritable();
private PositionListWritable nodeIdList;
private KmerListWritable forwardForwardList;
private KmerListWritable forwardReverseList;
private KmerListWritable reverseForwardList;
private KmerListWritable reverseReverseList;
- private KmerBytesWritable kmer;
- private int kmerlength = 0;
+ private VKmerBytesWritable kmer;
// merge/update directions
public static class DirectionFlag {
@@ -31,48 +33,41 @@
}
public NodeWritable() {
- this(0);
- }
-
- public NodeWritable(int kmerlenth) {
- this.kmerlength = kmerlenth;
nodeIdList = new PositionListWritable();
- forwardForwardList = new KmerListWritable(kmerlenth);
- forwardReverseList = new KmerListWritable(kmerlenth);
- reverseForwardList = new KmerListWritable(kmerlenth);
- reverseReverseList = new KmerListWritable(kmerlenth);
- kmer = new KmerBytesWritable(); //in graph construction - not set kmerlength Optimization: VKmer
+ forwardForwardList = new KmerListWritable();
+ forwardReverseList = new KmerListWritable();
+ reverseForwardList = new KmerListWritable();
+ reverseReverseList = new KmerListWritable();
+ kmer = new VKmerBytesWritable(); // in graph construction - not set kmerlength Optimization: VKmer
}
public NodeWritable(PositionListWritable nodeIdList, KmerListWritable FFList, KmerListWritable FRList,
- KmerListWritable RFList, KmerListWritable RRList, KmerBytesWritable kmer) {
- this(kmer.getKmerLength());
+ KmerListWritable RFList, KmerListWritable RRList, VKmerBytesWritable kmer) {
+ this();
set(nodeIdList, FFList, FRList, RFList, RRList, kmer);
}
public void set(NodeWritable node){
- this.kmerlength = node.kmerlength;
set(node.nodeIdList, node.forwardForwardList, node.forwardReverseList, node.reverseForwardList,
node.reverseReverseList, node.kmer);
}
public void set(PositionListWritable nodeIdList, KmerListWritable FFList, KmerListWritable FRList,
- KmerListWritable RFList, KmerListWritable RRList, KmerBytesWritable kmer) {
+ KmerListWritable RFList, KmerListWritable RRList, VKmerBytesWritable kmer2) {
this.nodeIdList.set(nodeIdList);
- this.forwardForwardList.set(FFList);
- this.forwardReverseList.set(FRList);
- this.reverseForwardList.set(RFList);
- this.reverseReverseList.set(RRList);
- this.kmer.setAsCopy(kmer);
+ this.forwardForwardList.setCopy(FFList);
+ this.forwardReverseList.setCopy(FRList);
+ this.reverseForwardList.setCopy(RFList);
+ this.reverseReverseList.setCopy(RRList);
+ this.kmer.setAsCopy(kmer2);
}
- public void reset(int kmerSize) {
- this.kmerlength = kmerSize;
+ public void reset() {
this.nodeIdList.reset();
- this.forwardForwardList.reset(kmerSize);
- this.forwardReverseList.reset(kmerSize);
- this.reverseForwardList.reset(kmerSize);
- this.reverseReverseList.reset(kmerSize);
+ this.forwardForwardList.reset();
+ this.forwardReverseList.reset();
+ this.reverseForwardList.reset();
+ this.reverseReverseList.reset();
this.kmer.reset(0);
}
@@ -85,24 +80,16 @@
this.nodeIdList.set(nodeIdList);
}
- public KmerBytesWritable getKmer() {
+ public VKmerBytesWritable getKmer() {
return kmer;
}
- public void setKmer(KmerBytesWritable kmer) {
+ public void setKmer(VKmerBytesWritable kmer) {
this.kmer.setAsCopy(kmer);
}
- public int getKmerlength() {
- return kmerlength;
- }
-
- public void setKmerlength(int kmerlength) {
- this.kmerlength = kmerlength;
- }
-
- public int getCount() {
- return kmer.getKmerLength();
+ public int getKmerLength() {
+ return kmer.getKmerLetterLength();
}
public KmerListWritable getFFList() {
@@ -122,19 +109,19 @@
}
public void setFFList(KmerListWritable forwardForwardList) {
- this.forwardForwardList.set(forwardForwardList);
+ this.forwardForwardList.setCopy(forwardForwardList);
}
public void setFRList(KmerListWritable forwardReverseList) {
- this.forwardReverseList.set(forwardReverseList);
+ this.forwardReverseList.setCopy(forwardReverseList);
}
public void setRFList(KmerListWritable reverseForwardList) {
- this.reverseForwardList.set(reverseForwardList);
+ this.reverseForwardList.setCopy(reverseForwardList);
}
public void setRRList(KmerListWritable reverseReverseList) {
- this.reverseReverseList.set(reverseReverseList);
+ this.reverseReverseList.setCopy(reverseReverseList);
}
public KmerListWritable getListFromDir(byte dir) {
@@ -152,9 +139,60 @@
}
}
+ /**
+ * Sums getLength() over the id list, the four edge lists and the kmer: the size of this node as a byte array.
+ */
+ public int getSerializedLength() {
+ int edgeBytes = forwardForwardList.getLength() + forwardReverseList.getLength() + reverseForwardList.getLength() + reverseReverseList.getLength();
+ return nodeIdList.getLength() + edgeBytes + kmer.getLength();
+ }
+
+ /**
+ * Serializes this node through write(DataOutput) into a newly allocated byte array.
+ * @throws IOException if write(DataOutput) fails
+ */
+ public byte[] marshalToByteArray() throws IOException {
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream(getSerializedLength()); // initial capacity taken from getSerializedLength()
+ write(new DataOutputStream(buffer));
+ return buffer.toByteArray();
+ }
+
+ /*
+ * Copies this node's fields out of data, starting at offset. Fields are read
+ * back-to-back in the order: id list, FF, FR, RF, RR edge lists, then the kmer.
+ */
+ public void setAsCopy(byte[] data, int offset) {
+ int cursor = offset;
+ nodeIdList.set(data, cursor);
+ cursor += nodeIdList.getLength();
+ KmerListWritable[] edgeLists = { forwardForwardList, forwardReverseList, reverseForwardList, reverseReverseList };
+ for (KmerListWritable edges : edgeLists) {
+ edges.setCopy(data, cursor);
+ cursor += edges.getLength(); // each list reports its own length once populated
+ }
+ kmer.setAsCopy(data, cursor);
+ }
+
+
+ /*
+ * Points this node's fields at data, starting at offset. Fields are laid out
+ * back-to-back in the order: id list, FF, FR, RF, RR edge lists, then the kmer.
+ */
+ public void setAsReference(byte[] data, int offset) {
+ int cursor = offset;
+ nodeIdList.setNewReference(data, cursor);
+ cursor += nodeIdList.getLength();
+ KmerListWritable[] edgeLists = { forwardForwardList, forwardReverseList, reverseForwardList, reverseReverseList };
+ for (KmerListWritable edges : edgeLists) {
+ edges.setNewReference(data, cursor);
+ cursor += edges.getLength(); // each list reports its own length once bound
+ }
+ kmer.setAsReference(data, cursor);
+ }
+
+
@Override
public void write(DataOutput out) throws IOException {
- out.writeInt(kmerlength);
this.nodeIdList.write(out);
this.forwardForwardList.write(out);
this.forwardReverseList.write(out);
@@ -165,8 +203,7 @@
@Override
public void readFields(DataInput in) throws IOException {
- this.kmerlength = in.readInt();
- reset(kmerlength);
+ reset();
this.nodeIdList.readFields(in);
this.forwardForwardList.readFields(in);
this.forwardReverseList.readFields(in);
@@ -211,15 +248,15 @@
return sbuilder.toString();
}
- public void mergeForwardNext(NodeWritable nextNode, int initialKmerSize) {
- this.forwardForwardList.set(nextNode.forwardForwardList);
- this.forwardReverseList.set(nextNode.forwardReverseList);
+ public void mergeForwardNext(final NodeWritable nextNode, int initialKmerSize) {
+ this.forwardForwardList.setCopy(nextNode.forwardForwardList);
+ this.forwardReverseList.setCopy(nextNode.forwardReverseList);
kmer.mergeWithFFKmer(initialKmerSize, nextNode.getKmer());
}
- public void mergeForwardPre(NodeWritable preNode, int initialKmerSize) {
- this.reverseForwardList.set(preNode.reverseForwardList);
- this.reverseReverseList.set(preNode.reverseReverseList);
+ public void mergeForwardPre(final NodeWritable preNode, int initialKmerSize) {
+ this.reverseForwardList.setCopy(preNode.reverseForwardList);
+ this.reverseReverseList.setCopy(preNode.reverseReverseList);
kmer.mergeWithRRKmer(initialKmerSize, preNode.getKmer());
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
index b056c14..8de4b0e 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
@@ -13,78 +13,84 @@
import edu.uci.ics.genomix.data.Marshal;
import edu.uci.ics.genomix.type.PositionWritable;
-public class PositionListWritable implements Writable, Iterable<PositionWritable>, Serializable{
+public class PositionListWritable implements Writable, Iterable<PositionWritable>, Serializable {
private static final long serialVersionUID = 1L;
+ protected static final byte[] EMPTY_BYTES = {0,0,0,0};
+ protected static final int HEADER_SIZE = 4;
+
protected byte[] storage;
protected int offset;
protected int valueCount;
- protected static final byte[] EMPTY = {};
-
+ protected int maxStorageSize;
+
+
protected PositionWritable posIter = new PositionWritable();
-
+
public PositionListWritable() {
- this.storage = EMPTY;
- this.valueCount = 0;
- this.offset = 0;
+ storage = EMPTY_BYTES;
+ valueCount = 0;
+ offset = 0;
+ maxStorageSize = storage.length;
}
-
- public PositionListWritable(int count, byte[] data, int offset) {
- setNewReference(count, data, offset);
+
+ public PositionListWritable(byte[] data, int offset) {
+ setNewReference(data, offset);
}
-
+
public PositionListWritable(List<PositionWritable> posns) {
this();
- setSize(posns.size()); // reserve space for all elements
+ setSize(posns.size() * PositionWritable.LENGTH + HEADER_SIZE); // reserve space for all elements
for (PositionWritable p : posns) {
append(p);
}
}
-
- public void setNewReference(int count, byte[] data, int offset) {
- this.valueCount = count;
+
+ public void setNewReference(byte[] data, int offset) {
+ this.valueCount = Marshal.getInt(data, offset);
this.storage = data;
this.offset = offset;
+ maxStorageSize = valueCount * PositionWritable.LENGTH + HEADER_SIZE;
}
-
+
public void append(long uuid) {
- setSize((1 + valueCount) * PositionWritable.LENGTH);
- Marshal.putLong(uuid, storage, offset + valueCount * PositionWritable.LENGTH);
+ setSize((1 + valueCount) * PositionWritable.LENGTH + HEADER_SIZE);
+ Marshal.putLong(uuid, storage, offset + valueCount * PositionWritable.LENGTH + HEADER_SIZE);
valueCount += 1;
+ Marshal.putInt(valueCount, storage, offset);
}
-
- public void append(byte mateId, long readId, int posId){
+
+ public void append(byte mateId, long readId, int posId) {
append(PositionWritable.makeUUID(mateId, readId, posId));
}
-
+
public void append(PositionWritable pos) {
- if(pos != null)
+ if (pos != null)
append(pos.getUUID());
else
throw new RuntimeException("This position is null pointer!");
}
-
+
/*
* Append the otherList to the end of myList
*/
public void appendList(PositionListWritable otherList) {
if (otherList.valueCount > 0) {
- setSize((valueCount + otherList.valueCount) * PositionWritable.LENGTH);
+ setSize((valueCount + otherList.valueCount) * PositionWritable.LENGTH + HEADER_SIZE);
// copy contents of otherList into the end of my storage
- System.arraycopy(otherList.storage, otherList.offset,
- storage, offset + valueCount * PositionWritable.LENGTH,
- otherList.valueCount * PositionWritable.LENGTH);
+ System.arraycopy(otherList.storage, otherList.offset + HEADER_SIZE, storage, offset + valueCount
+ * PositionWritable.LENGTH + HEADER_SIZE, otherList.valueCount * PositionWritable.LENGTH);
valueCount += otherList.valueCount;
+ Marshal.putInt(valueCount, storage, offset);
}
}
-
+
/**
* Save the union of my list and otherList. Uses a temporary HashSet for
* uniquefication
*/
public void unionUpdate(PositionListWritable otherList) {
int newSize = valueCount + otherList.valueCount;
- HashSet<PositionWritable> uniqueElements = new HashSet<PositionWritable>(
- newSize);
+ HashSet<PositionWritable> uniqueElements = new HashSet<PositionWritable>(newSize);
for (PositionWritable pos : this) {
uniqueElements.add(pos);
}
@@ -92,71 +98,75 @@
uniqueElements.add(pos);
}
valueCount = 0;
- setSize(newSize);
+ setSize(newSize * PositionWritable.LENGTH + HEADER_SIZE);
for (PositionWritable pos : uniqueElements) {
append(pos);
}
}
-
+
public static int getCountByDataLength(int length) {
if (length % PositionWritable.LENGTH != 0) {
throw new IllegalArgumentException("Length of positionlist is invalid");
}
return length / PositionWritable.LENGTH;
}
-
+
public void set(PositionListWritable otherList) {
- set(otherList.valueCount, otherList.storage, otherList.offset);
+ set(otherList.storage, otherList.offset);
}
- public void set(int valueCount, byte[] newData, int offset) {
- this.valueCount = valueCount;
- setSize(valueCount * PositionWritable.LENGTH);
- if (valueCount > 0) {
- System.arraycopy(newData, offset, storage, this.offset, valueCount * PositionWritable.LENGTH);
+ public void set(byte[] newData, int newOffset) {
+ int newValueCount = Marshal.getInt(newData, newOffset);
+ setSize(newValueCount * PositionWritable.LENGTH + HEADER_SIZE);
+ if (newValueCount > 0) {
+ System.arraycopy(newData, newOffset + HEADER_SIZE, storage, this.offset + HEADER_SIZE, newValueCount * PositionWritable.LENGTH);
}
+ valueCount = newValueCount;
+ Marshal.putInt(valueCount, storage, this.offset);
}
public void reset() {
valueCount = 0;
+ Marshal.putInt(valueCount, storage, offset);
}
-
+
protected void setSize(int size) {
if (size > getCapacity()) {
setCapacity((size * 3 / 2));
}
}
-
+
protected int getCapacity() {
- return storage.length - offset;
+ return maxStorageSize - offset;
}
protected void setCapacity(int new_cap) {
if (new_cap > getCapacity()) {
byte[] new_data = new byte[new_cap];
- if (storage.length - offset > 0) {
- System.arraycopy(storage, offset, new_data, 0, storage.length - offset);
+ if (valueCount > 0) {
+ System.arraycopy(storage, offset, new_data, 0, valueCount * PositionWritable.LENGTH + HEADER_SIZE);
}
storage = new_data;
offset = 0;
+ maxStorageSize = storage.length;
}
}
-
+
public PositionWritable getPosition(int i) {
if (i >= valueCount) {
throw new ArrayIndexOutOfBoundsException("No such positions");
}
- posIter.setNewReference(storage, offset + i * PositionWritable.LENGTH);
+ posIter.setNewReference(storage, offset + i * PositionWritable.LENGTH + HEADER_SIZE);
return posIter;
}
-
+
public void resetPosition(int i, long uuid) {
if (i >= valueCount) {
throw new ArrayIndexOutOfBoundsException("No such positions");
}
- Marshal.putLong(uuid, storage, offset + i * PositionWritable.LENGTH);
+ Marshal.putLong(uuid, storage, offset + i * PositionWritable.LENGTH + HEADER_SIZE);
}
-
+
public int getCountOfPosition() {
return valueCount;
}
@@ -170,9 +180,9 @@
}
public int getLength() {
- return valueCount * PositionWritable.LENGTH;
+ return valueCount * PositionWritable.LENGTH + HEADER_SIZE;
}
-
+
@Override
public Iterator<PositionWritable> iterator() {
Iterator<PositionWritable> it = new Iterator<PositionWritable>() {
@@ -191,50 +201,54 @@
@Override
public void remove() {
- if(currentIndex < valueCount)
- System.arraycopy(storage, offset + currentIndex * PositionWritable.LENGTH,
- storage, offset + (currentIndex - 1) * PositionWritable.LENGTH,
- (valueCount - currentIndex) * PositionWritable.LENGTH);
+ if (currentIndex < valueCount)
+ System.arraycopy(storage, offset + currentIndex * PositionWritable.LENGTH + HEADER_SIZE, storage, offset
+ + (currentIndex - 1) * PositionWritable.LENGTH + HEADER_SIZE, (valueCount - currentIndex)
+ * PositionWritable.LENGTH);
valueCount--;
currentIndex--;
+ Marshal.putInt(valueCount, storage, offset);
}
};
return it;
}
-
+
/*
* remove the first instance of @toRemove. Uses a linear scan. Throws an exception if not in this list.
*/
public void remove(PositionWritable toRemove, boolean ignoreMissing) {
Iterator<PositionWritable> posIterator = this.iterator();
while (posIterator.hasNext()) {
- if(toRemove.equals(posIterator.next())) {
+ if (toRemove.equals(posIterator.next())) {
posIterator.remove();
- return;
+ return; // found it. return early.
}
}
+ // element not found.
if (!ignoreMissing) {
- throw new ArrayIndexOutOfBoundsException("the PositionWritable `" + toRemove.toString() + "` was not found in this list.");
+ throw new ArrayIndexOutOfBoundsException("the PositionWritable `" + toRemove.toString()
+ + "` was not found in this list.");
}
}
-
+
public void remove(PositionWritable toRemove) {
- remove(toRemove, false);
+ remove(toRemove, false);
}
-
+
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(valueCount);
- out.write(storage, offset, valueCount * PositionWritable.LENGTH);
+ out.write(storage, offset + HEADER_SIZE, valueCount * PositionWritable.LENGTH);
}
-
+
@Override
public void readFields(DataInput in) throws IOException {
this.valueCount = in.readInt();
- setSize(valueCount * PositionWritable.LENGTH);
- in.readFully(storage, offset, valueCount * PositionWritable.LENGTH);
+ // Deserialize: an int count followed by count * PositionWritable.LENGTH payload bytes.
+ // The previous contents are about to be overwritten, so grow the buffer with
+ // valueCount temporarily zeroed: setCapacity() copies valueCount entries out of
+ // the OLD buffer, which may be shorter than the incoming list.
+ int incomingCount = valueCount;
+ valueCount = 0;
+ setSize(incomingCount * PositionWritable.LENGTH + HEADER_SIZE);
+ valueCount = incomingCount;
+ in.readFully(storage, offset + HEADER_SIZE, valueCount * PositionWritable.LENGTH);
+ // keep the in-buffer header in sync with the count just read
+ Marshal.putInt(valueCount, storage, offset);
}
-
+
@Override
public String toString() {
StringBuilder sbuilder = new StringBuilder();
@@ -250,12 +264,12 @@
}
return sbuilder.toString();
}
-
+
@Override
public int hashCode() {
return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
}
-
+
@Override
public boolean equals(Object o) {
if (!(o instanceof PositionListWritable))
@@ -263,9 +277,9 @@
PositionListWritable other = (PositionListWritable) o;
if (this.valueCount != other.valueCount)
return false;
- for (int i=0; i < this.valueCount; i++) {
- if (!this.getPosition(i).equals(other.getPosition(i)))
- return false;
+ for (int i = 0; i < this.valueCount; i++) {
+ if (!this.getPosition(i).equals(other.getPosition(i)))
+ return false;
}
return true;
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
index 1079677..03d66a6 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
@@ -119,10 +119,10 @@
}
/*
- * String of form "(readId-posID_mate)" where mate is _1 or _2
+ * String of form "(readId-posID_mate)" where mate is _0 or _1
*/
@Override
public String toString() {
- return "(" + this.getReadId() + "-" + this.getPosId() + "_" + (this.getMateId() + 1) + ")";
+ return "(" + this.getReadId() + "-" + this.getPosId() + "_" + (this.getMateId()) + ")";
}
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index 7fed5c7..c38e35d 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -18,255 +18,596 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.io.Serializable;
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import edu.uci.ics.genomix.data.KmerUtil;
import edu.uci.ics.genomix.data.Marshal;
+import edu.uci.ics.genomix.oldtype.NodeWritable.DirectionFlag;
/**
* Variable-length kmer which stores its length internally.
- *
* Note: `offset` as used in this class is the offset at which the *kmer*
* begins. There is a {@value HEADER_SIZE}-byte header preceding the kmer
*/
-public class VKmerBytesWritable extends KmerBytesWritable {
+public class VKmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
+ private static final long serialVersionUID = 1L;
+ protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 }; // int indicating 0 length
+ protected static final int HEADER_SIZE = 4; // number of bytes for header info
- private static final long serialVersionUID = 1L;
- protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 }; // int
- // indicating 0
- // length
- protected static final int HEADER_SIZE = 4; // number of bytes for header
- // info
+ protected int lettersInKmer;
+ protected int bytesUsed;
+ protected byte[] bytes;
+ protected int kmerStartOffset;
+ protected int storageMaxSize; // since we may be a reference inside a larger datablock, we must track our maximum size
- /**
- * Initialize as empty kmer
- */
- public VKmerBytesWritable() {
- this(EMPTY_BYTES, HEADER_SIZE);
- }
+    /**
+     * Creates an empty kmer by referencing the shared {@code EMPTY_BYTES}
+     * block (a 4-byte header encoding length 0) at block offset 0.
+     */
+    public VKmerBytesWritable() {
+        this(EMPTY_BYTES, 0);
+    }
- /**
- * Copy contents of kmer string
- */
- public VKmerBytesWritable(String kmer) {
- bytes = new byte[HEADER_SIZE + KmerUtil.getByteNumFromK(kmer.length())];
- offset = HEADER_SIZE;
- setAsCopy(kmer);
- }
+    /**
+     * Creates a kmer holding a copy of the given kmer string.
+     * Storage is sized for the header plus the bytes needed for
+     * {@code kmer.length()} letters before the contents are copied in.
+     *
+     * @param kmer
+     *            the kmer text to copy
+     */
+    public VKmerBytesWritable(String kmer) {
+        final int kmerBytes = KmerUtil.getByteNumFromK(kmer.length());
+        this.bytes = new byte[HEADER_SIZE + kmerBytes];
+        this.kmerStartOffset = HEADER_SIZE;
+        this.storageMaxSize = this.bytes.length;
+        setAsCopy(kmer);
+    }
- /**
- * Set as reference to given data
- *
- * @param storage
- * : byte array with header
- * @param offset
- */
- public VKmerBytesWritable(byte[] storage, int offset) {
- setAsReference(storage, offset);
- }
+    /**
+     * Creates a kmer that references (does not copy) an existing block.
+     *
+     * @param storage
+     *            byte array containing a length header followed by the kmer bytes
+     * @param offset
+     *            index in {@code storage} at which the header begins
+     */
+    public VKmerBytesWritable(byte[] storage, int offset) {
+        setAsReference(storage, offset);
+    }
- /**
- * Reserve space for k letters
- */
- public VKmerBytesWritable(int k) {
- if (k > 0) {
- bytes = new byte[HEADER_SIZE + KmerUtil.getByteNumFromK(k)];
- } else {
- bytes = EMPTY_BYTES;
- }
- offset = HEADER_SIZE;
- setKmerLength(k);
- }
+    /**
+     * Creates a kmer with room reserved for {@code k} letters.
+     * For {@code k == 0} the shared {@code EMPTY_BYTES} array is used as storage.
+     *
+     * @param k
+     *            number of letters to reserve space for; must not be negative
+     * @throws IllegalArgumentException
+     *             if {@code k} is negative
+     */
+    public VKmerBytesWritable(int k) {
+        if (k < 0) {
+            throw new IllegalArgumentException("Invalid K (" + k + ").");
+        }
+        this.bytes = (k == 0) ? EMPTY_BYTES : new byte[HEADER_SIZE + KmerUtil.getByteNumFromK(k)];
+        this.kmerStartOffset = HEADER_SIZE;
+        this.storageMaxSize = this.bytes.length;
+        setKmerLength(k);
+    }
- /**
- * deep copy of kmer in other
- *
- * @param other
- */
- public VKmerBytesWritable(VKmerBytesWritable other) {
- this(other.lettersInKmer);
- setAsCopy(other);
- }
+    /**
+     * Copy constructor: reserves space for as many letters as {@code other}
+     * holds, then deep-copies its kmer bytes.
+     *
+     * @param other
+     *            the kmer to duplicate
+     */
+    public VKmerBytesWritable(VKmerBytesWritable other) {
+        this(other.lettersInKmer);
+        setAsCopy(other);
+    }
- /**
- * Deep copy of the given kmer
- *
- * @param other
- */
- @Override
- public void setAsCopy(KmerBytesWritable other) {
- reset(other.lettersInKmer);
- if (lettersInKmer > 0) {
- System.arraycopy(other.bytes, other.offset, bytes, this.offset,
- bytesUsed);
- }
- }
+    /**
+     * Replaces this kmer's contents with a deep copy of {@code other}.
+     * The length is adopted via {@link #reset(int)} first; the packed bytes
+     * are then copied unless the kmer is empty.
+     *
+     * @param other
+     *            the kmer whose letters are copied
+     */
+    public void setAsCopy(VKmerBytesWritable other) {
+        reset(other.lettersInKmer);
+        if (lettersInKmer <= 0) {
+            return; // nothing to copy for an empty kmer
+        }
+        System.arraycopy(other.bytes, other.kmerStartOffset, bytes, this.kmerStartOffset, bytesUsed);
+    }
- /**
- * set from String kmer
- */
- @Override
- public void setAsCopy(String kmer) {
- int k = kmer.length();
- reset(k);
- System.arraycopy(kmer.getBytes(), 0, bytes, offset, bytesUsed);
- }
+    /**
+     * Sets this kmer from its textual form (one symbol per letter).
+     * <p>
+     * The text is encoded through {@link #setByRead(int, byte[], int)} so that
+     * it is stored in the same 2-bit-per-letter layout every other method of
+     * this class reads. Copying the raw character bytes into the packed
+     * storage, as was done previously, produced a kmer whose
+     * {@code toString()} did not match the input.
+     *
+     * @param kmer
+     *            the kmer text; its length becomes the kmer length
+     */
+    public void setAsCopy(String kmer) {
+        // setByRead(k, ...) resets the storage for k letters before packing them
+        setByRead(kmer.length(), kmer.getBytes(), 0);
+    }
- /**
- * Deep copy of the given bytes data
- *
- * @param newData
- * : byte array to copy (should have a header)
- * @param offset
- */
- public void setAsCopy(byte[] newData, int offset) {
- int k = Marshal.getInt(newData, offset);
- reset(k);
- System.arraycopy(newData, offset + HEADER_SIZE, bytes, this.offset,
- bytesUsed);
- }
+    /**
+     * Replaces this kmer's contents with a deep copy of a header-prefixed block.
+     *
+     * @param newData
+     *            byte array to copy from; an int length header is read at {@code offset}
+     * @param offset
+     *            index in {@code newData} where the header begins
+     */
+    public void setAsCopy(byte[] newData, int offset) {
+        final int letters = Marshal.getInt(newData, offset);
+        reset(letters);
+        final int sourceKmerStart = offset + HEADER_SIZE; // skip the header in the source block
+        System.arraycopy(newData, sourceKmerStart, bytes, this.kmerStartOffset, bytesUsed);
+    }
- /**
- * Point this datablock to the given bytes array It works like the pointer
- * to new datablock.
- *
- * @param newData
- * : byte array to copy (should have a header)
- * @param blockOffset
- */
- public void setAsReference(byte[] newData, int blockOffset) {
- this.bytes = newData;
- this.offset = blockOffset + HEADER_SIZE;
- int kRequested = Marshal.getInt(newData, blockOffset);
- int bytesRequested = KmerUtil.getByteNumFromK(kRequested) + HEADER_SIZE;
- if (newData.length - blockOffset < bytesRequested) {
- throw new IllegalArgumentException("Requested " + bytesRequested
- + " bytes (k=" + kRequested + ") but buffer has only "
- + (newData.length - blockOffset) + " bytes");
- }
- setKmerLength(kRequested);
- }
+    /**
+     * Points this kmer at an existing header-prefixed block without copying it.
+     * The kmer length is read from the int header at {@code blockOffset}, and
+     * the maximum storage size is recorded as exactly header + kmer bytes.
+     *
+     * @param newData
+     *            byte array to reference (must contain a header)
+     * @param blockOffset
+     *            index in {@code newData} where the header begins
+     * @throws IllegalArgumentException
+     *             if fewer bytes remain after {@code blockOffset} than the header's length requires
+     */
+    public void setAsReference(byte[] newData, int blockOffset) {
+        // the fields are repointed before the size check, so they stay changed even if it throws
+        this.bytes = newData;
+        this.kmerStartOffset = blockOffset + HEADER_SIZE;
+        final int k = Marshal.getInt(newData, blockOffset);
+        final int needed = KmerUtil.getByteNumFromK(k) + HEADER_SIZE;
+        final int available = newData.length - blockOffset;
+        if (available < needed) {
+            throw new IllegalArgumentException("Requested " + needed + " bytes (k=" + k
+                    + ") but buffer has only " + available + " bytes");
+        }
+        this.storageMaxSize = needed; // as a reference we must not grow past our own block
+        setKmerLength(k);
+    }
- @Override
- public void setKmerLength(int k) {
- this.bytesUsed = KmerUtil.getByteNumFromK(k);
- this.lettersInKmer = k;
- Marshal.putInt(k, bytes, offset - HEADER_SIZE);
- }
+    /**
+     * Resets this kmer to hold {@code k} letters.
+     * Fresh storage is allocated whenever {@code k} needs more bytes than are
+     * currently in use (the check is against bytes used, not capacity);
+     * existing letters are not carried over into the new array.
+     *
+     * @param k
+     *            the new kmer length in letters
+     */
+    public void reset(int k) {
+        final int requiredBytes = KmerUtil.getByteNumFromK(k);
+        final boolean mustReallocate = requiredBytes > bytesUsed;
+        if (mustReallocate) {
+            bytes = new byte[requiredBytes + HEADER_SIZE];
+            kmerStartOffset = HEADER_SIZE;
+            storageMaxSize = bytes.length;
+        }
+        setKmerLength(k);
+    }
- @Override
- protected int getCapacity() {
- return bytes.length - HEADER_SIZE;
- }
+    /**
+     * Zeroes the unused high bits of the first kmer byte. Only needed when the
+     * letter count is not a multiple of 4, i.e. the first byte is partially filled.
+     */
+    protected void clearLeadBit() {
+        final int lettersInLeadByte = lettersInKmer % 4;
+        if (lettersInLeadByte == 0) {
+            return; // lead byte is fully used
+        }
+        final int keepMask = (1 << (lettersInLeadByte << 1)) - 1; // 2 bits per letter
+        bytes[kmerStartOffset] &= keepMask;
+    }
- @Override
- protected void setCapacity(int new_cap) {
- if (new_cap != getCapacity()) {
- byte[] new_data = new byte[new_cap + HEADER_SIZE];
- if (new_cap < bytesUsed) {
- bytesUsed = new_cap;
- }
- if (bytesUsed != 0) {
- System.arraycopy(bytes, offset, new_data, HEADER_SIZE,
- bytesUsed);
- }
- bytes = new_data;
- offset = HEADER_SIZE;
- }
- }
+    /**
+     * Returns the gene code of the letter at the given kmer index,
+     * e.g. index 3 of ACGTA is the code for T.
+     *
+     * @param pos
+     *            zero-based letter index
+     * @return the 2-bit gene code stored at {@code pos}
+     * @throws ArrayIndexOutOfBoundsException
+     *             if {@code pos} is negative or not smaller than the kmer length
+     */
+    public byte getGeneCodeAtPosition(int pos) {
+        final boolean inRange = pos >= 0 && pos < lettersInKmer;
+        if (!inRange) {
+            throw new ArrayIndexOutOfBoundsException("Gene position (" + pos + ") out of bounds for k=" + lettersInKmer);
+        }
+        return geneCodeAtPosition(pos);
+    }
- @Override
- public void mergeWithFFKmer(int initialKmerSize, KmerBytesWritable kmer) {
- super.mergeWithFFKmer(initialKmerSize, kmer);
- Marshal.putInt(lettersInKmer, bytes, offset - HEADER_SIZE);
- }
+    /**
+     * Bounds-unchecked variant of {@link #getGeneCodeAtPosition(int)}, for use
+     * while the recorded kmer length is temporarily inaccurate (mid-merge).
+     * Letter 0 lives in the low two bits of the last used byte.
+     */
+    private byte geneCodeAtPosition(int pos) {
+        final int bytesFromEnd = pos / 4;
+        final int bitShift = (pos % 4) << 1;
+        final byte holder = bytes[kmerStartOffset + bytesUsed - 1 - bytesFromEnd];
+        return (byte) ((holder >> bitShift) & 0x3);
+    }
- @Override
- public void mergeWithFRKmer(int initialKmerSize, KmerBytesWritable kmer) {
- super.mergeWithFRKmer(initialKmerSize, kmer);
- Marshal.putInt(lettersInKmer, bytes, offset - HEADER_SIZE);
- }
+    /**
+     * Shifts the kmer to take in a new gene symbol, converting the symbol to
+     * its gene code and delegating to {@link #shiftKmerWithNextCode(byte)}.
+     *
+     * @param c
+     *            the new gene character
+     * @return the gene that was shifted out, as a gene code
+     */
+    public byte shiftKmerWithNextChar(byte c) {
+        final byte code = GeneCode.getCodeFromSymbol(c);
+        return shiftKmerWithNextCode(code);
+    }
- @Override
- public void mergeWithRFKmer(int initialKmerSize, KmerBytesWritable preKmer) {
- super.mergeWithRFKmer(initialKmerSize, preKmer);
- Marshal.putInt(lettersInKmer, bytes, offset - HEADER_SIZE);
- }
+ /**
+ * Shifts the kmer by one letter to accept a new gene code: the letter at
+ * index 0 is dropped, every remaining letter moves down one index, and
+ * {@code c} becomes the last letter.
+ *
+ * @param c
+ * the new gene code to insert
+ * @return the gene code that was shifted out
+ */
+ public byte shiftKmerWithNextCode(byte c) {
+ // letter 0 sits in the low two bits of the last used byte
+ byte output = (byte) (bytes[kmerStartOffset + bytesUsed - 1] & 0x03);
+ // walk from the last byte towards the first, shifting each byte right by
+ // one letter and pulling in the low letter of the preceding byte
+ for (int i = bytesUsed - 1; i > 0; i--) {
+ byte in = (byte) (bytes[kmerStartOffset + i - 1] & 0x03);
+ bytes[kmerStartOffset + i] = (byte) (((bytes[kmerStartOffset + i] >>> 2) & 0x3f) | (in << 6));
+ }
+ // bit position of the last letter inside the (possibly partial) first byte
+ int pos = ((lettersInKmer - 1) % 4) << 1;
+ byte code = (byte) (c << pos);
+ bytes[kmerStartOffset] = (byte) (((bytes[kmerStartOffset] >>> 2) & 0x3f) | code);
+ clearLeadBit();
+ return output;
+ }
- @Override
- public void mergeWithRRKmer(int initialKmerSize, KmerBytesWritable preKmer) {
- super.mergeWithRRKmer(initialKmerSize, preKmer);
- Marshal.putInt(lettersInKmer, bytes, offset - HEADER_SIZE);
- }
+    /**
+     * Shifts the kmer to take in a new leading gene symbol, converting the
+     * symbol to its gene code and delegating to {@link #shiftKmerWithPreCode(byte)}.
+     *
+     * @param c
+     *            the new gene character
+     * @return the gene that was shifted out, as a gene code
+     */
+    public byte shiftKmerWithPreChar(byte c) {
+        final byte code = GeneCode.getCodeFromSymbol(c);
+        return shiftKmerWithPreCode(code);
+    }
- @Override
- public void readFields(DataInput in) throws IOException {
- lettersInKmer = in.readInt();
- bytesUsed = KmerUtil.getByteNumFromK(lettersInKmer);
- if (lettersInKmer > 0) {
- if (getCapacity() < this.bytesUsed) {
- this.bytes = new byte[this.bytesUsed + HEADER_SIZE];
- this.offset = HEADER_SIZE;
- }
- in.readFully(bytes, offset, bytesUsed);
- }
- Marshal.putInt(lettersInKmer, bytes, offset - HEADER_SIZE);
- }
+ /**
+ * Shifts the kmer by one letter in the opposite direction to
+ * {@code shiftKmerWithNextCode}: the last letter is dropped, every
+ * remaining letter moves up one index, and {@code c} becomes letter 0.
+ *
+ * @param c
+ * the new gene code to insert
+ * @return the gene code that was shifted out
+ */
+ public byte shiftKmerWithPreCode(byte c) {
+ // the last letter sits in the first byte, at this bit position
+ int pos = ((lettersInKmer - 1) % 4) << 1;
+ byte output = (byte) ((bytes[kmerStartOffset] >> pos) & 0x03);
+ // walk from the first byte towards the last, shifting each byte left by
+ // one letter and pulling in the top letter of the following byte
+ for (int i = 0; i < bytesUsed - 1; i++) {
+ byte in = (byte) ((bytes[kmerStartOffset + i + 1] >> 6) & 0x03);
+ bytes[kmerStartOffset + i] = (byte) ((bytes[kmerStartOffset + i] << 2) | in);
+ }
+ // the new code becomes letter 0 (low two bits of the last used byte)
+ bytes[kmerStartOffset + bytesUsed - 1] = (byte) ((bytes[kmerStartOffset + bytesUsed - 1] << 2) | c);
+ clearLeadBit();
+ return output;
+ }
- /**
- * write the entire byte array including the header
- */
- @Override
- public void write(DataOutput out) throws IOException {
- out.write(bytes, offset - HEADER_SIZE, bytesUsed + HEADER_SIZE);
- }
+    /**
+     * @return the number of letters currently stored in this kmer
+     */
+    public int getKmerLetterLength() {
+        return this.lettersInKmer;
+    }
- @Override
- public boolean equals(Object right) {
- if (right instanceof VKmerBytesWritable) {
- return super.equals(right); // compare bytes directly
- } else if (right instanceof KmerBytesWritable) {
- // for Kmers, we need to skip our header
- KmerBytesWritable rightKmer = (KmerBytesWritable) right;
- if (lettersInKmer != rightKmer.lettersInKmer) { // check length
- return false;
- }
- for (int i = 0; i < lettersInKmer; i++) { // check letters
- if (bytes[i + HEADER_SIZE] != rightKmer.bytes[i]) {
- return false;
- }
- }
- return true;
- }
- return false;
- }
+    /**
+     * @return the backing array itself (not a copy); the kmer's block starts
+     *         at {@code getBlockOffset()} within it
+     */
+    @Override
+    public byte[] getBytes() {
+        return this.bytes;
+    }
- @Override
- public String toString() {
- return KmerUtil.recoverKmerFrom(this.lettersInKmer, this.getBytes(),
- offset, this.getLength());
- }
+    /**
+     * Returns the (hyracks-specific) offset of this kmer's data block inside
+     * the backing array, i.e. the position of the header.
+     */
+    public int getBlockOffset() {
+        final int headerStart = kmerStartOffset - HEADER_SIZE;
+        return headerStart;
+    }
- public static class Comparator extends WritableComparator {
+    /**
+     * Returns the offset inside the backing array at which the packed kmer
+     * bytes begin (just past the header).
+     */
+    public int getKmerOffset() {
+        return this.kmerStartOffset;
+    }
- public Comparator() {
- super(VKmerBytesWritable.class);
- }
+    /**
+     * Returns the total number of bytes occupied by this kmer's block:
+     * the header plus the packed kmer bytes.
+     */
+    @Override
+    public int getLength() {
+        return HEADER_SIZE + bytesUsed;
+    }
+
+    /**
+     * Returns the number of bytes occupied by the packed kmer letters alone
+     * (header excluded).
+     */
+    public int getKmerByteLength() {
+        return this.bytesUsed;
+    }
+
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- int kmerlength1 = Marshal.getInt(b1, s1);
- int kmerlength2 = Marshal.getInt(b2, s2);
- if (kmerlength1 == kmerlength2) {
- return compareBytes(b1, s1 + HEADER_SIZE, l1 - HEADER_SIZE, b2,
- s2 + HEADER_SIZE, l2 - HEADER_SIZE);
- }
- return kmerlength1 - kmerlength2;
- }
- }
+    /**
+     * Records a new kmer length: updates the byte count and letter count and
+     * writes the length into the in-buffer header. Storage is not resized.
+     *
+     * @param k
+     *            the kmer length in letters
+     */
+    public void setKmerLength(int k) {
+        final int byteCount = KmerUtil.getByteNumFromK(k);
+        bytesUsed = byteCount;
+        lettersInKmer = k;
+        saveHeader(k);
+    }
- static { // register this comparator
- WritableComparator.define(VKmerBytesWritable.class, new Comparator());
- }
+    /**
+     * @return how many kmer bytes (header excluded) fit in the storage this
+     *         object is allowed to use
+     */
+    protected int getKmerByteCapacity() {
+        final int usableBytes = storageMaxSize - HEADER_SIZE;
+        return usableBytes;
+    }
+
+    /**
+     * Moves the kmer into a freshly allocated array with room for exactly
+     * {@code new_cap} kmer bytes plus the header. Does nothing when the
+     * capacity already equals {@code new_cap}. If the new capacity is smaller
+     * than the bytes in use, the used byte count is truncated to fit.
+     * <p>
+     * Only the kmer bytes are copied; the header area of the new array is left
+     * zeroed, so callers must re-save the header afterwards.
+     *
+     * @param new_cap
+     *            the desired capacity in kmer bytes (header excluded)
+     */
+    protected void setKmerByteCapacity(int new_cap) {
+        if (new_cap == getKmerByteCapacity()) {
+            return;
+        }
+        final byte[] replacement = new byte[new_cap + HEADER_SIZE];
+        bytesUsed = Math.min(bytesUsed, new_cap);
+        if (bytesUsed != 0) {
+            System.arraycopy(bytes, kmerStartOffset, replacement, HEADER_SIZE, bytesUsed);
+        }
+        bytes = replacement;
+        kmerStartOffset = HEADER_SIZE;
+        storageMaxSize = replacement.length;
+    }
+
+    /**
+     * Writes {@code length} as an int into the header slot that immediately
+     * precedes the kmer bytes in the backing array.
+     */
+    private void saveHeader(int length) {
+        final int headerStart = kmerStartOffset - HEADER_SIZE;
+        Marshal.putInt(length, bytes, headerStart);
+    }
+
+    /**
+     * Deserializes this kmer: reads the int letter count, then the packed
+     * kmer bytes, growing the storage when it is too small.
+     * <p>
+     * The in-buffer header is rewritten with the length just read, because
+     * {@code write()}, {@code hashCode()} and {@code equals()} all consume the
+     * header bytes directly; leaving it stale (or zeroed after a reallocation)
+     * would serialize and compare the wrong length.
+     *
+     * @param in
+     *            source to read from
+     * @throws IOException
+     *             if reading fails or the stream carries a negative length
+     */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        int k = in.readInt();
+        if (k < 0) {
+            // a negative length can only come from a corrupt stream; refuse it
+            // rather than writing it into the (possibly shared) header bytes
+            throw new IOException("Invalid kmer length in stream: " + k);
+        }
+        int kmerBytes = KmerUtil.getByteNumFromK(k);
+        if (k > 0 && getKmerByteCapacity() < kmerBytes) {
+            bytes = new byte[kmerBytes + HEADER_SIZE];
+            kmerStartOffset = HEADER_SIZE;
+            storageMaxSize = bytes.length;
+        }
+        // updates lettersInKmer and bytesUsed AND saves the header into the buffer
+        setKmerLength(k);
+        if (k > 0) {
+            in.readFully(bytes, kmerStartOffset, bytesUsed);
+        }
+    }
+
+    /**
+     * Serializes this kmer by emitting its block verbatim: the in-buffer
+     * header followed by the packed kmer bytes.
+     */
+    @Override
+    public void write(DataOutput out) throws IOException {
+        final int blockStart = kmerStartOffset - HEADER_SIZE;
+        final int blockLength = bytesUsed + HEADER_SIZE;
+        out.write(bytes, blockStart, blockLength);
+    }
+
+    /**
+     * Hashes the whole block (header plus packed kmer bytes).
+     */
+    @Override
+    public int hashCode() {
+        final int blockStart = kmerStartOffset - HEADER_SIZE;
+        final int blockLength = bytesUsed + HEADER_SIZE;
+        return Marshal.hashBytes(bytes, blockStart, blockLength);
+    }
+
+    /**
+     * Two kmers are equal when their blocks match byte for byte, header
+     * included. The comparison is done manually because the two objects may
+     * be backed by arrays of different sizes and offsets.
+     */
+    @Override
+    public boolean equals(Object right_obj) {
+        if (!(right_obj instanceof VKmerBytesWritable)) {
+            return false;
+        }
+        final VKmerBytesWritable that = (VKmerBytesWritable) right_obj;
+        // negative deltas cover the header that precedes the kmer bytes
+        for (int delta = -HEADER_SIZE; delta < bytesUsed; delta++) {
+            final byte mine = bytes[kmerStartOffset + delta];
+            final byte theirs = that.bytes[that.kmerStartOffset + delta];
+            if (mine != theirs) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Renders the kmer via {@code KmerUtil.recoverKmerFrom}, passing the
+     * letter count and the packed kmer bytes (header excluded).
+     */
+    @Override
+    public String toString() {
+        final int letters = this.lettersInKmer;
+        return KmerUtil.recoverKmerFrom(letters, bytes, kmerStartOffset, bytesUsed);
+    }
+
+    /**
+     * Raw-bytes comparator for serialized kmers: orders first by the kmer
+     * length stored in each header, then by the kmer bytes themselves.
+     */
+    public static class Comparator extends WritableComparator {
+
+        public Comparator() {
+            super(VKmerBytesWritable.class);
+        }
+
+        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+            final int leftLetters = Marshal.getInt(b1, s1);
+            final int rightLetters = Marshal.getInt(b2, s2);
+            if (leftLetters != rightLetters) {
+                return leftLetters - rightLetters;
+            }
+            // same length: compare the payloads, skipping both headers
+            return compareBytes(b1, s1 + HEADER_SIZE, l1 - HEADER_SIZE, b2, s2 + HEADER_SIZE, l2 - HEADER_SIZE);
+        }
+    }
+
+    // Register the raw-bytes comparator for this type when the class is loaded.
+    static {
+        WritableComparator.define(VKmerBytesWritable.class, new Comparator());
+    }
+
+    /**
+     * Makes room for at least {@code size} kmer bytes (header not counted)
+     * and records {@code size} as the number of bytes in use. When growth is
+     * needed the capacity is set to 1.5 times the requested size.
+     */
+    protected void setSize(int size) {
+        final boolean tooSmall = getKmerByteCapacity() < size;
+        if (tooSmall) {
+            final int grownCapacity = size * 3 / 2;
+            setKmerByteCapacity(grownCapacity);
+        }
+        bytesUsed = size;
+    }
+
+    /**
+     * Resets this kmer to {@code k} letters and fills it from read text.
+     *
+     * @param k
+     *            the kmer length in letters
+     * @param stringBytes
+     *            read text, one gene symbol per byte
+     * @param start
+     *            index of the first symbol to use
+     */
+    public void setByRead(int k, byte[] stringBytes, int start) {
+        this.reset(k);
+        this.setByRead(stringBytes, start);
+    }
+
+ /**
+ * Packs read text into this kmer's byte storage, two bits per letter,
+ * using the current kmer length. E.g. AATAG will compress as
+ * [0x000G, 0xATAA]: the first letter lands in the low bits of the last
+ * used byte and later letters fill towards the first byte.
+ * Reading stops early if {@code stringBytes} ends before the kmer is full.
+ *
+ * @param stringBytes
+ * read text, one gene symbol per byte
+ * @param start
+ * index of the first symbol to use
+ */
+ public void setByRead(byte[] stringBytes, int start) {
+ byte l = 0; // byte currently being assembled
+ int bytecount = 0; // bit position of the next letter within l
+ int bcount = this.bytesUsed - 1; // index (relative to kmer start) of the byte being filled
+ for (int i = start; i < start + lettersInKmer && i < stringBytes.length; i++) {
+ byte code = GeneCode.getCodeFromSymbol(stringBytes[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ // four letters collected: store the byte and move towards the front
+ bytes[kmerStartOffset + bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ // a partially filled byte remains: it belongs in the first kmer byte
+ if (bcount >= 0) {
+ bytes[kmerStartOffset] = l;
+ }
+ }
+
+    /**
+     * Resets this kmer to {@code k} letters and fills it with the reversed,
+     * paired form of the read text.
+     *
+     * @param k
+     *            the kmer length in letters
+     * @param stringBytes
+     *            read text, one gene symbol per byte
+     * @param start
+     *            index of the first symbol of the region to reverse
+     */
+    public void setByReadReverse(int k, byte[] stringBytes, int start) {
+        this.reset(k);
+        this.setByReadReverse(stringBytes, start);
+    }
+
+ /**
+ * Packs the reversed, paired form of the read text into this kmer's byte
+ * storage, using the current kmer length. E.g. AATAG will be paired to
+ * CTATT, and then compressed as [0x000T,0xTATC].
+ *
+ * @param array
+ * read text, one gene symbol per byte
+ * @param start
+ * index of the first symbol of the region to reverse
+ */
+ public void setByReadReverse(byte[] array, int start) {
+ byte l = 0; // byte currently being assembled
+ int bytecount = 0; // bit position of the next letter within l
+ int bcount = bytesUsed - 1; // index (relative to kmer start) of the byte being filled
+ // Walk the region backwards, stopping at `start` (not at index 0) so that
+ // only this kmer's letters are consumed. If the region extends past the end
+ // of `array`, the loop condition fails immediately and nothing is read.
+ for (int i = start + lettersInKmer - 1; i >= start && i < array.length; i--) {
+ byte code = GeneCode.getPairedCodeFromSymbol(array[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ // four letters collected: store the byte and move towards the front
+ bytes[kmerStartOffset + bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ // a partially filled byte remains: it belongs in the first kmer byte
+ if (bcount >= 0) {
+ bytes[kmerStartOffset] = l;
+ }
+ }
+
+ /**
+ * Merge Kmer with the next connected Kmer e.g. AAGCTAA merge with AACAACC,
+ * if the initial kmerSize = 3 then it will return AAGCTAACAACC.
+ * The first {@code initialKmerSize - 1} letters of {@code kmer} overlap
+ * this kmer's tail and are skipped. The in-buffer header is updated.
+ *
+ * @param initialKmerSize
+ * : the initial kmerSize
+ * @param kmer
+ * : the next kmer
+ */
+ public void mergeWithFFKmer(int initialKmerSize, VKmerBytesWritable kmer) {
+ int preKmerLength = lettersInKmer;
+ int preSize = bytesUsed;
+ lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
+ setSize(KmerUtil.getByteNumFromK(lettersInKmer));
+ // move the existing letters to the right-hand end of the enlarged byte range
+ for (int i = 1; i <= preSize; i++) {
+ bytes[kmerStartOffset + bytesUsed - i] = bytes[kmerStartOffset + preSize - i];
+ }
+ // append the non-overlapping letters of `kmer`, four letters (one byte) per step
+ // NOTE(review): exact semantics of the two KmerBytesWritable helpers are defined elsewhere
+ for (int k = initialKmerSize - 1; k < kmer.getKmerLetterLength(); k += 4) {
+ byte onebyte = KmerBytesWritable.getOneByteFromKmerAtPosition(k, kmer.bytes, kmer.kmerStartOffset,
+ kmer.bytesUsed);
+ KmerBytesWritable.appendOneByteAtPosition(preKmerLength + k - initialKmerSize + 1, onebyte, bytes,
+ kmerStartOffset, bytesUsed);
+ }
+ clearLeadBit();
+ // setSize() may have reallocated storage, so the header must be written again
+ saveHeader(lettersInKmer);
+ }
+
+    /**
+     * Merge Kmer with the next connected Kmer, when that Kmer needs to be
+     * reverse-complemented e.g. AAGCTAA merge with GGTTGTT, if the initial
+     * kmerSize = 3 then it will return AAGCTAACAACC. A merge B => A B~
+     * <p>
+     * The last {@code initialKmerSize - 1} letters of the reverse complement
+     * overlap this kmer and are skipped. The in-buffer header is updated.
+     *
+     * @param initialKmerSize
+     *            : the initial kmerSize
+     * @param kmer
+     *            : the next kmer
+     */
+    public void mergeWithFRKmer(int initialKmerSize, VKmerBytesWritable kmer) {
+        int preSize = bytesUsed;
+        int preKmerLength = lettersInKmer;
+        lettersInKmer += kmer.lettersInKmer - initialKmerSize + 1;
+        setSize(KmerUtil.getByteNumFromK(lettersInKmer));
+        // copy prefix into right-side of buffer
+        for (int i = 1; i <= preSize; i++) {
+            bytes[kmerStartOffset + bytesUsed - i] = bytes[kmerStartOffset + preSize - i];
+        }
+
+        // bit position, inside the byte being assembled, of the next appended letter
+        int bytecount = (preKmerLength % 4) * 2;
+        int bcount; // index (relative to kmer start) of the byte being assembled
+        byte l; // the byte being assembled
+        if (bytecount == 0) {
+            // The existing letters end exactly on a byte boundary, so the new
+            // letters start in a fresh byte just before the copied prefix.
+            // Starting from the prefix's (full) lead byte here would OR the new
+            // codes into existing letters and corrupt them.
+            bcount = bytesUsed - preSize - 1;
+            l = 0x00;
+        } else {
+            // the prefix's lead byte is partially filled: keep appending into it
+            bcount = bytesUsed - preSize;
+            l = bytes[kmerStartOffset + bcount];
+        }
+        for (int i = kmer.lettersInKmer - initialKmerSize; i >= 0; i--) {
+            byte code = GeneCode.getPairedGeneCode(kmer.getGeneCodeAtPosition(i));
+            l |= (byte) (code << bytecount);
+            bytecount += 2;
+            if (bytecount == 8) {
+                bytes[kmerStartOffset + bcount--] = l;
+                l = 0;
+                bytecount = 0;
+            }
+        }
+        // a partially filled byte remains: it belongs in the first kmer byte
+        if (bcount >= 0) {
+            bytes[kmerStartOffset] = l;
+        }
+        // setSize() may have reallocated storage, so the header must be written again
+        saveHeader(lettersInKmer);
+    }
+
+    /**
+     * Merge Kmer with the previous connected Kmer, when that kmer needs to be
+     * reverse-complemented e.g. AACAACC merge with TTCTGCC, if the initial
+     * kmerSize = 3 then it will return GGCAGAACAACC.
+     * Implemented by building the reverse complement of {@code preKmer} in a
+     * temporary kmer and delegating to {@link #mergeWithRRKmer}.
+     *
+     * @param initialKmerSize
+     *            : the initial kmerSize
+     * @param preKmer
+     *            : the previous kmer
+     */
+    public void mergeWithRFKmer(int initialKmerSize, VKmerBytesWritable preKmer) {
+        final VKmerBytesWritable reverseComplement = new VKmerBytesWritable(preKmer.lettersInKmer);
+        final byte[] preKmerText = preKmer.toString().getBytes();
+        reverseComplement.setByReadReverse(preKmerText, 0);
+        mergeWithRRKmer(initialKmerSize, reverseComplement);
+    }
+
+    /**
+     * Merge Kmer with the previous connected Kmer e.g. AACAACC merge with
+     * AAGCTAA, if the initial kmerSize = 3 then it will return AAGCTAACAACC.
+     * <p>
+     * The in-buffer header is rewritten with the merged length at the end,
+     * matching the other merge methods: {@code setSize()} may reallocate the
+     * storage (leaving a zeroed header), and {@code write()},
+     * {@code hashCode()} and {@code equals()} read the header bytes directly.
+     *
+     * @param initialKmerSize
+     *            : the initial kmerSize
+     * @param preKmer
+     *            : the previous kmer
+     */
+    public void mergeWithRRKmer(int initialKmerSize, VKmerBytesWritable preKmer) {
+        int preKmerLength = lettersInKmer;
+        int preSize = bytesUsed;
+        lettersInKmer += preKmer.lettersInKmer - initialKmerSize + 1;
+        setSize(KmerUtil.getByteNumFromK(lettersInKmer));
+        // remember our first four letters before the prekmer is written over them
+        byte cacheByte = KmerBytesWritable.getOneByteFromKmerAtPosition(0, bytes, kmerStartOffset, preSize);
+
+        // copy prekmer
+        for (int k = 0; k < preKmer.lettersInKmer - initialKmerSize + 1; k += 4) {
+            byte onebyte = KmerBytesWritable.getOneByteFromKmerAtPosition(k, preKmer.bytes, preKmer.kmerStartOffset,
+                    preKmer.bytesUsed);
+            KmerBytesWritable.appendOneByteAtPosition(k, onebyte, bytes, kmerStartOffset, bytesUsed);
+        }
+
+        // copy current kmer, always reading one group ahead of the group being written
+        int k = 4;
+        for (; k < preKmerLength; k += 4) {
+            byte onebyte = KmerBytesWritable.getOneByteFromKmerAtPosition(k, bytes, kmerStartOffset, preSize);
+            KmerBytesWritable.appendOneByteAtPosition(preKmer.lettersInKmer - initialKmerSize + k - 4 + 1, cacheByte,
+                    bytes, kmerStartOffset, bytesUsed);
+            cacheByte = onebyte;
+        }
+        KmerBytesWritable.appendOneByteAtPosition(preKmer.lettersInKmer - initialKmerSize + k - 4 + 1, cacheByte,
+                bytes, kmerStartOffset, bytesUsed);
+        clearLeadBit();
+        // keep the serialized header in sync with the merged length
+        saveHeader(lettersInKmer);
+    }
+
+    /**
+     * Merges {@code kmer} into this kmer using the merge routine selected by
+     * the direction bits of {@code dir} (masked with {@code DirectionFlag.DIR_MASK}).
+     *
+     * @param dir
+     *            direction flag byte
+     * @param initialKmerSize
+     *            the initial kmerSize
+     * @param kmer
+     *            the kmer to merge in
+     * @throws RuntimeException
+     *             if the masked direction is none of FF, FR, RF, RR
+     */
+    public void mergeWithKmerInDir(byte dir, int initialKmerSize, VKmerBytesWritable kmer) {
+        final int direction = dir & DirectionFlag.DIR_MASK;
+        if (direction == DirectionFlag.DIR_FF) {
+            mergeWithFFKmer(initialKmerSize, kmer);
+        } else if (direction == DirectionFlag.DIR_FR) {
+            mergeWithFRKmer(initialKmerSize, kmer);
+        } else if (direction == DirectionFlag.DIR_RF) {
+            mergeWithRFKmer(initialKmerSize, kmer);
+        } else if (direction == DirectionFlag.DIR_RR) {
+            mergeWithRRKmer(initialKmerSize, kmer);
+        } else {
+            throw new RuntimeException("Direction not recognized: " + dir);
+        }
+    }
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableFactoryTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableFactoryTest.java
index 1eb58c8..663d8dd 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableFactoryTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableFactoryTest.java
@@ -19,8 +19,8 @@
import org.junit.Test;
import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.KmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
public class KmerBytesWritableFactoryTest {
static byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
@@ -29,17 +29,17 @@
@Test
public void TestGetLastKmer() {
- KmerBytesWritable kmer = new KmerBytesWritable(9);
+ VKmerBytesWritable kmer = new VKmerBytesWritable(9);
kmer.setByRead(array, 0);
Assert.assertEquals("AGCTGACCG", kmer.toString());
- KmerBytesWritable lastKmer;
+ VKmerBytesWritable lastKmer;
for (int i = 8; i > 0; i--) {
lastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
Assert.assertEquals("AGCTGACCG".substring(9 - i), lastKmer.toString());
lastKmer = kmerFactory.getSubKmerFromChain(9 - i, i, kmer);
Assert.assertEquals("AGCTGACCG".substring(9 - i), lastKmer.toString());
}
- KmerBytesWritable vlastKmer;
+ VKmerBytesWritable vlastKmer;
for (int i = 8; i > 0; i--) {
vlastKmer = kmerFactory.getLastKmerFromChain(i, kmer);
Assert.assertEquals("AGCTGACCG".substring(9 - i), vlastKmer.toString());
@@ -50,17 +50,17 @@
@Test
public void TestGetFirstKmer() {
- KmerBytesWritable kmer = new KmerBytesWritable(9);
+ VKmerBytesWritable kmer = new VKmerBytesWritable(9);
kmer.setByRead(array, 0);
Assert.assertEquals("AGCTGACCG", kmer.toString());
- KmerBytesWritable firstKmer;
+ VKmerBytesWritable firstKmer;
for (int i = 8; i > 0; i--) {
firstKmer = kmerFactory.getFirstKmerFromChain(i, kmer);
Assert.assertEquals("AGCTGACCG".substring(0, i), firstKmer.toString());
firstKmer = kmerFactory.getSubKmerFromChain(0, i, kmer);
Assert.assertEquals("AGCTGACCG".substring(0, i), firstKmer.toString());
}
- KmerBytesWritable vfirstKmer;
+ VKmerBytesWritable vfirstKmer;
for (int i = 8; i > 0; i--) {
vfirstKmer = kmerFactory.getFirstKmerFromChain(i, kmer);
Assert.assertEquals("AGCTGACCG".substring(0, i), vfirstKmer.toString());
@@ -71,12 +71,12 @@
@Test
public void TestGetSubKmer() {
- KmerBytesWritable kmer = new KmerBytesWritable(9);
+ VKmerBytesWritable kmer = new VKmerBytesWritable(9);
kmer.setByRead(array, 0);
Assert.assertEquals("AGCTGACCG", kmer.toString());
- KmerBytesWritable subKmer;
- for (int istart = 0; istart < kmer.getKmerLength() - 1; istart++) {
- for (int isize = 1; isize + istart <= kmer.getKmerLength(); isize++) {
+ VKmerBytesWritable subKmer;
+ for (int istart = 0; istart < kmer.getKmerLetterLength() - 1; istart++) {
+ for (int isize = 1; isize + istart <= kmer.getKmerLetterLength(); isize++) {
subKmer = kmerFactory.getSubKmerFromChain(istart, isize, kmer);
Assert.assertEquals("AGCTGACCG".substring(istart, istart + isize), subKmer.toString());
}
@@ -85,60 +85,60 @@
@Test
public void TestMergeNext() {
- KmerBytesWritable kmer = new KmerBytesWritable(9);
+ VKmerBytesWritable kmer = new VKmerBytesWritable(9);
kmer.setByRead(array, 0);
Assert.assertEquals("AGCTGACCG", kmer.toString());
String text = "AGCTGACCG";
for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
- KmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
+ VKmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
text = text + (char) GeneCode.GENE_SYMBOL[x];
Assert.assertEquals(text, newkmer.toString());
- kmer = new KmerBytesWritable(newkmer);
+ kmer = new VKmerBytesWritable(newkmer);
}
for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
- KmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
+ VKmerBytesWritable newkmer = kmerFactory.mergeKmerWithNextCode(kmer, x);
text = text + (char) GeneCode.GENE_SYMBOL[x];
Assert.assertEquals(text, newkmer.toString());
- kmer = new KmerBytesWritable(newkmer);
+ kmer = new VKmerBytesWritable(newkmer);
}
}
@Test
public void TestMergePre() {
- KmerBytesWritable kmer = new KmerBytesWritable(9);
+ VKmerBytesWritable kmer = new VKmerBytesWritable(9);
kmer.setByRead(array, 0);
Assert.assertEquals("AGCTGACCG", kmer.toString());
String text = "AGCTGACCG";
for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
- KmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
+ VKmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
text = (char) GeneCode.GENE_SYMBOL[x] + text;
Assert.assertEquals(text, newkmer.toString());
- kmer = new KmerBytesWritable(newkmer);
+ kmer = new VKmerBytesWritable(newkmer);
}
for (byte x = GeneCode.A; x <= GeneCode.T; x++) {
- KmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
+ VKmerBytesWritable newkmer = kmerFactory.mergeKmerWithPreCode(kmer, x);
text = (char) GeneCode.GENE_SYMBOL[x] + text;
Assert.assertEquals(text, newkmer.toString());
- kmer = new KmerBytesWritable(newkmer);
+ kmer = new VKmerBytesWritable(newkmer);
}
}
@Test
public void TestMergeTwoKmer() {
- KmerBytesWritable kmer1 = new KmerBytesWritable(9);
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable(9);
kmer1.setByRead(array, 0);
String text1 = "AGCTGACCG";
- KmerBytesWritable kmer2 = new KmerBytesWritable(9);
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable(9);
kmer2.setByRead(array, 1);
String text2 = "GCTGACCGT";
Assert.assertEquals(text1, kmer1.toString());
Assert.assertEquals(text2, kmer2.toString());
- KmerBytesWritable merged = kmerFactory.mergeTwoKmer(kmer1, kmer2);
+ VKmerBytesWritable merged = kmerFactory.mergeTwoKmer(kmer1, kmer2);
Assert.assertEquals(text1 + text2, merged.toString());
- KmerBytesWritable kmer3 = new KmerBytesWritable(3);
+ VKmerBytesWritable kmer3 = new VKmerBytesWritable(3);
kmer3.setByRead(array, 1);
String text3 = "GCT";
Assert.assertEquals(text3, kmer3.toString());
@@ -148,17 +148,17 @@
merged = kmerFactory.mergeTwoKmer(kmer3, kmer1);
Assert.assertEquals(text3 + text1, merged.toString());
- KmerBytesWritable kmer4 = new KmerBytesWritable(8);
+ VKmerBytesWritable kmer4 = new VKmerBytesWritable(8);
kmer4.setByRead(array, 0);
String text4 = "AGCTGACC";
Assert.assertEquals(text4, kmer4.toString());
merged = kmerFactory.mergeTwoKmer(kmer4, kmer3);
Assert.assertEquals(text4 + text3, merged.toString());
- KmerBytesWritable kmer5 = new KmerBytesWritable(7);
+ VKmerBytesWritable kmer5 = new VKmerBytesWritable(7);
kmer5.setByRead(array, 0);
String text5 = "AGCTGAC";
- KmerBytesWritable kmer6 = new KmerBytesWritable(9);
+ VKmerBytesWritable kmer6 = new VKmerBytesWritable(9);
kmer6.setByRead(9, array, 1);
String text6 = "GCTGACCGT";
merged = kmerFactory.mergeTwoKmer(kmer5, kmer6);
@@ -173,19 +173,18 @@
String text8 = "GCTG";
merged = kmerFactory.mergeTwoKmer(kmer5, kmer6);
Assert.assertEquals(text5 + text8, merged.toString());
-
}
@Test
public void TestShift() {
- KmerBytesWritable kmer = new KmerBytesWritable(kmerFactory.getKmerByRead(9, array, 0));
+ VKmerBytesWritable kmer = new VKmerBytesWritable(kmerFactory.getKmerByRead(9, array, 0));
String text = "AGCTGACCG";
Assert.assertEquals(text, kmer.toString());
- KmerBytesWritable kmerForward = kmerFactory.shiftKmerWithNextCode(kmer, GeneCode.A);
+ VKmerBytesWritable kmerForward = kmerFactory.shiftKmerWithNextCode(kmer, GeneCode.A);
Assert.assertEquals(text, kmer.toString());
Assert.assertEquals("GCTGACCGA", kmerForward.toString());
- KmerBytesWritable kmerBackward = kmerFactory.shiftKmerWithPreCode(kmer, GeneCode.C);
+ VKmerBytesWritable kmerBackward = kmerFactory.shiftKmerWithPreCode(kmer, GeneCode.C);
Assert.assertEquals(text, kmer.toString());
Assert.assertEquals("CAGCTGACC", kmerBackward.toString());
@@ -193,10 +192,10 @@
@Test
public void TestReverseKmer() {
- KmerBytesWritable kmer = new KmerBytesWritable(7);
+ VKmerBytesWritable kmer = new VKmerBytesWritable(7);
kmer.setByRead(array, 0);
Assert.assertEquals(kmer.toString(), "AGCTGAC");
- KmerBytesWritable reversed = kmerFactory.reverse(kmer);
+ VKmerBytesWritable reversed = kmerFactory.reverse(kmer);
Assert.assertEquals(reversed.toString(), "CAGTCGA");
}
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
index 807ac13..54d29eb 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
@@ -28,7 +28,8 @@
@Test
public void TestCompressKmer() {
- KmerBytesWritable kmer = new KmerBytesWritable(k);
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
kmer.setByRead(array, 0);
Assert.assertEquals(kmer.toString(), "AATAGAA");
@@ -38,7 +39,8 @@
@Test
public void TestMoveKmer() {
- KmerBytesWritable kmer = new KmerBytesWritable(k);
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
kmer.setByRead(array, 0);
Assert.assertEquals(kmer.toString(), "AATAGAA");
@@ -54,7 +56,8 @@
@Test
public void TestCompressKmerReverse() {
- KmerBytesWritable kmer = new KmerBytesWritable(k);
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
kmer.setByRead(array, 0);
Assert.assertEquals(kmer.toString(), "AATAGAA");
@@ -64,7 +67,8 @@
@Test
public void TestMoveKmerReverse() {
- KmerBytesWritable kmer = new KmerBytesWritable(k);
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
kmer.setByRead(array, 0);
Assert.assertEquals(kmer.toString(), "AATAGAA");
@@ -80,7 +84,8 @@
@Test
public void TestGetGene() {
- KmerBytesWritable kmer = new KmerBytesWritable(9);
+ KmerBytesWritable.setGlobalKmerLength(9);
+ KmerBytesWritable kmer = new KmerBytesWritable();
String text = "AGCTGACCG";
byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' };
kmer.setByRead(array, 0);
@@ -95,8 +100,9 @@
byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
String string = "AGCTGACCGT";
for (int k = 3; k <= 10; k++) {
- KmerBytesWritable kmer = new KmerBytesWritable(k);
- KmerBytesWritable kmerAppend = new KmerBytesWritable(k);
+ KmerBytesWritable.setGlobalKmerLength(k);
+ KmerBytesWritable kmer = new KmerBytesWritable();
+ KmerBytesWritable kmerAppend = new KmerBytesWritable();
kmer.setByRead(array, 0);
Assert.assertEquals(string.substring(0, k), kmer.toString());
for (int b = 0; b < k; b++) {
@@ -113,201 +119,4 @@
Assert.assertEquals(kmer.toString(), kmerAppend.toString());
}
}
-
- @Test
- public void TestMergeFFKmer() {
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- String text = "AGCTGACCGT";
- KmerBytesWritable kmer1 = new KmerBytesWritable(8);
- kmer1.setByRead(array, 0);
- String text1 = "AGCTGACC";
- KmerBytesWritable kmer2 = new KmerBytesWritable(8);
- kmer2.setByRead(array, 1);
- String text2 = "GCTGACCG";
- Assert.assertEquals(text2, kmer2.toString());
- KmerBytesWritable merge = new KmerBytesWritable(kmer1);
- int kmerSize = 8;
- merge.mergeWithFFKmer(kmerSize, kmer2);
- Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
-
- for (int i = 1; i < 8; i++) {
- merge.setAsCopy(kmer1);
- merge.mergeWithFFKmer(i, kmer2);
- Assert.assertEquals(text1 + text2.substring(i - 1), merge.toString());
- }
-
- for (int ik = 1; ik <= 10; ik++) {
- for (int jk = 1; jk <= 10; jk++) {
- kmer1 = new KmerBytesWritable(ik);
- kmer2 = new KmerBytesWritable(jk);
- kmer1.setByRead(array, 0);
- kmer2.setByRead(array, 0);
- text1 = text.substring(0, ik);
- text2 = text.substring(0, jk);
- Assert.assertEquals(text1, kmer1.toString());
- Assert.assertEquals(text2, kmer2.toString());
- for (int x = 1; x < jk; x++) {
- merge.setAsCopy(kmer1);
- merge.mergeWithFFKmer(x, kmer2);
- Assert.assertEquals(text1 + text2.substring(x - 1), merge.toString());
- }
- }
- }
- }
-
- @Test
- public void TestMergeFRKmer() {
- int kmerSize = 3;
- String result = "AAGCTAACAACC";
- byte[] resultArray = result.getBytes();
-
- String text1 = "AAGCTAA";
- KmerBytesWritable kmer1 = new KmerBytesWritable(text1.length());
- kmer1.setByRead(resultArray, 0);
- Assert.assertEquals(text1, kmer1.toString());
-
- // kmer2 is the rc of the end of the read
- String text2 = "GGTTGTT";
- KmerBytesWritable kmer2 = new KmerBytesWritable(text2.length());
- kmer2.setByReadReverse(resultArray, result.length() - text2.length());
- Assert.assertEquals(text2, kmer2.toString());
-
- KmerBytesWritable merge = new KmerBytesWritable(kmer1);
- merge.mergeWithFRKmer(kmerSize, kmer2);
- Assert.assertEquals(result, merge.toString());
-
- int i = 1;
- merge.setAsCopy(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAAAACAACC", merge.toString());
-
- i = 2;
- merge.setAsCopy(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAAACAACC", merge.toString());
-
- i = 3;
- merge.setAsCopy(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAACAACC", merge.toString());
- }
-
-
- @Test
- public void TestMergeRFKmer() {
- int kmerSize = 3;
- String result = "GGCACAACAACCC";
- byte[] resultArray = result.getBytes();
-
- String text1 = "AACAACCC";
- KmerBytesWritable kmer1 = new KmerBytesWritable(text1.length());
- kmer1.setByRead(resultArray, 5);
- Assert.assertEquals(text1, kmer1.toString());
-
- // kmer2 is the rc of the end of the read
- String text2 = "TTGTGCC";
- KmerBytesWritable kmer2 = new KmerBytesWritable(text2.length());
- kmer2.setByReadReverse(resultArray, 0);
- Assert.assertEquals(text2, kmer2.toString());
-
- KmerBytesWritable merge = new KmerBytesWritable(kmer1);
- merge.mergeWithRFKmer(kmerSize, kmer2);
- Assert.assertEquals(result, merge.toString());
-
- int i = 1;
- merge.setAsCopy(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAAAACAACCC", merge.toString());
-
- i = 2;
- merge.setAsCopy(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAAACAACCC", merge.toString());
-
- i = 3;
- merge.setAsCopy(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAACAACCC", merge.toString());
-
-// String test1 = "CTTAT";
-// String test2 = "AGACC"; // rc = GGTCT
-// KmerBytesWritable k1 = new KmerBytesWritable(5);
-// KmerBytesWritable k2 = new KmerBytesWritable(5);
-// k1.setByRead(test1.getBytes(), 0);
-// k2.setByRead(test2.getBytes(), 0);
-// k1.mergeWithRFKmer(3, k2);
-// Assert.assertEquals("GGTCTTAT", k1.toString()); //GGTCGTCT -> AGACGACC ??
-
- String test3 = "CTA";
- String test4 = "AGA"; // rc = TCT
- KmerBytesWritable k3 = new KmerBytesWritable(3);
- KmerBytesWritable k4 = new KmerBytesWritable(3);
- k3.setByRead(test3.getBytes(), 0);
- k4.setByRead(test4.getBytes(), 0);
- k3.mergeWithRFKmer(3, k4);
- Assert.assertEquals("TCTA", k3.toString());
-// Assert.assertEquals("CTAT", k3); // this is an incorrect test case-- the merge always flips the passed-in kmer
- }
-
-
-
- @Test
- public void TestMergeRRKmer() {
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- String text = "AGCTGACCGT";
- KmerBytesWritable kmer1 = new KmerBytesWritable(8);
- kmer1.setByRead(array, 0);
- String text1 = "AGCTGACC";
- KmerBytesWritable kmer2 = new KmerBytesWritable(8);
- kmer2.setByRead(array, 1);
- String text2 = "GCTGACCG";
- Assert.assertEquals(text2, kmer2.toString());
- KmerBytesWritable merge = new KmerBytesWritable(kmer2);
- int kmerSize = 8;
- merge.mergeWithRRKmer(kmerSize, kmer1);
- Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
-
- for (int i = 1; i < 8; i++) {
- merge.setAsCopy(kmer2);
- merge.mergeWithRRKmer(i, kmer1);
- Assert.assertEquals(text1.substring(0, text1.length() - i + 1) + text2, merge.toString());
- }
-
- for (int ik = 1; ik <= 10; ik++) {
- for (int jk = 1; jk <= 10; jk++) {
- kmer1 = new KmerBytesWritable(ik);
- kmer2 = new KmerBytesWritable(jk);
- kmer1.setByRead(array, 0);
- kmer2.setByRead(array, 0);
- text1 = text.substring(0, ik);
- text2 = text.substring(0, jk);
- Assert.assertEquals(text1, kmer1.toString());
- Assert.assertEquals(text2, kmer2.toString());
- for (int x = 1; x < ik; x++) {
- merge.setAsCopy(kmer2);
- merge.mergeWithRRKmer(x, kmer1);
- Assert.assertEquals(text1.substring(0, text1.length() - x + 1) + text2, merge.toString());
- }
- }
- }
- }
-
- @Test
- public void TestMergeRFAndRRKmer() {
- String test1 = "TAGAT";
- String test2 = "TCTAG"; // rc = CTAGA
- String test3 = "GCTAG";
- KmerBytesWritable k1 = new KmerBytesWritable(5);
- KmerBytesWritable k2 = new KmerBytesWritable(5);
- KmerBytesWritable k3 = new KmerBytesWritable(5);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k3.setByRead(test3.getBytes(), 0);
- k1.mergeWithRFKmer(5, k2);
- Assert.assertEquals("CTAGAT", k1.toString());
- k1.mergeWithRRKmer(5, k3);
- Assert.assertEquals("GCTAGAT", k1.toString());
- }
}
-
-
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
index 1bbb771..5a69a3c 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
@@ -20,20 +20,22 @@
//one kmer in list and reset each time
KmerBytesWritable kmer;
for (int i = 1; i < 200; i++) {
- kmer = new KmerBytesWritable(i);
+ KmerBytesWritable.setGlobalKmerLength(i);
+ kmer = new KmerBytesWritable();
String randomString = generateString(i);
byte[] array = randomString.getBytes();
kmer.setByRead(array, 0);
- kmerList.reset(kmer.getKmerLength());
+ kmerList.reset();
kmerList.append(kmer);
- Assert.assertEquals(kmerList.getPosition(0).toString(), randomString);
+ Assert.assertEquals(randomString, kmerList.getPosition(0).toString());
Assert.assertEquals(1, kmerList.getCountOfPosition());
}
- kmerList.reset(0);
+ kmerList.reset();
+ KmerBytesWritable.setGlobalKmerLength(5);
//add one more kmer each time and fix kmerSize
for (int i = 0; i < 200; i++) {
- kmer = new KmerBytesWritable(5);
+ kmer = new KmerBytesWritable();
String randomString = generateString(5);
byte[] array = randomString.getBytes();
kmer.setByRead(array, 0);
@@ -44,8 +46,8 @@
byte [] another = new byte [kmerList.getLength()*2];
int start = 20;
- System.arraycopy(kmerList.getByteArray(), 0, another, start, kmerList.getLength());
- KmerListWritable plist2 = new KmerListWritable(kmerList.kmerlength, kmerList.getCountOfPosition(),another,start);
+ System.arraycopy(kmerList.getByteArray(), kmerList.getStartOffset(), another, start, kmerList.getLength());
+ KmerListWritable plist2 = new KmerListWritable(another, start);
for(int i = 0; i < plist2.getCountOfPosition(); i++){
Assert.assertEquals(kmerList.getPosition(i).toString(), plist2.getPosition(i).toString());
}
@@ -59,12 +61,13 @@
int i;
KmerBytesWritable kmer;
for (i = 0; i < 200; i++) {
- kmer = new KmerBytesWritable(5);
+ KmerBytesWritable.setGlobalKmerLength(5);
+ kmer = new KmerBytesWritable();
String randomString = generateString(5);
byte[] array = randomString.getBytes();
kmer.setByRead(array, 0);
kmerList.append(kmer);
- Assert.assertEquals(kmerList.getPosition(i).toString(), randomString);
+ Assert.assertEquals(randomString, kmerList.getPosition(i).toString());
Assert.assertEquals(i + 1, kmerList.getCountOfPosition());
}
@@ -72,12 +75,12 @@
KmerBytesWritable tmpKmer = new KmerBytesWritable();
i = 0;
KmerListWritable copyList = new KmerListWritable();
- copyList.set(kmerList);
+ copyList.setCopy(kmerList);
Iterator<KmerBytesWritable> iterator;
for(int j = 0; j < 5; j++){
iterator = copyList.iterator();
byte[] array = kmerList.getPosition(j).toString().getBytes();
- KmerBytesWritable deletePos = new KmerBytesWritable(5);
+ KmerBytesWritable deletePos = new KmerBytesWritable();
deletePos.setByRead(array, 0);
while(iterator.hasNext()){
tmpKmer = iterator.next();
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionListWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionListWritableTest.java
index fea658d..ac7322e 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionListWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/PositionListWritableTest.java
@@ -41,7 +41,7 @@
byte [] another = new byte [plist.getLength()*2];
int start = 20;
System.arraycopy(plist.getByteArray(), 0, another, start, plist.getLength());
- PositionListWritable plist2 = new PositionListWritable(plist.getCountOfPosition(),another,start);
+ PositionListWritable plist2 = new PositionListWritable(another,start);
for( i = 0; i < plist2.getCountOfPosition(); i++){
Assert.assertEquals(plist.getPosition(i), plist2.getPosition(i));
}
@@ -84,19 +84,22 @@
iterator = copyList.iterator();
PositionWritable deletePos = new PositionWritable();
deletePos.set((byte)1, (long)j, j);
+ boolean removed = false;
while(iterator.hasNext()){
pos = iterator.next();
if(pos.equals(deletePos)){
iterator.remove();
+ removed = true;
break;
}
}
+ Assert.assertTrue(removed);
Assert.assertEquals(5 - 1 - j, copyList.getCountOfPosition());
while(iterator.hasNext()){
pos = iterator.next();
- Assert.assertTrue(pos.getUUID() != deletePos.getUUID());
- Assert.assertTrue(pos.getReadId() != deletePos.getReadId());
- Assert.assertTrue(pos.getPosId() != deletePos.getPosId());
+ Assert.assertTrue(! (pos.getUUID() == deletePos.getUUID() &&
+ pos.getReadId() == deletePos.getReadId() &&
+ pos.getPosId() == deletePos.getPosId()));
i++;
}
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
index a50e465..5dd4f82 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
@@ -24,386 +24,376 @@
import edu.uci.ics.genomix.type.VKmerBytesWritable;
public class VKmerBytesWritableTest {
- static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' };
- static int k = 7;
+ static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' };
+ static int k = 7;
- @Test
- public void TestCompressKmer() {
- VKmerBytesWritable kmer = new VKmerBytesWritable(k);
- kmer.setByRead(array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
+ @Test
+ public void TestCompressKmer() { // setByRead must pack k letters of array starting at the given offset
+ VKmerBytesWritable kmer = new VKmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals("AATAGAA", kmer.toString()); // expected value first, so failure messages read correctly
- kmer.setByRead(array, 1);
- Assert.assertEquals(kmer.toString(), "ATAGAAG");
- }
+ kmer.setByRead(array, 1); // same kmer object re-read one letter further into array
+ Assert.assertEquals("ATAGAAG", kmer.toString());
+ }
- @Test
- public void TestMoveKmer() {
- VKmerBytesWritable kmer = new VKmerBytesWritable(k);
- kmer.setByRead(array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
+ @Test
+ public void TestMoveKmer() { // shifting in the next read letter must drop the first letter and return its code
+ VKmerBytesWritable kmer = new VKmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals("AATAGAA", kmer.toString()); // expected value first, so failure messages read correctly
- for (int i = k; i < array.length - 1; i++) {
- kmer.shiftKmerWithNextCode(array[i]);
- Assert.assertTrue(false);
- }
+ for (int i = k; i < array.length - 1; i++) { // k == array.length - 1 for the class-level array, so this body never runs
+ kmer.shiftKmerWithNextChar(array[i]); // array holds letter symbols, so use the Char variant like the call after the loop
+ Assert.assertTrue(false);
+ }
- byte out = kmer.shiftKmerWithNextChar(array[array.length - 1]);
- Assert.assertEquals(out, GeneCode.getCodeFromSymbol((byte) 'A'));
- Assert.assertEquals(kmer.toString(), "ATAGAAG");
- }
+ byte out = kmer.shiftKmerWithNextChar(array[array.length - 1]); // returned byte is compared against the code of the dropped 'A'
+ Assert.assertEquals(GeneCode.getCodeFromSymbol((byte) 'A'), out);
+ Assert.assertEquals("ATAGAAG", kmer.toString());
+ }
- @Test
- public void TestCompressKmerReverse() {
- VKmerBytesWritable kmer = new VKmerBytesWritable(k);
- kmer.setByRead(array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
+ @Test
+ public void TestCompressKmerReverse() { // setByReadReverse must yield the reverse complement of the letters read
+ VKmerBytesWritable kmer = new VKmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals("AATAGAA", kmer.toString()); // expected value first, so failure messages read correctly
- kmer.setByReadReverse(array, 1);
- Assert.assertEquals(kmer.toString(), "CTTCTAT");
- }
+ kmer.setByReadReverse(array, 1);
+ Assert.assertEquals("CTTCTAT", kmer.toString()); // "CTTCTAT" is the reverse complement of "ATAGAAG" (array from index 1)
+ }
- @Test
- public void TestMoveKmerReverse() {
- VKmerBytesWritable kmer = new VKmerBytesWritable(k);
- kmer.setByRead(array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
+ @Test
+ public void TestMoveKmerReverse() { // shifting a letter in at the front must drop the last letter and return its code
+ VKmerBytesWritable kmer = new VKmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals("AATAGAA", kmer.toString()); // expected value first, so failure messages read correctly
- for (int i = k; i < array.length - 1; i++) {
- kmer.shiftKmerWithPreChar(array[i]);
- Assert.assertTrue(false);
- }
+ for (int i = k; i < array.length - 1; i++) { // k == array.length - 1 for the class-level array, so this body never runs
+ kmer.shiftKmerWithPreChar(array[i]);
+ Assert.assertTrue(false);
+ }
- byte out = kmer.shiftKmerWithPreChar(array[array.length - 1]);
- Assert.assertEquals(out, GeneCode.getCodeFromSymbol((byte) 'A'));
- Assert.assertEquals(kmer.toString(), "GAATAGA");
- }
+ byte out = kmer.shiftKmerWithPreChar(array[array.length - 1]); // returned byte is compared against the code of the dropped 'A'
+ Assert.assertEquals(GeneCode.getCodeFromSymbol((byte) 'A'), out);
+ Assert.assertEquals("GAATAGA", kmer.toString());
+ }
- @Test
- public void TestGetGene() {
- VKmerBytesWritable kmer = new VKmerBytesWritable(9);
- String text = "AGCTGACCG";
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' };
- kmer.setByRead(array, 0);
+ @Test
+ public void TestGetGene() {
+ VKmerBytesWritable kmer = new VKmerBytesWritable(9);
+ String text = "AGCTGACCG";
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' };
+ kmer.setByRead(array, 0);
- for (int i = 0; i < 9; i++) {
- Assert.assertEquals(text.charAt(i), (char) (GeneCode
- .getSymbolFromCode(kmer.getGeneCodeAtPosition(i))));
- }
- }
+ for (int i = 0; i < 9; i++) {
+ Assert.assertEquals(text.charAt(i), (char) (GeneCode.getSymbolFromCode(kmer.getGeneCodeAtPosition(i))));
+ }
+ }
- @Test
- public void TestGetOneByteFromKmer() {
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- String string = "AGCTGACCGT";
- for (int k = 3; k <= 10; k++) {
- VKmerBytesWritable kmer = new VKmerBytesWritable(k);
- VKmerBytesWritable kmerAppend = new VKmerBytesWritable(k);
- kmer.setByRead(array, 0);
- Assert.assertEquals(string.substring(0, k), kmer.toString());
- for (int b = 0; b < k; b++) {
- byte byteActual = VKmerBytesWritable
- .getOneByteFromKmerAtPosition(b, kmer.getBytes(),
- kmer.getOffset(), kmer.getLength());
- byte byteExpect = GeneCode.getCodeFromSymbol(array[b]);
- for (int i = 1; i < 4 && b + i < k; i++) {
- byteExpect += GeneCode.getCodeFromSymbol(array[b + i]) << (i * 2);
- }
- Assert.assertEquals(byteActual, byteExpect);
- VKmerBytesWritable.appendOneByteAtPosition(b, byteActual,
- kmerAppend.getBytes(), kmerAppend.getOffset(),
- kmerAppend.getLength());
- }
- Assert.assertEquals(kmer.toString(), kmerAppend.toString());
- }
- }
+ @Test
+ public void TestGetOneByteFromKmer() { // round-trips each kmer through getOneByteFromKmerAtPosition / appendOneByteAtPosition
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+ String string = "AGCTGACCGT";
+ for (int k = 3; k <= 10; k++) {
+ VKmerBytesWritable kmer = new VKmerBytesWritable(k);
+ VKmerBytesWritable kmerAppend = new VKmerBytesWritable(k);
+ kmer.setByRead(array, 0);
+ Assert.assertEquals(string.substring(0, k), kmer.toString());
+ for (int b = 0; b < k; b++) {
+ byte byteActual = KmerBytesWritable.getOneByteFromKmerAtPosition(b, kmer.getBytes(),
+ kmer.getKmerOffset(), kmer.getKmerByteLength());
+ byte byteExpect = GeneCode.getCodeFromSymbol(array[b]); // letter b goes in the lowest two bits
+ for (int i = 1; i < 4 && b + i < k; i++) { // up to three more letters, each two bits higher, stopping at the kmer end
+ byteExpect += GeneCode.getCodeFromSymbol(array[b + i]) << (i * 2);
+ }
+ Assert.assertEquals(byteExpect, byteActual); // expected value first, so failure messages read correctly
+ KmerBytesWritable.appendOneByteAtPosition(b, byteActual, kmerAppend.getBytes(),
+ kmerAppend.getKmerOffset(), kmerAppend.getKmerByteLength());
+ }
+ Assert.assertEquals(kmer.toString(), kmerAppend.toString()); // the rebuilt copy must print the same letters as the source kmer
+ }
+ }
- @Test
- public void TestMergeFFKmer() {
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- String text = "AGCTGACCGT";
- VKmerBytesWritable kmer1 = new VKmerBytesWritable(8);
- kmer1.setByRead(array, 0);
- String text1 = "AGCTGACC";
- Assert.assertEquals(text1, kmer1.toString());
-
- VKmerBytesWritable kmer2 = new VKmerBytesWritable(8);
- kmer2.setByRead(array, 1);
- String text2 = "GCTGACCG";
- Assert.assertEquals(text2, kmer2.toString());
-
- VKmerBytesWritable merge = new VKmerBytesWritable(kmer1);
- int kmerSize = 8;
- merge.mergeWithFFKmer(kmerSize, kmer2);
- Assert.assertEquals(text1 + text2.substring(kmerSize - 1),
- merge.toString());
+ @Test
+ public void TestMergeFFKmer() {
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+ String text = "AGCTGACCGT";
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable(8);
+ kmer1.setByRead(array, 0);
+ String text1 = "AGCTGACC";
+ Assert.assertEquals(text1, kmer1.toString());
- for (int i = 1; i < 8; i++) {
- merge.setAsCopy(kmer1);
- merge.mergeWithFFKmer(i, kmer2);
- Assert.assertEquals(text1 + text2.substring(i - 1),
- merge.toString());
- }
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable(8);
+ kmer2.setByRead(array, 1);
+ String text2 = "GCTGACCG";
+ Assert.assertEquals(text2, kmer2.toString());
- for (int ik = 1; ik <= 10; ik++) {
- for (int jk = 1; jk <= 10; jk++) {
- kmer1 = new VKmerBytesWritable(ik);
- kmer2 = new VKmerBytesWritable(jk);
- kmer1.setByRead(array, 0);
- kmer2.setByRead(array, 0);
- text1 = text.substring(0, ik);
- text2 = text.substring(0, jk);
- Assert.assertEquals(text1, kmer1.toString());
- Assert.assertEquals(text2, kmer2.toString());
- for (int x = 1; x < jk; x++) {
- merge.setAsCopy(kmer1);
- merge.mergeWithFFKmer(x, kmer2);
- Assert.assertEquals(text1 + text2.substring(x - 1),
- merge.toString());
- }
- }
- }
- }
+ VKmerBytesWritable merge = new VKmerBytesWritable(kmer1);
+ int kmerSize = 8;
+ merge.mergeWithFFKmer(kmerSize, kmer2);
+ Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
- @Test
- public void TestMergeFRKmer() {
- int kmerSize = 3;
- String result = "AAGCTAACAACC";
- byte[] resultArray = result.getBytes();
+ for (int i = 1; i < 8; i++) {
+ merge.setAsCopy(kmer1);
+ merge.mergeWithFFKmer(i, kmer2);
+ Assert.assertEquals(text1 + text2.substring(i - 1), merge.toString());
+ }
- String text1 = "AAGCTAA";
- VKmerBytesWritable kmer1 = new VKmerBytesWritable(text1.length());
- kmer1.setByRead(resultArray, 0);
- Assert.assertEquals(text1, kmer1.toString());
+ for (int ik = 1; ik <= 10; ik++) {
+ for (int jk = 1; jk <= 10; jk++) {
+ kmer1 = new VKmerBytesWritable(ik);
+ kmer2 = new VKmerBytesWritable(jk);
+ kmer1.setByRead(array, 0);
+ kmer2.setByRead(array, 0);
+ text1 = text.substring(0, ik);
+ text2 = text.substring(0, jk);
+ Assert.assertEquals(text1, kmer1.toString());
+ Assert.assertEquals(text2, kmer2.toString());
+ for (int x = 1; x < jk; x++) {
+ merge.setAsCopy(kmer1);
+ merge.mergeWithFFKmer(x, kmer2);
+ Assert.assertEquals(text1 + text2.substring(x - 1), merge.toString());
+ }
+ }
+ }
+ }
- // kmer2 is the rc of the end of the read
- String text2 = "GGTTGTT";
- VKmerBytesWritable kmer2 = new VKmerBytesWritable(text2.length());
- kmer2.setByReadReverse(resultArray, result.length() - text2.length());
- Assert.assertEquals(text2, kmer2.toString());
+ @Test
+ public void TestMergeFRKmer() {
+ int kmerSize = 3;
+ String result = "AAGCTAACAACC";
+ byte[] resultArray = result.getBytes();
- VKmerBytesWritable merge = new VKmerBytesWritable(kmer1);
- merge.mergeWithFRKmer(kmerSize, kmer2);
- Assert.assertEquals(result, merge.toString());
+ String text1 = "AAGCTAA";
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable(text1.length());
+ kmer1.setByRead(resultArray, 0);
+ Assert.assertEquals(text1, kmer1.toString());
- int i = 1;
- merge.setAsCopy(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAAAACAACC", merge.toString());
+ // kmer2 is the rc of the end of the read
+ String text2 = "GGTTGTT";
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable(text2.length());
+ kmer2.setByReadReverse(resultArray, result.length() - text2.length());
+ Assert.assertEquals(text2, kmer2.toString());
- i = 2;
- merge.setAsCopy(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAAACAACC", merge.toString());
+ VKmerBytesWritable merge = new VKmerBytesWritable(kmer1);
+ merge.mergeWithFRKmer(kmerSize, kmer2);
+ Assert.assertEquals(result, merge.toString());
- i = 3;
- merge.setAsCopy(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAACAACC", merge.toString());
- }
+ int i = 1;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithFRKmer(i, kmer2);
+ Assert.assertEquals("AAGCTAAAACAACC", merge.toString());
- @Test
- public void TestMergeRFKmer() {
- int kmerSize = 3;
- String result = "GGCACAACAACCC";
- byte[] resultArray = result.getBytes();
+ i = 2;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithFRKmer(i, kmer2);
+ Assert.assertEquals("AAGCTAAACAACC", merge.toString());
- String text1 = "AACAACCC";
- VKmerBytesWritable kmer1 = new VKmerBytesWritable(text1.length());
- kmer1.setByRead(resultArray, 5);
- Assert.assertEquals(text1, kmer1.toString());
+ i = 3;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithFRKmer(i, kmer2);
+ Assert.assertEquals("AAGCTAACAACC", merge.toString());
+ }
- // kmer2 is the rc of the end of the read
- String text2 = "TTGTGCC";
- VKmerBytesWritable kmer2 = new VKmerBytesWritable(text2.length());
- kmer2.setByReadReverse(resultArray, 0);
- Assert.assertEquals(text2, kmer2.toString());
+ @Test
+ public void TestMergeRFKmer() {
+ int kmerSize = 3;
+ String result = "GGCACAACAACCC";
+ byte[] resultArray = result.getBytes();
- VKmerBytesWritable merge = new VKmerBytesWritable(kmer1);
- merge.mergeWithRFKmer(kmerSize, kmer2);
- Assert.assertEquals(result, merge.toString());
+ String text1 = "AACAACCC";
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable(text1.length());
+ kmer1.setByRead(resultArray, 5);
+ Assert.assertEquals(text1, kmer1.toString());
- int i = 1;
- merge.setAsCopy(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAAAACAACCC", merge.toString());
+ // kmer2 is the rc of the start of the read
+ String text2 = "TTGTGCC";
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable(text2.length());
+ kmer2.setByReadReverse(resultArray, 0);
+ Assert.assertEquals(text2, kmer2.toString());
- i = 2;
- merge.setAsCopy(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAAACAACCC", merge.toString());
+ VKmerBytesWritable merge = new VKmerBytesWritable(kmer1);
+ merge.mergeWithRFKmer(kmerSize, kmer2);
+ Assert.assertEquals(result, merge.toString());
- i = 3;
- merge.setAsCopy(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAACAACCC", merge.toString());
+ int i = 1;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithRFKmer(i, kmer2);
+ Assert.assertEquals("GGCACAAAACAACCC", merge.toString());
- // String test1 = "CTTAT";
- // String test2 = "AGACC"; // rc = GGTCT
- // VKmerBytesWritable k1 = new VKmerBytesWritable(5);
- // VKmerBytesWritable k2 = new VKmerBytesWritable(5);
- // k1.setByRead(test1.getBytes(), 0);
- // k2.setByRead(test2.getBytes(), 0);
- // k1.mergeWithRFKmer(3, k2);
- // Assert.assertEquals("GGTCTTAT", k1.toString()); //GGTCGTCT ->
- // AGACGACC ??
+ i = 2;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithRFKmer(i, kmer2);
+ Assert.assertEquals("GGCACAAACAACCC", merge.toString());
- String test3 = "CTA";
- String test4 = "AGA"; // rc = TCT
- VKmerBytesWritable k3 = new VKmerBytesWritable(3);
- VKmerBytesWritable k4 = new VKmerBytesWritable(3);
- k3.setByRead(test3.getBytes(), 0);
- k4.setByRead(test4.getBytes(), 0);
- k3.mergeWithRFKmer(3, k4);
- Assert.assertEquals("TCTA", k3.toString());
- // Assert.assertEquals("CTAT", k3); // this is an incorrect test case--
- // the merge always flips the passed-in kmer
- }
+ i = 3;
+ merge.setAsCopy(kmer1);
+ merge.mergeWithRFKmer(i, kmer2);
+ Assert.assertEquals("GGCACAACAACCC", merge.toString());
- @Test
- public void TestMergeRRKmer() {
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- String text = "AGCTGACCGT";
- VKmerBytesWritable kmer1 = new VKmerBytesWritable(8);
- kmer1.setByRead(array, 0);
- String text1 = "AGCTGACC";
- VKmerBytesWritable kmer2 = new VKmerBytesWritable(8);
- kmer2.setByRead(array, 1);
- String text2 = "GCTGACCG";
- Assert.assertEquals(text2, kmer2.toString());
- VKmerBytesWritable merge = new VKmerBytesWritable(kmer2);
- int kmerSize = 8;
- merge.mergeWithRRKmer(kmerSize, kmer1);
- Assert.assertEquals(text1 + text2.substring(kmerSize - 1),
- merge.toString());
+ // String test1 = "CTTAT";
+ // String test2 = "AGACC"; // rc = GGTCT
+ // VKmerBytesWritable k1 = new VKmerBytesWritable(5);
+ // VKmerBytesWritable k2 = new VKmerBytesWritable(5);
+ // k1.setByRead(test1.getBytes(), 0);
+ // k2.setByRead(test2.getBytes(), 0);
+ // k1.mergeWithRFKmer(3, k2);
+ // Assert.assertEquals("GGTCTTAT", k1.toString()); //GGTCGTCT ->
+ // AGACGACC ??
- for (int i = 1; i < 8; i++) {
- merge.setAsCopy(kmer2);
- merge.mergeWithRRKmer(i, kmer1);
- Assert.assertEquals(text1.substring(0, text1.length() - i + 1)
- + text2, merge.toString());
- }
+ String test3 = "CTA";
+ String test4 = "AGA"; // rc = TCT
+ VKmerBytesWritable k3 = new VKmerBytesWritable(3);
+ VKmerBytesWritable k4 = new VKmerBytesWritable(3);
+ k3.setByRead(test3.getBytes(), 0);
+ k4.setByRead(test4.getBytes(), 0);
+ k3.mergeWithRFKmer(3, k4);
+ Assert.assertEquals("TCTA", k3.toString());
+ // Assert.assertEquals("CTAT", k3); // this is an incorrect test case--
+ // the merge always flips the passed-in kmer
+ }
- for (int ik = 1; ik <= 10; ik++) {
- for (int jk = 1; jk <= 10; jk++) {
- kmer1 = new VKmerBytesWritable(ik);
- kmer2 = new VKmerBytesWritable(jk);
- kmer1.setByRead(array, 0);
- kmer2.setByRead(array, 0);
- text1 = text.substring(0, ik);
- text2 = text.substring(0, jk);
- Assert.assertEquals(text1, kmer1.toString());
- Assert.assertEquals(text2, kmer2.toString());
- for (int x = 1; x < ik; x++) {
- merge.setAsCopy(kmer2);
- merge.mergeWithRRKmer(x, kmer1);
- Assert.assertEquals(
- text1.substring(0, text1.length() - x + 1) + text2,
- merge.toString());
- }
- }
- }
- }
+ @Test
+ public void TestMergeRRKmer() {
+ byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
+ String text = "AGCTGACCGT";
+ VKmerBytesWritable kmer1 = new VKmerBytesWritable(8);
+ kmer1.setByRead(array, 0);
+ String text1 = "AGCTGACC";
+ VKmerBytesWritable kmer2 = new VKmerBytesWritable(8);
+ kmer2.setByRead(array, 1);
+ String text2 = "GCTGACCG";
+ Assert.assertEquals(text2, kmer2.toString());
+ VKmerBytesWritable merge = new VKmerBytesWritable(kmer2);
+ int kmerSize = 8;
+ merge.mergeWithRRKmer(kmerSize, kmer1);
+ Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
- @Test
- public void TestMergeRFAndRRKmer() {
- String test1 = "TAGAT";
- String test2 = "TCTAG"; // rc = CTAGA
- String test3 = "GCTAG";
- VKmerBytesWritable k1 = new VKmerBytesWritable(5);
- VKmerBytesWritable k2 = new VKmerBytesWritable(5);
- VKmerBytesWritable k3 = new VKmerBytesWritable(5);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k3.setByRead(test3.getBytes(), 0);
- k1.mergeWithRFKmer(5, k2);
- Assert.assertEquals("CTAGAT", k1.toString());
- k1.mergeWithRRKmer(5, k3);
- Assert.assertEquals("GCTAGAT", k1.toString());
- }
-
- @Test
- public void TestMergeRFAndRFKmer() {
- String test1 = "TAGAT";
- String test2 = "TCTAG"; // rc = CTAGA
- String test3 = "CTAGC"; // rc = GCTAG
- VKmerBytesWritable k1 = new VKmerBytesWritable(5);
- VKmerBytesWritable k2 = new VKmerBytesWritable(5);
- VKmerBytesWritable k3 = new VKmerBytesWritable(5);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k3.setByRead(test3.getBytes(), 0);
- k1.mergeWithRFKmer(5, k2);
- Assert.assertEquals("CTAGAT", k1.toString());
- k1.mergeWithRFKmer(5, k3);
- Assert.assertEquals("GCTAGAT", k1.toString());
- }
-
- @Test
- public void TestMergeRFAndFRKmer() {
- String test1 = "TAGAT"; // rc = ATCTA
- String test2 = "TCTAG"; // rc = CTAGA
- String test3 = "GCTAG"; // rc = CTAGC
- VKmerBytesWritable k1 = new VKmerBytesWritable(5);
- VKmerBytesWritable k2 = new VKmerBytesWritable(5);
- VKmerBytesWritable k3 = new VKmerBytesWritable(5);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k3.setByRead(test3.getBytes(), 0);
- k2.mergeWithRFKmer(5, k1);
- Assert.assertEquals("ATCTAG", k2.toString());
- k2.mergeWithFRKmer(5, k3);
- Assert.assertEquals("ATCTAGC", k2.toString());
- }
-
- @Test
- public void TestMergeRFAndFFKmer() {
- String test1 = "TAGAT"; // rc = ATCTA
- String test2 = "TCTAG"; // rc = CTAGA
- String test3 = "CTAGC"; // rc = GCTAG
- VKmerBytesWritable k1 = new VKmerBytesWritable(5);
- VKmerBytesWritable k2 = new VKmerBytesWritable(5);
- VKmerBytesWritable k3 = new VKmerBytesWritable(5);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k3.setByRead(test3.getBytes(), 0);
- k2.mergeWithRFKmer(5, k1);
- Assert.assertEquals("ATCTAG", k2.toString());
- k2.mergeWithFFKmer(5, k3);
- Assert.assertEquals("ATCTAGC", k2.toString());
- }
-
- @Test
- public void TestMergeKmerAndVKmer() {
- String test1 = "TAGAT"; // rc = ATCTA
- String test2 = "TCTAG"; // rc = CTAGA
- String test3 = "CTAGC"; // rc = GCTAG
- KmerBytesWritable k1 = new KmerBytesWritable(5);
- VKmerBytesWritable k2 = new VKmerBytesWritable(5);
- VKmerBytesWritable k3 = new VKmerBytesWritable(5);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k3.setByRead(test3.getBytes(), 0);
- k2.mergeWithRFKmer(5, k1);
- Assert.assertEquals("ATCTAG", k2.toString());
- k2.mergeWithFFKmer(5, k3);
- Assert.assertEquals("ATCTAGC", k2.toString());
- }
-
- @Test
- public void TestMergeKmerAndVKmerRFAndRFKmer() {
- String test1 = "TAGAT";
- String test2 = "TCTAG"; // rc = CTAGA
- String test3 = "CTAGC"; // rc = GCTAG
- KmerBytesWritable k1 = new KmerBytesWritable(5);
- VKmerBytesWritable k2 = new VKmerBytesWritable(5);
- VKmerBytesWritable k3 = new VKmerBytesWritable(5);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k3.setByRead(test3.getBytes(), 0);
- k1.mergeWithRFKmer(5, k2);
- Assert.assertEquals("CTAGAT", k1.toString());
- k1.mergeWithRFKmer(5, k3);
- Assert.assertEquals("GCTAGAT", k1.toString());
- }
+ for (int i = 1; i < 8; i++) {
+ merge.setAsCopy(kmer2);
+ merge.mergeWithRRKmer(i, kmer1);
+ Assert.assertEquals(text1.substring(0, text1.length() - i + 1) + text2, merge.toString());
+ }
+
+ for (int ik = 1; ik <= 10; ik++) {
+ for (int jk = 1; jk <= 10; jk++) {
+ kmer1 = new VKmerBytesWritable(ik);
+ kmer2 = new VKmerBytesWritable(jk);
+ kmer1.setByRead(array, 0);
+ kmer2.setByRead(array, 0);
+ text1 = text.substring(0, ik);
+ text2 = text.substring(0, jk);
+ Assert.assertEquals(text1, kmer1.toString());
+ Assert.assertEquals(text2, kmer2.toString());
+ for (int x = 1; x < ik; x++) {
+ merge.setAsCopy(kmer2);
+ merge.mergeWithRRKmer(x, kmer1);
+ Assert.assertEquals(text1.substring(0, text1.length() - x + 1) + text2, merge.toString());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void TestMergeRFAndRRKmer() {
+ String test1 = "TAGAT";
+ String test2 = "TCTAG"; // rc = CTAGA
+ String test3 = "GCTAG";
+ VKmerBytesWritable k1 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k2 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k3 = new VKmerBytesWritable(5);
+ k1.setByRead(test1.getBytes(), 0);
+ k2.setByRead(test2.getBytes(), 0);
+ k3.setByRead(test3.getBytes(), 0);
+ k1.mergeWithRFKmer(5, k2);
+ Assert.assertEquals("CTAGAT", k1.toString());
+ k1.mergeWithRRKmer(5, k3);
+ Assert.assertEquals("GCTAGAT", k1.toString());
+ }
+
+ @Test
+ public void TestMergeRFAndRFKmer() {
+ String test1 = "TAGAT";
+ String test2 = "TCTAG"; // rc = CTAGA
+ String test3 = "CTAGC"; // rc = GCTAG
+ VKmerBytesWritable k1 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k2 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k3 = new VKmerBytesWritable(5);
+ k1.setByRead(test1.getBytes(), 0);
+ k2.setByRead(test2.getBytes(), 0);
+ k3.setByRead(test3.getBytes(), 0);
+ k1.mergeWithRFKmer(5, k2);
+ Assert.assertEquals("CTAGAT", k1.toString());
+ k1.mergeWithRFKmer(5, k3);
+ Assert.assertEquals("GCTAGAT", k1.toString());
+ }
+
+ @Test
+ public void TestMergeRFAndFRKmer() {
+ String test1 = "TAGAT"; // rc = ATCTA
+ String test2 = "TCTAG"; // rc = CTAGA
+ String test3 = "GCTAG"; // rc = CTAGC
+ VKmerBytesWritable k1 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k2 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k3 = new VKmerBytesWritable(5);
+ k1.setByRead(test1.getBytes(), 0);
+ k2.setByRead(test2.getBytes(), 0);
+ k3.setByRead(test3.getBytes(), 0);
+ k2.mergeWithRFKmer(5, k1);
+ Assert.assertEquals("ATCTAG", k2.toString());
+ k2.mergeWithFRKmer(5, k3);
+ Assert.assertEquals("ATCTAGC", k2.toString());
+ }
+
+ @Test
+ public void TestMergeRFAndFFKmer() {
+ String test1 = "TAGAT"; // rc = ATCTA
+ String test2 = "TCTAG"; // rc = CTAGA
+ String test3 = "CTAGC"; // rc = GCTAG
+ VKmerBytesWritable k1 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k2 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k3 = new VKmerBytesWritable(5);
+ k1.setByRead(test1.getBytes(), 0);
+ k2.setByRead(test2.getBytes(), 0);
+ k3.setByRead(test3.getBytes(), 0);
+ k2.mergeWithRFKmer(5, k1);
+ Assert.assertEquals("ATCTAG", k2.toString());
+ k2.mergeWithFFKmer(5, k3);
+ Assert.assertEquals("ATCTAGC", k2.toString());
+ }
+
+ @Test
+ public void TestMergeThreeVKmersRF_FF() {
+ String test1 = "TAGAT"; // rc = ATCTA
+ String test2 = "TCTAG"; // rc = CTAGA
+ String test3 = "CTAGC"; // rc = GCTAG
+ VKmerBytesWritable k1 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k2 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k3 = new VKmerBytesWritable(5);
+ k1.setByRead(test1.getBytes(), 0);
+ k2.setByRead(test2.getBytes(), 0);
+ k3.setByRead(test3.getBytes(), 0);
+ k2.mergeWithRFKmer(5, k1);
+ Assert.assertEquals("ATCTAG", k2.toString());
+ k2.mergeWithFFKmer(5, k3);
+ Assert.assertEquals("ATCTAGC", k2.toString());
+ }
+
+ @Test
+ public void TestMergeThreeVKmerRF_RF() {
+ String test1 = "TAGAT";
+ String test2 = "TCTAG"; // rc = CTAGA
+ String test3 = "CTAGC"; // rc = GCTAG
+ VKmerBytesWritable k1 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k2 = new VKmerBytesWritable(5);
+ VKmerBytesWritable k3 = new VKmerBytesWritable(5);
+ k1.setByRead(test1.getBytes(), 0);
+ k2.setByRead(test2.getBytes(), 0);
+ k3.setByRead(test3.getBytes(), 0);
+ k1.mergeWithRFKmer(5, k2);
+ Assert.assertEquals("CTAGAT", k1.toString());
+ k1.mergeWithRFKmer(5, k3);
+ Assert.assertEquals("GCTAGAT", k1.toString());
+ }
}