Refactor merging into NodeWritable and add tests
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeListWritable.java
index d5a038a..2b41f5d 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/EdgeListWritable.java
@@ -65,6 +65,10 @@
return edges.get(i);
}
+ public boolean add(EdgeWritable element) { // append one edge to this list
+ return edges.add(element); // delegates to the backing `edges` collection and passes its result through
+ }
+
public EdgeWritable set(int i, EdgeWritable element) {
return edges.set(i, element);
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
index c287c1b..394d14e 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
@@ -72,13 +72,13 @@
int posInByteOfChain = ((kmerChain.getKmerLetterLength() - lastK) % 4) << 1; // *2
int byteInKmer = kmer.getKmerByteLength() - 1;
for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] = (byte) ((0xff & kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset()]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] |= ((kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset() - 1] << (8 - posInByteOfChain)));
+ kmer.getBlockBytes()[byteInKmer + kmer.getKmerOffset()] = (byte) ((0xff & kmerChain.getBlockBytes()[byteInChain + kmerChain.getKmerOffset()]) >> posInByteOfChain);
+ kmer.getBlockBytes()[byteInKmer + kmer.getKmerOffset()] |= ((kmerChain.getBlockBytes()[byteInChain + kmerChain.getKmerOffset() - 1] << (8 - posInByteOfChain)));
}
/** last kmer byte */
if (byteInKmer == 0) {
- kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmerChain.getBytes()[0 + kmerChain.getKmerOffset()] & 0xff) >> posInByteOfChain);
+ kmer.getBlockBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmerChain.getBlockBytes()[0 + kmerChain.getKmerOffset()] & 0xff) >> posInByteOfChain);
}
kmer.clearLeadBit();
return kmer;
@@ -105,13 +105,13 @@
int i = 1;
for (; i < kmer.getKmerByteLength(); i++) {
- kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] = kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i];
+ kmer.getBlockBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] = kmerChain.getBlockBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i];
}
int posInByteOfChain = (firstK % 4) << 1; // *2
if (posInByteOfChain == 0) {
- kmer.getBytes()[0 + kmer.getKmerOffset()] = kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i];
+ kmer.getBlockBytes()[0 + kmer.getKmerOffset()] = kmerChain.getBlockBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i];
} else {
- kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) (kmerChain.getBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i] & ((1 << posInByteOfChain) - 1));
+ kmer.getBlockBytes()[0 + kmer.getKmerOffset()] = (byte) (kmerChain.getBlockBytes()[kmerChain.getKmerOffset() + kmerChain.getKmerByteLength() - i] & ((1 << posInByteOfChain) - 1));
}
kmer.clearLeadBit();
return kmer;
@@ -132,13 +132,13 @@
int posInByteOfChain = startK % 4 << 1; // *2
int byteInKmer = kmer.getKmerByteLength() - 1;
for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] = (byte) ((0xff & kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset()]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer + kmer.getKmerOffset()] |= ((kmerChain.getBytes()[byteInChain + kmerChain.getKmerOffset() - 1] << (8 - posInByteOfChain)));
+ kmer.getBlockBytes()[byteInKmer + kmer.getKmerOffset()] = (byte) ((0xff & kmerChain.getBlockBytes()[byteInChain + kmerChain.getKmerOffset()]) >> posInByteOfChain);
+ kmer.getBlockBytes()[byteInKmer + kmer.getKmerOffset()] |= ((kmerChain.getBlockBytes()[byteInChain + kmerChain.getKmerOffset() - 1] << (8 - posInByteOfChain)));
}
/** last kmer byte */
if (byteInKmer == 0) {
- kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmerChain.getBytes()[0 + kmerChain.getKmerOffset()] & 0xff) >> posInByteOfChain);
+ kmer.getBlockBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmerChain.getBlockBytes()[0 + kmerChain.getKmerOffset()] & 0xff) >> posInByteOfChain);
}
kmer.clearLeadBit();
return kmer;
@@ -160,12 +160,12 @@
public VKmerBytesWritable mergeKmerWithNextCode(final VKmerBytesWritable kmer, byte nextCode) {
this.kmer.reset(kmer.getKmerLetterLength() + 1);
for (int i = 1; i <= kmer.getKmerByteLength(); i++) {
- this.kmer.getBytes()[this.kmer.getKmerOffset() + this.kmer.getKmerByteLength() - i] = kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i];
+ this.kmer.getBlockBytes()[this.kmer.getKmerOffset() + this.kmer.getKmerByteLength() - i] = kmer.getBlockBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i];
}
if (this.kmer.getKmerByteLength() > kmer.getKmerByteLength()) {
- this.kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) (nextCode & 0x3);
+ this.kmer.getBlockBytes()[0 + kmer.getKmerOffset()] = (byte) (nextCode & 0x3);
} else {
- this.kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) (kmer.getBytes()[0 + kmer.getKmerOffset()] | ((nextCode & 0x3) << ((kmer.getKmerLetterLength() % 4) << 1)));
+ this.kmer.getBlockBytes()[0 + kmer.getKmerOffset()] = (byte) (kmer.getBlockBytes()[0 + kmer.getKmerOffset()] | ((nextCode & 0x3) << ((kmer.getKmerLetterLength() % 4) << 1)));
}
this.kmer.clearLeadBit();
return this.kmer;
@@ -188,13 +188,13 @@
this.kmer.reset(kmer.getKmerLetterLength() + 1);
int byteInMergedKmer = 0;
if (kmer.getKmerLetterLength() % 4 == 0) {
- this.kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmer.getBytes()[0 + kmer.getKmerOffset()] >> 6) & 0x3);
+ this.kmer.getBlockBytes()[0 + kmer.getKmerOffset()] = (byte) ((kmer.getBlockBytes()[0 + kmer.getKmerOffset()] >> 6) & 0x3);
byteInMergedKmer++;
}
for (int i = 0; i < kmer.getKmerByteLength() - 1; i++, byteInMergedKmer++) {
- this.kmer.getBytes()[byteInMergedKmer + kmer.getKmerOffset()] = (byte) ((kmer.getBytes()[i + kmer.getKmerOffset()] << 2) | ((kmer.getBytes()[i + kmer.getKmerOffset() + 1] >> 6) & 0x3));
+ this.kmer.getBlockBytes()[byteInMergedKmer + kmer.getKmerOffset()] = (byte) ((kmer.getBlockBytes()[i + kmer.getKmerOffset()] << 2) | ((kmer.getBlockBytes()[i + kmer.getKmerOffset() + 1] >> 6) & 0x3));
}
- this.kmer.getBytes()[byteInMergedKmer + kmer.getKmerOffset()] = (byte) ((kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - 1] << 2) | (preCode & 0x3));
+ this.kmer.getBlockBytes()[byteInMergedKmer + kmer.getKmerOffset()] = (byte) ((kmer.getBlockBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - 1] << 2) | (preCode & 0x3));
this.kmer.clearLeadBit();
return this.kmer;
}
@@ -217,24 +217,24 @@
kmer.reset(preKmer.getKmerLetterLength() + nextKmer.getKmerLetterLength());
int i = 1;
for (; i <= preKmer.getKmerByteLength(); i++) {
- kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] = preKmer.getBytes()[preKmer.getKmerOffset() + preKmer.getKmerByteLength() - i];
+ kmer.getBlockBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] = preKmer.getBlockBytes()[preKmer.getKmerOffset() + preKmer.getKmerByteLength() - i];
}
if (i > 1) {
i--;
}
if (preKmer.getKmerLetterLength() % 4 == 0) {
for (int j = 1; j <= nextKmer.getKmerByteLength(); j++) {
- kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i - j] = nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j];
+ kmer.getBlockBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i - j] = nextKmer.getBlockBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j];
}
} else {
int posNeedToMove = ((preKmer.getKmerLetterLength() % 4) << 1);
- kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] |= nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - 1] << posNeedToMove;
+ kmer.getBlockBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i] |= nextKmer.getBlockBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - 1] << posNeedToMove;
for (int j = 1; j < nextKmer.getKmerByteLength(); j++) {
- kmer.getBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i - j] = (byte) (((nextKmer.getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
- .getBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j - 1] << posNeedToMove));
+ kmer.getBlockBytes()[kmer.getKmerOffset() + kmer.getKmerByteLength() - i - j] = (byte) (((nextKmer.getBlockBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
+ .getBlockBytes()[nextKmer.getKmerOffset() + nextKmer.getKmerByteLength() - j - 1] << posNeedToMove));
}
if (nextKmer.getKmerLetterLength() % 4 == 0 || (nextKmer.getKmerLetterLength() % 4) * 2 + posNeedToMove > 8) {
- kmer.getBytes()[0 + kmer.getKmerOffset()] = (byte) ((0xff & nextKmer.getBytes()[0 + nextKmer.getKmerOffset()]) >> (8 - posNeedToMove));
+ kmer.getBlockBytes()[0 + kmer.getKmerOffset()] = (byte) ((0xff & nextKmer.getBlockBytes()[0 + nextKmer.getKmerOffset()]) >> (8 - posNeedToMove));
}
}
kmer.clearLeadBit();
@@ -290,14 +290,14 @@
int curPosAtReverse = 0;
int curByteAtReverse = this.kmer.getKmerByteLength() - 1;
- this.kmer.getBytes()[curByteAtReverse + this.kmer.getKmerOffset()] = 0;
+ this.kmer.getBlockBytes()[curByteAtReverse + this.kmer.getKmerOffset()] = 0;
for (int i = 0; i < kmer.getKmerLetterLength(); i++) {
- byte gene = (byte) ((kmer.getBytes()[curByteAtKmer + kmer.getKmerOffset()] >> curPosAtKmer) & 0x03);
- this.kmer.getBytes()[curByteAtReverse + this.kmer.getKmerOffset()] |= gene << curPosAtReverse;
+ byte gene = (byte) ((kmer.getBlockBytes()[curByteAtKmer + kmer.getKmerOffset()] >> curPosAtKmer) & 0x03);
+ this.kmer.getBlockBytes()[curByteAtReverse + this.kmer.getKmerOffset()] |= gene << curPosAtReverse;
curPosAtReverse += 2;
if (curPosAtReverse >= 8) {
curPosAtReverse = 0;
- this.kmer.getBytes()[--curByteAtReverse + this.kmer.getKmerOffset()] = 0;
+ this.kmer.getBlockBytes()[--curByteAtReverse + this.kmer.getKmerOffset()] = 0;
}
curPosAtKmer -= 2;
if (curPosAtKmer < 0) {
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index dd01806..bb6622a 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -33,21 +33,18 @@
public static final NodeWritable EMPTY_NODE = new NodeWritable();
private static final int SIZE_FLOAT = 4;
-
- private EdgeListWritable[] edges = {null, null, null, null};
-
- private PositionListWritable startReads; // first internalKmer in read
- private PositionListWritable endReads; //first internalKmer in read (but internalKmer was flipped)
-
+
+ private EdgeListWritable[] edges = { null, null, null, null };
+
+ private PositionListWritable startReads; // first internalKmer in read
+ private PositionListWritable endReads; // first internalKmer in read (but
+ // internalKmer was flipped)
+
private VKmerBytesWritable internalKmer;
private float averageCoverage;
// merge/update directions
-
- // merge/update directions
-
- // merge/update directions
public static class DirectionFlag {
public static final byte DIR_FF = 0b00 << 0;
public static final byte DIR_FR = 0b01 << 0;
@@ -55,35 +52,40 @@
public static final byte DIR_RR = 0b11 << 0;
public static final byte DIR_MASK = 0b11 << 0;
public static final byte DIR_CLEAR = 0b1111100 << 0;
-
- public static final byte[] values = {DIR_FF, DIR_FR, DIR_RF, DIR_RR};
+
+ public static final byte[] values = { DIR_FF, DIR_FR, DIR_RF, DIR_RR };
}
-
+
public NodeWritable() {
- for (byte d: DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
edges[d] = new EdgeListWritable();
}
startReads = new PositionListWritable();
endReads = new PositionListWritable();
- internalKmer = new VKmerBytesWritable(); // in graph construction - not set kmerlength Optimization: VKmer
+ internalKmer = new VKmerBytesWritable(); // in graph construction - not
+ // set kmerlength
+ // Optimization: VKmer
averageCoverage = 0;
}
-
- public NodeWritable(EdgeListWritable[] edges,
- PositionListWritable startReads, PositionListWritable endReads,
+
+ public NodeWritable(EdgeListWritable[] edges, PositionListWritable startReads, PositionListWritable endReads,
VKmerBytesWritable kmer, float coverage) {
this();
setAsCopy(edges, startReads, endReads, kmer, coverage);
}
-
- public void setAsCopy(NodeWritable node){
+
+ public NodeWritable(byte[] data, int offset) { // build a node from its serialized form starting at data[offset]
+ this(); // allocate the empty edge lists, read lists and kmer first
+ setAsReference(data, offset); // then bind every field to `data` (by name this references rather than copies the array -- confirm before reusing `data`)
+ }
+
+ public void setAsCopy(NodeWritable node) {
setAsCopy(node.edges, node.startReads, node.endReads, node.internalKmer, node.averageCoverage);
}
-
- public void setAsCopy(EdgeListWritable[] edges,
- PositionListWritable startReads, PositionListWritable endReads,
+
+ public void setAsCopy(EdgeListWritable[] edges, PositionListWritable startReads, PositionListWritable endReads,
VKmerBytesWritable kmer2, float coverage) {
- for (byte d: DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
this.edges[d].setAsCopy(edges[d]);
}
this.startReads.set(startReads);
@@ -93,7 +95,7 @@
}
public void reset() {
- for (byte d: DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
edges[d].reset();
}
startReads.reset();
@@ -101,7 +103,7 @@
internalKmer.reset(0);
averageCoverage = 0;
}
-
+
public VKmerBytesWritable getInternalKmer() {
return internalKmer;
}
@@ -113,45 +115,48 @@
public int getKmerLength() {
return internalKmer.getKmerLetterLength();
}
-
+
public EdgeListWritable getEdgeList(byte dir) {
return edges[dir & DirectionFlag.DIR_MASK];
}
-
+
public void setEdgeList(byte dir, EdgeListWritable edgeList) {
this.edges[dir & DirectionFlag.DIR_MASK].setAsCopy(edgeList);
}
-
- /**
- * Update my coverage to be the average of this and other. Used when merging paths.
- */
- public void mergeCoverage(NodeWritable other) {
- // sequence considered in the average doesn't include anything overlapping with other kmers
- float adjustedLength = internalKmer.getKmerLetterLength() + other.internalKmer.getKmerLetterLength() - (KmerBytesWritable.getKmerLength() - 1) * 2;
-
- float myCount = (internalKmer.getKmerLetterLength() - KmerBytesWritable.getKmerLength() - 1) * averageCoverage;
- float otherCount = (other.internalKmer.getKmerLetterLength() - KmerBytesWritable.getKmerLength() - 1) * other.averageCoverage;
- averageCoverage = (myCount + otherCount) / adjustedLength;
- }
-
- /**
- * Update my coverage as if all the reads in other became my own
- */
- public void addCoverage(NodeWritable other) {
- float myAdjustedLength = internalKmer.getKmerLetterLength() - KmerBytesWritable.getKmerLength() - 1;
- float otherAdjustedLength = other.internalKmer.getKmerLetterLength() - KmerBytesWritable.getKmerLength() - 1;
- averageCoverage += other.averageCoverage * (otherAdjustedLength / myAdjustedLength);
- }
-
- public void setAvgCoverage(float coverage) {
- averageCoverage = coverage;
- }
-
- public float getAvgCoverage() {
- return averageCoverage;
- }
-
- public PositionListWritable getStartReads() {
+
+ /**
+ * Update my coverage to be the kmer-weighted average of this node and other. Used when merging
+ * paths; reads both nodes' current internalKmer lengths, so call it before the kmers themselves are merged.
+ */
+ public void mergeCoverage(NodeWritable other) {
+ // total number of kmers in the merged span: each node contributes (letters - K + 1),
+ // i.e. letters + otherLetters - 2 * (K - 1); the overlapping K-1 letters are not double counted
+ float adjustedLength = internalKmer.getKmerLetterLength() + other.internalKmer.getKmerLetterLength()
+ - (KmerBytesWritable.getKmerLength() - 1) * 2;
+ // each side's total count = (its number of kmers) * (its own average coverage)
+ float myCount = (internalKmer.getKmerLetterLength() - KmerBytesWritable.getKmerLength() + 1) * averageCoverage;
+ float otherCount = (other.internalKmer.getKmerLetterLength() - KmerBytesWritable.getKmerLength() + 1) * other.averageCoverage;
+ averageCoverage = (myCount + otherCount) / adjustedLength;
+ }
+
+    /**
+     * Update my coverage as if all the reads in other became my own; other's coverage is scaled by the ratio of the two spans, each counted in kmers (letters - K + 1) as in mergeCoverage.
+     */
+    public void addCoverage(NodeWritable other) {
+        float myAdjustedLength = internalKmer.getKmerLetterLength() - KmerBytesWritable.getKmerLength() + 1; // kmers in my sequence (was "- 1": zero or negative for short nodes)
+        float otherAdjustedLength = other.internalKmer.getKmerLetterLength() - KmerBytesWritable.getKmerLength() + 1; // kmers in other's sequence
+        averageCoverage += other.averageCoverage * (otherAdjustedLength / myAdjustedLength);
+    }
+
+ public void setAvgCoverage(float coverage) {
+ averageCoverage = coverage;
+ }
+
+ public float getAvgCoverage() {
+ return averageCoverage;
+ }
+
+ public PositionListWritable getStartReads() {
return startReads;
}
@@ -168,22 +173,22 @@
}
/**
- * Returns the length of the byte-array version of this node
- */
- public int getSerializedLength() {
- int length = 0;
- for (byte d:DirectionFlag.values) {
- length += edges[d].getLength();
- }
- length += internalKmer.getLength();
- length += this.startReads.getLength();
- length += this.endReads.getLength();
- length += SIZE_FLOAT;
- return length;
- }
-
- /**
- * Return this Node's representation as a new byte array
+ * Returns the length of the byte-array version of this node
+ */
+ public int getSerializedLength() {
+ int length = 0;
+ for (byte d : DirectionFlag.values) { // the four edge lists: FF, FR, RF, RR
+ length += edges[d].getLength();
+ }
+ length += startReads.getLength(); // fields are summed in the same order setAsReference walks them
+ length += endReads.getLength();
+ length += internalKmer.getLength();
+ length += SIZE_FLOAT; // avgCoverage
+ return length;
+ }
+
+ /**
+ * Return this Node's representation as a new byte array
*/
public byte[] marshalToByteArray() throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(getSerializedLength());
@@ -194,7 +199,7 @@
public void setAsCopy(byte[] data, int offset) {
int curOffset = offset;
- for (byte d:DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
edges[d].setAsCopy(data, curOffset);
curOffset += edges[d].getLength();
}
@@ -209,7 +214,7 @@
public void setAsReference(byte[] data, int offset) {
int curOffset = offset;
- for (byte d:DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
edges[d].setAsReference(data, curOffset);
curOffset += edges[d].getLength();
}
@@ -217,15 +222,15 @@
curOffset += startReads.getLength();
endReads.setNewReference(data, curOffset);
curOffset += endReads.getLength();
-
- internalKmer.setAsReference(data, curOffset);
+
+ internalKmer.setAsReference(data, curOffset);
curOffset += internalKmer.getLength();
averageCoverage = Marshal.getFloat(data, curOffset);
}
@Override
public void write(DataOutput out) throws IOException {
- for (byte d:DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
edges[d].write(out);
}
startReads.write(out);
@@ -237,7 +242,7 @@
@Override
public void readFields(DataInput in) throws IOException {
reset();
- for (byte d:DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
edges[d].readFields(in);
}
startReads.readFields(in);
@@ -265,51 +270,153 @@
@Override
public boolean equals(Object o) {
- if (! (o instanceof NodeWritable))
+ if (!(o instanceof NodeWritable))
return false;
-
+
NodeWritable nw = (NodeWritable) o;
- for (byte d:DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
if (!edges[d].equals(nw.edges[d]))
return false;
}
- return averageCoverage == nw.averageCoverage && internalKmer.equals(nw.internalKmer);
+
+ return (averageCoverage == nw.averageCoverage && startReads.equals(nw.startReads) &&
+ endReads.equals(nw.endReads) && internalKmer.equals(nw.internalKmer));
}
@Override
public String toString() {
StringBuilder sbuilder = new StringBuilder();
sbuilder.append('{');
- for (byte d: DirectionFlag.values) {
+ for (byte d : DirectionFlag.values) {
sbuilder.append(edges[d].toString()).append('\t');
}
+ sbuilder.append("{5':" + startReads.toString() + ", ~5':" + endReads.toString() + "}");
sbuilder.append(internalKmer.toString()).append('\t');
sbuilder.append(averageCoverage).append('x').append('}');
return sbuilder.toString();
}
/**
- * merge this node with another node. If a flip is necessary, `other` will flip.
+ * merge this node with another node. If a flip is necessary, `other` will flip.
* According to `dir`:
- * * kmers are concatenated
- * * coverage becomes a weighted average
- * * startReads and endReads are merged
- * * my edges are replaced with some subset of `other`'s edges
- *
- * Raises an error when:
- * 1) non-overlapping kmers
- * 2) `other` has degree > 1 towards me
+ * 1) kmers are concatenated/prepended/flipped
+ * 2) coverage becomes a weighted average of the two spans
+ * 3) startReads and endReads are merged and possibly flipped
+ * 4) my edges are replaced with some subset of `other`'s edges
*
- * @param dir: one of the DirectionFlag.DIR_*
- * @param other: the node to merge with. I should have a `dir` edge towards `other`
+ * An error is raised when:
+ * 1) non-overlapping kmers // TODO
+ * 2) `other` has degree > 1 towards me
+ *
+ * @param dir
+ * : one of the DirectionFlag.DIR_*
+ * @param other
+ * : the node to merge with. I should have a `dir` edge towards `other`
*/
public void mergeWithNode(byte dir, final NodeWritable other) {
- switch(dir & DirectionFlag.DIR_MASK) {
- case DIR_FF:
-
+ mergeEdges(dir, other);
+ mergeStartAndEndReadIDs(dir, other);
+ mergeCoverage(other);
+ internalKmer.mergeWithKmerInDir(dir, KmerBytesWritable.lettersInKmer, other.internalKmer);
+ }
+
+ /**
+ * merge my edge list (both kmers and readIDs) with those of `other`: the two lists on my merged side are overwritten with copies of `other`'s lists. Assumes that `other` is doing the flipping, if any. Throws IllegalArgumentException when either node has degree > 1 on the side checked for that direction.
+ */
+ private void mergeEdges(byte dir, NodeWritable other) {
+ switch (dir & DirectionFlag.DIR_MASK) {
+ case DirectionFlag.DIR_FF: // my FF/FR lists become other's FF/FR
+ if (outDegree() > 1)
+ throw new IllegalArgumentException("Illegal FF merge attempted! My outgoing degree is " + outDegree() + " in " + toString());
+ if (other.inDegree() > 1)
+ throw new IllegalArgumentException("Illegal FF merge attempted! Other incoming degree is " + other.inDegree() + " in " + other.toString());
+ edges[DirectionFlag.DIR_FF].setAsCopy(other.edges[DirectionFlag.DIR_FF]);
+ edges[DirectionFlag.DIR_FR].setAsCopy(other.edges[DirectionFlag.DIR_FR]);
+ break;
+ case DirectionFlag.DIR_FR: // my FF/FR lists become other's RF/RR (first letter swapped)
+ if (outDegree() > 1)
+ throw new IllegalArgumentException("Illegal FR merge attempted! My outgoing degree is " + outDegree() + " in " + toString());
+ if (other.outDegree() > 1)
+ throw new IllegalArgumentException("Illegal FR merge attempted! Other outgoing degree is " + other.outDegree() + " in " + other.toString());
+ edges[DirectionFlag.DIR_FF].setAsCopy(other.edges[DirectionFlag.DIR_RF]);
+ edges[DirectionFlag.DIR_FR].setAsCopy(other.edges[DirectionFlag.DIR_RR]);
+ break;
+ case DirectionFlag.DIR_RF: // my RF/RR lists become other's FF/FR (first letter swapped)
+ if (inDegree() > 1)
+ throw new IllegalArgumentException("Illegal RF merge attempted! My incoming degree is " + inDegree() + " in " + toString());
+ if (other.inDegree() > 1)
+ throw new IllegalArgumentException("Illegal RF merge attempted! Other incoming degree is " + other.inDegree() + " in " + other.toString());
+ edges[DirectionFlag.DIR_RF].setAsCopy(other.edges[DirectionFlag.DIR_FF]);
+ edges[DirectionFlag.DIR_RR].setAsCopy(other.edges[DirectionFlag.DIR_FR]);
+ break;
+ case DirectionFlag.DIR_RR: // my RF/RR lists become other's RF/RR
+ if (inDegree() > 1)
+ throw new IllegalArgumentException("Illegal RR merge attempted! My incoming degree is " + inDegree() + " in " + toString());
+ if (other.outDegree() > 1)
+ throw new IllegalArgumentException("Illegal RR merge attempted! Other outgoing degree is " + other.outDegree() + " in " + other.toString());
+ edges[DirectionFlag.DIR_RF].setAsCopy(other.edges[DirectionFlag.DIR_RF]);
+ edges[DirectionFlag.DIR_RR].setAsCopy(other.edges[DirectionFlag.DIR_RR]);
+ break;
}
}
-
+
+    private void mergeStartAndEndReadIDs(byte dir, NodeWritable other) { // fold other's read heads into mine, re-based onto the merged kmer; uses pre-merge kmer lengths
+        int K = KmerBytesWritable.lettersInKmer;
+        int otherLength = other.internalKmer.lettersInKmer;
+        int thisLength = internalKmer.lettersInKmer;
+        int newOtherOffset;
+        int mergeDir = dir & DirectionFlag.DIR_MASK;
+        if (mergeDir == DirectionFlag.DIR_RF || mergeDir == DirectionFlag.DIR_RR) { // other is prepended: shift MY positions in both lists (never other's)
+            int newThisOffset = otherLength - K + 1;
+            for (PositionWritable p : startReads) {
+                p.set(p.getMateId(), p.getReadId(), newThisOffset + p.getPosId());
+            }
+            for (PositionWritable p : endReads) {
+                p.set(p.getMateId(), p.getReadId(), newThisOffset + p.getPosId());
+            }
+        }
+        switch (mergeDir) {
+            case DirectionFlag.DIR_FF:
+                newOtherOffset = thisLength - K + 1;
+                // other is appended unflipped: same lists, positions moved past my kmers
+                for (PositionWritable p : other.startReads) {
+                    startReads.append(p.getMateId(), p.getReadId(), newOtherOffset + p.getPosId());
+                }
+                for (PositionWritable p : other.endReads) {
+                    endReads.append(p.getMateId(), p.getReadId(), newOtherOffset + p.getPosId());
+                }
+                break;
+            case DirectionFlag.DIR_FR:
+                newOtherOffset = thisLength - K + 1 + otherLength - K;
+                // other is appended flipped: lists swap, and positions count BACK from other's last kmer
+                for (PositionWritable p : other.startReads) {
+                    endReads.append(p.getMateId(), p.getReadId(), newOtherOffset - p.getPosId());
+                }
+                for (PositionWritable p : other.endReads) {
+                    startReads.append(p.getMateId(), p.getReadId(), newOtherOffset - p.getPosId());
+                }
+                break;
+            case DirectionFlag.DIR_RF:
+                newOtherOffset = otherLength - K;
+                // other is prepended flipped: no extra offset (they are first now), lists swap, positions mirrored
+                for (PositionWritable p : other.startReads) {
+                    endReads.append(p.getMateId(), p.getReadId(), newOtherOffset - p.getPosId());
+                }
+                for (PositionWritable p : other.endReads) {
+                    startReads.append(p.getMateId(), p.getReadId(), newOtherOffset - p.getPosId());
+                }
+                break;
+            case DirectionFlag.DIR_RR: // other is prepended unflipped: its positions are already correct, so stream them in unchanged
+                for (PositionWritable p : other.startReads) {
+                    startReads.append(p.getMateId(), p.getReadId(), p.getPosId());
+                }
+                for (PositionWritable p : other.endReads) {
+                    endReads.append(p.getMateId(), p.getReadId(), p.getPosId());
+                }
+                break;
+        }
+    }
+
public int inDegree() {
return edges[DirectionFlag.DIR_RR].getCountOfPosition() + edges[DirectionFlag.DIR_RF].getCountOfPosition();
}
@@ -319,7 +426,8 @@
}
/*
- * Return if this node is a "path" compressible node, that is, it has an in-degree and out-degree of 1
+ * Return if this node is a "path" compressible node, that is, it has an
+ * in-degree and out-degree of 1
*/
public boolean isPathNode() {
return inDegree() == 1 && outDegree() == 1;
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
index 4449280..fce0360 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerBytesWritable.java
@@ -19,6 +19,7 @@
import java.io.DataOutput;
import java.io.IOException;
import java.io.Serializable;
+import java.nio.ByteBuffer;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.WritableComparable;
@@ -34,7 +35,7 @@
* Note: `offset` as used in this class is the offset at which the *kmer*
* begins. There is a {@value HEADER_SIZE}-byte header preceding the kmer
*/
-public class VKmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
+public class VKmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
private static final long serialVersionUID = 1L;
protected static final byte[] EMPTY_BYTES = { 0, 0, 0, 0 }; // int indicating 0 length
protected static final int HEADER_SIZE = 4; // number of bytes for header info
@@ -289,6 +290,10 @@
@Override
public byte[] getBytes() {
+        return java.util.Arrays.copyOfRange(bytes, getBlockOffset(), getBlockOffset() + getLength()); // copy of just this kmer's block (header + kmer), valid from index 0 for getLength() bytes as BinaryComparable expects; ByteBuffer.wrap(..).array() handed back the entire backing array. Writers must use getBlockBytes().
+    }
+
+ public byte[] getBlockBytes() {
return bytes;
}
@@ -309,7 +314,7 @@
/**
* Return the number of bytes used by both header and kmer chain
*/
- @Override
+// @Override
public int getLength() {
return bytesUsed + HEADER_SIZE;
}
@@ -730,4 +735,10 @@
return fracDissimilar(this, other);
}
+    @Override
+    public int compareTo(BinaryComparable o) {
+        // Keep BinaryComparable's byte-wise ordering over getBytes()/getLength(); the generated stub returned 0, making every kmer compare equal.
+        return super.compareTo(o);
+    }
+
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java
index 80353c1..84816f4 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/VKmerListWritable.java
@@ -55,7 +55,7 @@
public void append(VKmerBytesWritable kmer) {
setSize(getLength() + kmer.getLength());
- System.arraycopy(kmer.getBytes(), kmer.kmerStartOffset - VKmerBytesWritable.HEADER_SIZE, storage, offset
+ System.arraycopy(kmer.getBlockBytes(), kmer.kmerStartOffset - VKmerBytesWritable.HEADER_SIZE, storage, offset
+ getLength(), kmer.getLength());
valueCount += 1;
Marshal.putInt(valueCount, storage, offset);
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java.orig b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java.orig
deleted file mode 100644
index 8a0cb6d..0000000
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java.orig
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.data.test;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-import junit.framework.Assert;
-
-import org.junit.Test;
-
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-public class KmerBytesWritableTest {
- static byte[] array = { 'A', 'A', 'T', 'A', 'G', 'A', 'A', 'G' };
- static int k = 7;
-
- @Test
- public void TestCompressKmer() {
- KmerBytesWritable.setGlobalKmerLength(k);
- KmerBytesWritable kmer = new KmerBytesWritable();
- kmer.setByRead(array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
-
- kmer.setByRead(array, 1);
- Assert.assertEquals(kmer.toString(), "ATAGAAG");
- }
-
- @Test
- public void TestMoveKmer() {
- KmerBytesWritable.setGlobalKmerLength(k);
- KmerBytesWritable kmer = new KmerBytesWritable();
- kmer.setByRead(array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
-
- for (int i = k; i < array.length - 1; i++) {
- kmer.shiftKmerWithNextCode(array[i]);
- Assert.assertTrue(false);
- }
-
- byte out = kmer.shiftKmerWithNextChar(array[array.length - 1]);
- Assert.assertEquals(out, GeneCode.getCodeFromSymbol((byte) 'A'));
- Assert.assertEquals(kmer.toString(), "ATAGAAG");
- }
-
- @Test
- public void TestCompressKmerReverse() {
- KmerBytesWritable.setGlobalKmerLength(k);
- KmerBytesWritable kmer = new KmerBytesWritable();
- kmer.setByRead(array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
-
- kmer.setByReadReverse(array, 1);
- Assert.assertEquals(kmer.toString(), "CTTCTAT");
- }
-
- @Test
- public void TestMoveKmerReverse() {
- KmerBytesWritable.setGlobalKmerLength(k);
- KmerBytesWritable kmer = new KmerBytesWritable();
- kmer.setByRead(array, 0);
- Assert.assertEquals(kmer.toString(), "AATAGAA");
-
- for (int i = k; i < array.length - 1; i++) {
- kmer.shiftKmerWithPreChar(array[i]);
- Assert.assertTrue(false);
- }
-
- byte out = kmer.shiftKmerWithPreChar(array[array.length - 1]);
- Assert.assertEquals(out, GeneCode.getCodeFromSymbol((byte) 'A'));
- Assert.assertEquals(kmer.toString(), "GAATAGA");
- }
-
- @Test
- public void TestGetGene() {
- KmerBytesWritable.setGlobalKmerLength(9);
- KmerBytesWritable kmer = new KmerBytesWritable();
- String text = "AGCTGACCG";
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G' };
- kmer.setByRead(array, 0);
-
- for (int i = 0; i < 9; i++) {
- Assert.assertEquals(text.charAt(i), (char) (GeneCode.getSymbolFromCode(kmer.getGeneCodeAtPosition(i))));
- }
- }
-
- @Test
- public void TestGetOneByteFromKmer() {
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- String string = "AGCTGACCGT";
- for (int k = 3; k <= 10; k++) {
- KmerBytesWritable.setGlobalKmerLength(k);
- KmerBytesWritable kmer = new KmerBytesWritable();
- KmerBytesWritable kmerAppend = new KmerBytesWritable();
- kmer.setByRead(array, 0);
- Assert.assertEquals(string.substring(0, k), kmer.toString());
- for (int b = 0; b < k; b++) {
- byte byteActual = KmerBytesWritable.getOneByteFromKmerAtPosition(b, kmer.getBytes(), kmer.getOffset(),
- kmer.getLength());
- byte byteExpect = GeneCode.getCodeFromSymbol(array[b]);
- for (int i = 1; i < 4 && b + i < k; i++) {
- byteExpect += GeneCode.getCodeFromSymbol(array[b + i]) << (i * 2);
- }
- Assert.assertEquals(byteActual, byteExpect);
- KmerBytesWritable.appendOneByteAtPosition(b, byteActual, kmerAppend.getBytes(), kmerAppend.getOffset(),
- kmerAppend.getLength());
- }
- Assert.assertEquals(kmer.toString(), kmerAppend.toString());
- }
- }
-<<<<<<< HEAD
-=======
-
- @Test
- public void TestMergeFFKmer() {
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- String text = "AGCTGACCGT";
- KmerBytesWritable kmer1 = new KmerBytesWritable(8);
- kmer1.setByRead(array, 0);
- String text1 = "AGCTGACC";
- KmerBytesWritable kmer2 = new KmerBytesWritable(8);
- kmer2.setByRead(array, 1);
- String text2 = "GCTGACCG";
- Assert.assertEquals(text2, kmer2.toString());
- KmerBytesWritable merge = new KmerBytesWritable(kmer1);
- int kmerSize = 8;
- merge.mergeWithFFKmer(kmerSize, kmer2);
- Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
-
- for (int i = 1; i < 8; i++) {
- merge.set(kmer1);
- merge.mergeWithFFKmer(i, kmer2);
- Assert.assertEquals(text1 + text2.substring(i - 1), merge.toString());
- }
-
- for (int ik = 1; ik <= 10; ik++) {
- for (int jk = 1; jk <= 10; jk++) {
- kmer1 = new KmerBytesWritable(ik);
- kmer2 = new KmerBytesWritable(jk);
- kmer1.setByRead(array, 0);
- kmer2.setByRead(array, 0);
- text1 = text.substring(0, ik);
- text2 = text.substring(0, jk);
- Assert.assertEquals(text1, kmer1.toString());
- Assert.assertEquals(text2, kmer2.toString());
- for (int x = 1; x < jk; x++) {
- merge.set(kmer1);
- merge.mergeWithFFKmer(x, kmer2);
- Assert.assertEquals(text1 + text2.substring(x - 1), merge.toString());
- }
- }
- }
- }
-
- @Test
- public void TestMergeFRKmer() {
- int kmerSize = 3;
- String result = "AAGCTAACAACC";
- byte[] resultArray = result.getBytes();
-
- String text1 = "AAGCTAA";
- KmerBytesWritable kmer1 = new KmerBytesWritable(text1.length());
- kmer1.setByRead(resultArray, 0);
- Assert.assertEquals(text1, kmer1.toString());
-
- // kmer2 is the rc of the end of the read
- String text2 = "GGTTGTT";
- KmerBytesWritable kmer2 = new KmerBytesWritable(text2.length());
- kmer2.setByReadReverse(resultArray, result.length() - text2.length());
- Assert.assertEquals(text2, kmer2.toString());
-
- KmerBytesWritable merge = new KmerBytesWritable(kmer1);
- merge.mergeWithFRKmer(kmerSize, kmer2);
- Assert.assertEquals(result, merge.toString());
-
- int i = 1;
- merge.set(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAAAACAACC", merge.toString());
-
- i = 2;
- merge.set(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAAACAACC", merge.toString());
-
- i = 3;
- merge.set(kmer1);
- merge.mergeWithFRKmer(i, kmer2);
- Assert.assertEquals("AAGCTAACAACC", merge.toString());
- }
-
-
- @Test
- public void TestMergeRFKmer() {
- int kmerSize = 3;
- String result = "GGCACAACAACCC";
- byte[] resultArray = result.getBytes();
-
- String text1 = "AACAACCC";
- KmerBytesWritable kmer1 = new KmerBytesWritable(text1.length());
- kmer1.setByRead(resultArray, 5);
- Assert.assertEquals(text1, kmer1.toString());
-
- // kmer2 is the rc of the end of the read
- String text2 = "TTGTGCC";
- KmerBytesWritable kmer2 = new KmerBytesWritable(text2.length());
- kmer2.setByReadReverse(resultArray, 0);
- Assert.assertEquals(text2, kmer2.toString());
-
- KmerBytesWritable merge = new KmerBytesWritable(kmer1);
- merge.mergeWithRFKmer(kmerSize, kmer2);
- Assert.assertEquals(result, merge.toString());
-
- int i = 1;
- merge.set(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAAAACAACCC", merge.toString());
-
- i = 2;
- merge.set(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAAACAACCC", merge.toString());
-
- i = 3;
- merge.set(kmer1);
- merge.mergeWithRFKmer(i, kmer2);
- Assert.assertEquals("GGCACAACAACCC", merge.toString());
-
- String test1;
- String test2;
- test1 = "CTA";
- test2 = "AGA";
- KmerBytesWritable k1 = new KmerBytesWritable(3);
- KmerBytesWritable k2 = new KmerBytesWritable(3);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k1.mergeWithRFKmer(3, k2);
- Assert.assertEquals("TCTA", k1.toString());
-
- test1 = "CTA";
- test2 = "ATA"; //TAT
- k1 = new KmerBytesWritable(3);
- k2 = new KmerBytesWritable(3);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k1.mergeWithFRKmer(3, k2);
- Assert.assertEquals("CTAT", k1.toString());
-
- test1 = "ATA";
- test2 = "CTA"; //TAT
- k1 = new KmerBytesWritable(3);
- k2 = new KmerBytesWritable(3);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k1.mergeWithFRKmer(3, k2);
- Assert.assertEquals("ATAG", k1.toString());
-
- test1 = "TCTAT";
- test2 = "GAAC";
- k1 = new KmerBytesWritable(5);
- k2 = new KmerBytesWritable(4);
- k1.setByRead(test1.getBytes(), 0);
- k2.setByRead(test2.getBytes(), 0);
- k1.mergeWithRFKmer(3, k2);
- Assert.assertEquals("GTTCTAT", k1.toString());
- }
-
-
-
- @Test
- public void TestMergeRRKmer() {
- byte[] array = { 'A', 'G', 'C', 'T', 'G', 'A', 'C', 'C', 'G', 'T' };
- String text = "AGCTGACCGT";
- KmerBytesWritable kmer1 = new KmerBytesWritable(8);
- kmer1.setByRead(array, 0);
- String text1 = "AGCTGACC";
- KmerBytesWritable kmer2 = new KmerBytesWritable(8);
- kmer2.setByRead(array, 1);
- String text2 = "GCTGACCG";
- Assert.assertEquals(text2, kmer2.toString());
- KmerBytesWritable merge = new KmerBytesWritable(kmer2);
- int kmerSize = 8;
- merge.mergeWithRRKmer(kmerSize, kmer1);
- Assert.assertEquals(text1 + text2.substring(kmerSize - 1), merge.toString());
-
- for (int i = 1; i < 8; i++) {
- merge.set(kmer2);
- merge.mergeWithRRKmer(i, kmer1);
- Assert.assertEquals(text1.substring(0, text1.length() - i + 1) + text2, merge.toString());
- }
-
- for (int ik = 1; ik <= 10; ik++) {
- for (int jk = 1; jk <= 10; jk++) {
- kmer1 = new KmerBytesWritable(ik);
- kmer2 = new KmerBytesWritable(jk);
- kmer1.setByRead(array, 0);
- kmer2.setByRead(array, 0);
- text1 = text.substring(0, ik);
- text2 = text.substring(0, jk);
- Assert.assertEquals(text1, kmer1.toString());
- Assert.assertEquals(text2, kmer2.toString());
- for (int x = 1; x < ik; x++) {
- merge.set(kmer2);
- merge.mergeWithRRKmer(x, kmer1);
- Assert.assertEquals(text1.substring(0, text1.length() - x + 1) + text2, merge.toString());
- }
- }
- }
- }
-
- @Test
- public void TestFinalMerge() {
- String selfString;
- String match;
- String msgString;
- int index;
- KmerBytesWritable kmer = new KmerBytesWritable();
- int kmerSize = 3;
-
- String F1 = "AATAG";
- String F2 = "TAGAA";
- String R1 = "CTATT";
- String R2 = "TTCTA";
-
- //FF test
- selfString = F1;
- match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
- msgString = F2;
- index = msgString.indexOf(match);
- kmer.reset(msgString.length() - index);
- kmer.setByRead(msgString.substring(index).getBytes(), 0);
- System.out.println(kmer.toString());
-
- //FR test
- selfString = F1;
- match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
- msgString = GeneCode.reverseComplement(R2);
- index = msgString.indexOf(match);
- kmer.reset(msgString.length() - index);
- kmer.setByRead(msgString.substring(index).getBytes(), 0);
- System.out.println(kmer.toString());
-
- //RF test
- selfString = R1;
- match = selfString.substring(0,kmerSize - 1);
- msgString = GeneCode.reverseComplement(F2);
- index = msgString.lastIndexOf(match) + kmerSize - 2;
- kmer.reset(index + 1);
- kmer.setByReadReverse(msgString.substring(0, index + 1).getBytes(), 0);
- System.out.println(kmer.toString());
-
- //RR test
- selfString = R1;
- match = selfString.substring(0,kmerSize - 1);
- msgString = R2;
- index = msgString.lastIndexOf(match) + kmerSize - 2;
- kmer.reset(index + 1);
- kmer.setByRead(msgString.substring(0, index + 1).getBytes(), 0);
- System.out.println(kmer.toString());
-
- String[][] connectedTable = new String[][]{
- {"FF", "RF"},
- {"FF", "RR"},
- {"FR", "RF"},
- {"FR", "RR"}
- };
- System.out.println(connectedTable[0][1]);
-
- Set<Long> s1 = new HashSet<Long>();
- Set<Long> s2 = new HashSet<Long>();
- s1.add((long) 1);
- s1.add((long) 2);
- s2.add((long) 2);
- s2.add((long) 3);
- Set<Long> intersection = new HashSet<Long>();
- intersection.addAll(s1);
- intersection.retainAll(s2);
- System.out.println(intersection.toString());
- Set<Long> difference = new HashSet<Long>();
- difference.addAll(s1);
- difference.removeAll(s2);
- System.out.println(difference.toString());
-
- Map<KmerBytesWritable, Set<Long>> map = new HashMap<KmerBytesWritable, Set<Long>>();
- KmerBytesWritable k1 = new KmerBytesWritable(3);
- Set<Long> set1 = new HashSet<Long>();
- k1.setByRead(("CTA").getBytes(), 0);
- set1.add((long)1);
- map.put(k1, set1);
- KmerBytesWritable k2 = new KmerBytesWritable(3);
- k2.setByRead(("GTA").getBytes(), 0);
- Set<Long> set2 = new HashSet<Long>();
- set2.add((long) 2);
- map.put(k2, set2);
- KmerBytesWritable k3 = new KmerBytesWritable(3);
- k3.setByRead(("ATG").getBytes(), 0);
- Set<Long> set3 = new HashSet<Long>();
- set3.add((long) 3);
- map.put(k3, set3);
- KmerBytesWritable k4 = new KmerBytesWritable(3);
- k4.setByRead(("AAT").getBytes(), 0);
- Set<Long> set4 = new HashSet<Long>();
- set4.add((long) 4);
- map.put(k4, set4);
- System.out.println("CTA = " + map.get(k1).toString());
- System.out.println("GTA = " + map.get(k2).toString());
- System.out.println("ATG = " + map.get(k3).toString());
- System.out.println("AAT = " + map.get(k4).toString());
- }
->>>>>>> 94e075b5c3db9aa613ef61c2581430a143b17bc8
-}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
index fc97664..06ba6bd 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
@@ -92,7 +92,7 @@
Assert.assertEquals(200 - 1 - j, copyList.getCountOfPosition());
while(iterator.hasNext()){
tmpKmer = iterator.next();
- Assert.assertTrue(!tmpKmer.getBytes().equals(deletePos.getBytes()));
+ Assert.assertTrue(!tmpKmer.getBlockBytes().equals(deletePos.getBlockBytes()));
i++;
}
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/NodeWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/NodeWritableTest.java
new file mode 100644
index 0000000..cf93c37
--- /dev/null
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/NodeWritableTest.java
@@ -0,0 +1,120 @@
+package edu.uci.ics.genomix.data.test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.genomix.data.KmerUtil;
+import edu.uci.ics.genomix.type.EdgeListWritable;
+import edu.uci.ics.genomix.type.EdgeWritable;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.NodeWritable.DirectionFlag;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+/**
+ * Unit tests for {@link NodeWritable}: serialization round-trips and node merging.
+ */
+public class NodeWritableTest {
+
+    /**
+     * Builds three 5-mer nodes linked as n1 --RF-- n2 --FF-- n3, checks that each one survives a
+     * marshal/unmarshal round-trip, then merges n1 and n3 into n2 and checks the merged kmer,
+     * coverage, start/end reads and degrees after each merge.
+     */
+    @Test
+    public void TestMergeRF_FF() throws IOException {
+        KmerBytesWritable.setGlobalKmerLength(5);
+        String test1 = "TAGAT"; // rc = ATCTA
+        String test2 = "TCTAG"; // rc = CTAGA
+        String test3 = "CTAGC"; // rc = GCTAG
+        VKmerBytesWritable k1 = new VKmerBytesWritable();
+        VKmerBytesWritable k2 = new VKmerBytesWritable();
+        VKmerBytesWritable k3 = new VKmerBytesWritable();
+        k1.setByRead(5, test1.getBytes(), 0);
+        k2.setByRead(5, test2.getBytes(), 0);
+        k3.setByRead(5, test3.getBytes(), 0);
+
+        PositionWritable read1 = new PositionWritable((byte) 1, 50, 0);
+        PositionWritable read2 = new PositionWritable((byte) 1, 75, 0);
+        PositionWritable read3 = new PositionWritable((byte) 0, 100, 0);
+        PositionListWritable plist1 = new PositionListWritable(Arrays.asList(read1));
+        PositionListWritable plist3 = new PositionListWritable(Arrays.asList(read3));
+
+        // k1 {r50} --RF-> k2 {r75} --FF-> k3 {~r100}
+
+        NodeWritable n1 = new NodeWritable();
+        n1.setInternalKmer(k1);
+        n1.setAvgCoverage(10);
+        n1.getStartReads().append(read1);
+        n1.getEdgeList(DirectionFlag.DIR_RF).add(new EdgeWritable(k2, plist1));
+        Assert.assertEquals("(50-0_0)", n1.getEdgeList(DirectionFlag.DIR_RF).get(0).getReadIDs().getPosition(0).toString());
+        Assert.assertEquals(10f, n1.getAvgCoverage());
+
+        NodeWritable n2 = new NodeWritable();
+        n2.setInternalKmer(k2);
+        n2.setAvgCoverage(20);
+        n2.getStartReads().append(read2);
+        Assert.assertEquals(1, n2.getStartReads().getCountOfPosition());
+        n2.getEdgeList(DirectionFlag.DIR_RF).add(new EdgeWritable(k1, plist1));
+        n2.getEdgeList(DirectionFlag.DIR_FF).add(new EdgeWritable(k3, plist3));
+        Assert.assertEquals(20f, n2.getAvgCoverage());
+
+        NodeWritable n3 = new NodeWritable();
+        n3.setInternalKmer(k3);
+        n3.setAvgCoverage(30);
+        n3.getEndReads().append(read3);
+        n3.getEdgeList(DirectionFlag.DIR_RR).add(new EdgeWritable(k2, plist3));
+        Assert.assertEquals("(100-0_0)", n3.getEdgeList(DirectionFlag.DIR_RR).get(0).getReadIDs().getPosition(0).toString());
+        Assert.assertEquals(30f, n3.getAvgCoverage());
+
+        // dump and recover each node back-to-back in one shared buffer, starting at a non-zero offset
+        byte[] block = new byte[2000];
+        int offset = 50;
+        offset = assertRoundTrip(n1, block, offset);
+        offset = assertRoundTrip(n2, block, offset);
+        assertRoundTrip(n3, block, offset);
+
+        // merge n1 into n2 (RF), then n3 into the merged node (FF)
+        n2.mergeWithNode(DirectionFlag.DIR_RF, n1);
+        Assert.assertEquals("ATCTAG", n2.getInternalKmer().toString());
+        Assert.assertEquals(15f, n2.getAvgCoverage());
+        Assert.assertEquals(1, n2.getEndReads().getCountOfPosition());
+        Assert.assertEquals("(50-0_1)", n2.getEndReads().getPosition(0).toString());
+        Assert.assertEquals(1, n2.getStartReads().getCountOfPosition());
+        Assert.assertEquals("(75-1_1)", n2.getStartReads().getPosition(0).toString());
+        Assert.assertEquals(0, n2.inDegree());
+        Assert.assertEquals(1, n2.outDegree());
+        Assert.assertEquals(k3, n2.getEdgeList(DirectionFlag.DIR_FF).get(0).getKey());
+
+        n2.mergeWithNode(DirectionFlag.DIR_FF, n3);
+        Assert.assertEquals("ATCTAGC", n2.getInternalKmer().toString());
+        Assert.assertEquals(20f, n2.getAvgCoverage());
+        Assert.assertEquals(2, n2.getEndReads().getCountOfPosition());
+        Assert.assertEquals("(50-0_1)", n2.getEndReads().getPosition(0).toString());
+        Assert.assertEquals("(100-2_0)", n2.getEndReads().getPosition(1).toString());
+        Assert.assertEquals(1, n2.getStartReads().getCountOfPosition());
+        Assert.assertEquals("(75-1_1)", n2.getStartReads().getPosition(0).toString());
+        Assert.assertEquals(0, n2.inDegree());
+        Assert.assertEquals(0, n2.outDegree());
+    }
+
+    /**
+     * Marshals {@code node} into {@code block} at {@code offset}, rebuilds a node from those bytes
+     * and asserts it equals the original.
+     *
+     * @return the offset just past the serialized node, i.e. where the next node can be written
+     */
+    private static int assertRoundTrip(NodeWritable node, byte[] block, int offset) throws IOException {
+        System.arraycopy(node.marshalToByteArray(), 0, block, offset, node.getSerializedLength());
+        NodeWritable copy = new NodeWritable(block, offset);
+        Assert.assertEquals(node, copy);
+        return offset + copy.getSerializedLength();
+    }
+}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
index a4be1e3..53853aa 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/VKmerBytesWritableTest.java
@@ -116,14 +116,14 @@
kmer.setByRead(k, array, 0);
Assert.assertEquals(string.substring(0, k), kmer.toString());
for (int b = 0; b < k; b++) {
- byte byteActual = KmerBytesWritable.getOneByteFromKmerAtPosition(b, kmer.getBytes(),
+ byte byteActual = KmerBytesWritable.getOneByteFromKmerAtPosition(b, kmer.getBlockBytes(),
kmer.getKmerOffset(), kmer.getKmerByteLength());
byte byteExpect = GeneCode.getCodeFromSymbol(array[b]);
for (int i = 1; i < 4 && b + i < k; i++) {
byteExpect += GeneCode.getCodeFromSymbol(array[b + i]) << (i * 2);
}
Assert.assertEquals(byteActual, byteExpect);
- KmerBytesWritable.appendOneByteAtPosition(b, byteActual, kmerAppend.getBytes(),
+ KmerBytesWritable.appendOneByteAtPosition(b, byteActual, kmerAppend.getBlockBytes(),
kmerAppend.getKmerOffset(), kmerAppend.getKmerByteLength());
}
Assert.assertEquals(kmer.toString(), kmerAppend.toString());