facilitate better testing via more constructors and proper hashing/equals
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index 736dc56..d3f47cf 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -50,6 +50,10 @@
public KmerBytesWritable(int k, byte[] storage, int offset) {
setNewReference(k, storage, offset);
}
+
+ public KmerBytesWritable(int k, String kmer) { // NOTE(review): k is never read here; the length handed on is kmer.length()
+ setNewReference(kmer.length(), kmer.getBytes(), 0); // getBytes() encodes with the platform default charset; presumably setNewReference accepts these raw text bytes as storage - TODO confirm
+ }
/**
* Initial Kmer space by kmerlength
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 844e2e8..5d6c36a 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -24,7 +24,7 @@
}
public NodeWritable(int kmerSize) {
- nodeID = new PositionWritable(0,(byte) 0);
+ nodeID = new PositionWritable(0, (byte) 0);
forwardForwardList = new PositionListWritable();
forwardReverseList = new PositionListWritable();
reverseForwardList = new PositionListWritable();
@@ -32,6 +32,17 @@
kmer = new KmerBytesWritable(kmerSize);
}
+ public NodeWritable(PositionWritable nodeID, PositionListWritable FFList, PositionListWritable FRList,
+ PositionListWritable RFList, PositionListWritable RRList, KmerBytesWritable kmer) {
+ this(kmer.getKmerLength()); // run the kmerSize constructor first so the fields copied into below exist
+ this.nodeID.set(nodeID); // parameter shadows the field, hence the explicit "this."
+ forwardForwardList.set(FFList);
+ forwardReverseList.set(FRList);
+ reverseForwardList.set(RFList);
+ reverseReverseList.set(RRList);
+ this.kmer.set(kmer); // "this." is required: a bare kmer.set(kmer) resolves both names to the parameter and never fills the field
+ }
+
public void setNodeID(PositionWritable ref) {
this.setNodeID(ref.getReadID(), ref.getPosInRead());
}
@@ -43,7 +54,7 @@
public void setKmer(KmerBytesWritable right) {
this.kmer.set(right);
}
-
+
public void reset(int kmerSize) {
nodeID.set(0, (byte) 0);
forwardForwardList.reset();
@@ -56,7 +67,7 @@
public PositionListWritable getFFList() {
return forwardForwardList;
}
-
+
public PositionListWritable getFRList() {
return forwardReverseList;
}
@@ -64,7 +75,7 @@
public PositionListWritable getRFList() {
return reverseForwardList;
}
-
+
public PositionListWritable getRRList() {
return reverseReverseList;
}
@@ -86,8 +97,8 @@
this.forwardReverseList.set(nextNode.forwardReverseList);
kmer.mergeNextKmer(initialKmerSize, nextNode.getKmer());
}
-
- public void mergeForwardPre(NodeWritable preNode, int initialKmerSize){
+
+ public void mergeForwardPre(NodeWritable preNode, int initialKmerSize) {
this.reverseForwardList.set(preNode.reverseForwardList);
this.reverseReverseList.set(preNode.reverseReverseList);
kmer.mergePreKmer(initialKmerSize, preNode.getKmer());
@@ -133,6 +144,18 @@
}
@Override
+ public boolean equals(Object o) { // NOTE(review): no matching hashCode() override is visible in this diff - confirm one exists before using nodes as hash keys
+ if (o instanceof NodeWritable) {
+ NodeWritable nw = (NodeWritable) o;
+ return (this.nodeID.equals(nw.nodeID) && this.forwardForwardList.equals(nw.forwardForwardList) // equal only when the id, all four position lists and the kmer are equal
+ && this.forwardReverseList.equals(nw.forwardReverseList)
+ && this.reverseForwardList.equals(nw.reverseForwardList)
+ && this.reverseReverseList.equals(nw.reverseReverseList) && this.kmer.equals(nw.kmer));
+ }
+ return false; // null or not a NodeWritable
+ }
+
+ @Override
public String toString() {
StringBuilder sbuilder = new StringBuilder();
sbuilder.append('(');
@@ -144,15 +167,15 @@
sbuilder.append(kmer.toString()).append(')');
return sbuilder.toString();
}
-
- public int inDegree(){
+
+ public int inDegree() {
return reverseReverseList.getCountOfPosition() + reverseForwardList.getCountOfPosition();
}
-
- public int outDegree(){
+
+ public int outDegree() {
return forwardForwardList.getCountOfPosition() + forwardReverseList.getCountOfPosition();
}
-
+
/*
* Return if this node is a "path" compressible node, that is, it has an in-degree and out-degree of 1
*/
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
index 34fb0f6..cd9fc66 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
@@ -4,7 +4,9 @@
import java.io.DataOutput;
import java.io.IOException;
import java.io.Serializable;
+import java.util.Arrays;
import java.util.Iterator;
+import java.util.List;
import org.apache.hadoop.io.Writable;
@@ -32,6 +34,13 @@
public PositionListWritable(int count, byte[] data, int offset) {
setNewReference(count, data, offset);
}
+
+ public PositionListWritable(List<PositionWritable> posns) { // builds a list holding the given positions
+ this(); // start from the state set up by the no-argument constructor
+ for (PositionWritable p : posns) {
+ append(p); // appended in the iteration order of posns
+ }
+ }
public void setNewReference(int count, byte[] data, int offset) {
this.valueCount = count;
@@ -184,4 +193,23 @@
}
return sbuilder.toString();
}
+
+ @Override
+ public int hashCode() { // hash is delegated to Marshal.hashBytes
+ return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength()); // arguments: backing array, start offset, length
+ }
+
+ @Override
+ public boolean equals(Object o) { // NOTE(review): hashCode() hashes getLength() bytes - confirm that range covers exactly valueCount entries so equal lists hash equally
+ if (!(o instanceof PositionListWritable))
+ return false; // null or a different type
+ PositionListWritable other = (PositionListWritable) o;
+ if (this.valueCount != other.valueCount)
+ return false; // different number of positions
+ for (int i=0; i < this.valueCount; i++) {
+ if (!this.getPosition(i).equals(other.getPosition(i))) // element-wise comparison, index by index
+ return false;
+ }
+ return true;
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
index 0988813..b548934 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
@@ -132,7 +132,6 @@
return diff2;
}
return diff1;
- // return compareBytes(b1, s1, l1, b2, s2, l2);
}
}
diff --git a/genomix/genomix-hadoop/pom.xml b/genomix/genomix-hadoop/pom.xml
index 610092a..8ca2fa3 100755
--- a/genomix/genomix-hadoop/pom.xml
+++ b/genomix/genomix-hadoop/pom.xml
@@ -174,5 +174,12 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.mrunit</groupId>
+ <artifactId>mrunit</artifactId>
+ <version>1.0.0</version>
+ <classifier>hadoop1</classifier>
+ <scope>test</scope> <!-- MRUnit is a unit-test harness; keep it off the compile/runtime classpath -->
+ </dependency>
</dependencies>
</project>
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
index eaf2a6f..c735a0d 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
@@ -52,6 +52,7 @@
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, int readLength,
boolean onlyTest1stJob, boolean seqOutput, String defaultConfPath) throws IOException {
if (onlyTest1stJob == true) {
+
runfirstjob(inputPath, numReducers, sizeKmer, readLength, seqOutput, defaultConfPath);
} else {
runfirstjob(inputPath, numReducers, sizeKmer, readLength, true, defaultConfPath);
@@ -113,6 +114,7 @@
conf.setPartitionerClass(ReadIDPartitioner.class);
+ // grouping is done on the readID only; sorting is based on the (readID, abs(posn))
conf.setOutputKeyComparatorClass(PositionWritable.Comparator.class);
conf.setOutputValueGroupingComparator(PositionWritable.FirstComparator.class);