Add Hadoop graph-building test and keep both binary and text results
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
index b791515..eb0bd59 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
@@ -32,7 +32,7 @@
public KmerListWritable(int kmerlength) {
this();
this.kmerlength = kmerlength;
- this.kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);;
+ this.kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);
}
public KmerListWritable(int kmerlength, int count, byte[] data, int offset) {
@@ -68,7 +68,7 @@
*/
public void appendList(KmerListWritable otherList) {
if (otherList.valueCount > 0) {
- setSize((valueCount + otherList.valueCount) * PositionWritable.LENGTH);
+ setSize((valueCount + otherList.valueCount) * kmerByteSize);
// copy contents of otherList into the end of my storage
System.arraycopy(otherList.storage, otherList.offset,
storage, offset + valueCount * kmerByteSize,
@@ -98,7 +98,9 @@
}
}
- public void reset() {
+ public void reset(int kmerSize) {
+ kmerlength = kmerSize;
+ kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);
storage = EMPTY;
valueCount = 0;
offset = 0;
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 161d31f..4725e30 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -10,10 +10,10 @@
public class NodeWritable implements WritableComparable<NodeWritable>, Serializable{
- public static class KMER{
- public static final byte EXIST = 0;
- public static final byte NON_EXIST = 1;
- }
+// public static class KMER{
+// public static final byte EXIST = 0;
+// public static final byte NON_EXIST = 1;
+// }
private static final long serialVersionUID = 1L;
public static final NodeWritable EMPTY_NODE = new NodeWritable(0);
@@ -24,7 +24,7 @@
private KmerListWritable reverseForwardList;
private KmerListWritable reverseReverseList;
private KmerBytesWritable kmer;
- private byte kmerMark;
+// private byte kmerMark;
// merge/update directions
public static class DirectionFlag {
@@ -41,12 +41,12 @@
public NodeWritable(int kmerSize) {
nodeIdList = new PositionListWritable();
- forwardForwardList = new KmerListWritable();
- forwardReverseList = new KmerListWritable();
- reverseForwardList = new KmerListWritable();
- reverseReverseList = new KmerListWritable();
+ forwardForwardList = new KmerListWritable(kmerSize);
+ forwardReverseList = new KmerListWritable(kmerSize);
+ reverseForwardList = new KmerListWritable(kmerSize);
+ reverseReverseList = new KmerListWritable(kmerSize);
kmer = new KmerBytesWritable(kmerSize);
- kmerMark = KMER.NON_EXIST;
+// kmerMark = KMER.NON_EXIST;
}
public NodeWritable(PositionListWritable nodeIdList, KmerListWritable FFList, KmerListWritable FRList,
@@ -68,17 +68,17 @@
this.reverseForwardList.set(RFList);
this.reverseReverseList.set(RRList);
this.kmer.set(kmer);
- kmerMark = KMER.EXIST;
+// kmerMark = KMER.EXIST;
}
public void reset(int kmerSize) {
nodeIdList.reset();
- forwardForwardList.reset();
- forwardReverseList.reset();
- reverseForwardList.reset();
- reverseReverseList.reset();
+ forwardForwardList.reset(kmerSize);
+ forwardReverseList.reset(kmerSize);
+ reverseForwardList.reset(kmerSize);
+ reverseReverseList.reset(kmerSize);
kmer.reset(kmerSize);
- kmerMark = KMER.NON_EXIST;
+// kmerMark = KMER.NON_EXIST;
}
@@ -95,7 +95,7 @@
}
public void setKmer(KmerBytesWritable kmer) {
- kmerMark = KMER.EXIST;
+// kmerMark = KMER.EXIST;
this.kmer.set(kmer);
}
@@ -151,26 +151,26 @@
}
@Override
public void write(DataOutput out) throws IOException {
- out.writeByte(kmerMark);
- this.nodeIdList.write(out);
+// out.writeByte(kmerMark);
+// this.nodeIdList.write(out);
this.forwardForwardList.write(out);
this.forwardReverseList.write(out);
- this.reverseForwardList.write(out);
- this.reverseReverseList.write(out);
- if(kmerMark == KMER.EXIST)
- this.kmer.write(out);
+// this.reverseForwardList.write(out);
+// this.reverseReverseList.write(out);
+// if(kmerMark == KMER.EXIST)
+// this.kmer.write(out);
}
@Override
public void readFields(DataInput in) throws IOException {
- kmerMark = in.readByte();
- this.nodeIdList.readFields(in);
+// kmerMark = in.readByte();
+// this.nodeIdList.readFields(in);
this.forwardForwardList.readFields(in);
this.forwardReverseList.readFields(in);
- this.reverseForwardList.readFields(in);
- this.reverseReverseList.readFields(in);
- if(kmerMark == KMER.EXIST)
- this.kmer.readFields(in);
+// this.reverseForwardList.readFields(in);
+// this.reverseReverseList.readFields(in);
+// if(kmerMark == KMER.EXIST)
+// this.kmer.readFields(in);
}
@Override
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
index c31ca6d..71246a1 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
@@ -24,13 +24,13 @@
String randomString = generateString(i);
byte[] array = randomString.getBytes();
kmer.setByRead(array, 0);
- kmerList.reset();
+ kmerList.reset(kmer.getKmerLength());
kmerList.append(kmer);
Assert.assertEquals(kmerList.getPosition(0).toString(), randomString);
Assert.assertEquals(1, kmerList.getCountOfPosition());
}
- kmerList.reset();
+ kmerList.reset(kmer.getKmerLength());
//add one more kmer each time and fix kmerSize
for (int i = 0; i < 200; i++) {
kmer = new KmerBytesWritable(5);
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
index 50223e2..3b615cb 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixMapper.java
@@ -57,9 +57,9 @@
nextReverseKmer = new KmerBytesWritable(KMER_SIZE);
nodeId = new PositionWritable();
nodeIdList = new PositionListWritable();
- edgeListForPreKmer = new KmerListWritable();
- edgeListForNextKmer = new KmerListWritable();
- outputNode = new NodeWritable();
+ edgeListForPreKmer = new KmerListWritable(KMER_SIZE);
+ edgeListForNextKmer = new KmerListWritable(KMER_SIZE);
+ outputNode = new NodeWritable(KMER_SIZE);
preKmerDir = KmerDir.FORWARD;
curKmerDir = KmerDir.FORWARD;
nextKmerDir = KmerDir.FORWARD;
@@ -86,7 +86,7 @@
}
/** first kmer **/
- outputNode.reset(0);
+ outputNode.reset(KMER_SIZE);
curForwardKmer.setByRead(array, 0);
curReverseKmer.setByReadReverse(array, 0);
curKmerDir = curForwardKmer.compareTo(curReverseKmer) <= 0 ? KmerDir.FORWARD : KmerDir.REVERSE;
@@ -100,7 +100,7 @@
/** middle kmer **/
for (int i = KMER_SIZE + 1; i < array.length; i++) {
- outputNode.reset(0);
+ outputNode.reset(KMER_SIZE);
setPreKmerByOldCurKmer();
setCurKmerByOldNextKmer();
setNextKmer(array[i]);
@@ -114,7 +114,7 @@
}
/** last kmer **/
- outputNode.reset(0);
+ outputNode.reset(KMER_SIZE);
setPreKmerByOldCurKmer();
setCurKmerByOldNextKmer();
//set value.nodeId
@@ -138,12 +138,12 @@
case FORWARD:
switch(preKmerDir){
case FORWARD:
- edgeListForPreKmer.reset();
+ edgeListForPreKmer.reset(KMER_SIZE);
edgeListForPreKmer.append(preForwardKmer);
outputNode.setRRList(edgeListForPreKmer);
break;
case REVERSE:
- edgeListForPreKmer.reset();
+ edgeListForPreKmer.reset(KMER_SIZE);
edgeListForPreKmer.append(preReverseKmer);
outputNode.setRFList(edgeListForPreKmer);
break;
@@ -152,12 +152,12 @@
case REVERSE:
switch(preKmerDir){
case FORWARD:
- edgeListForPreKmer.reset();
+ edgeListForPreKmer.reset(KMER_SIZE);
edgeListForPreKmer.append(preForwardKmer);
outputNode.setFRList(edgeListForPreKmer);
break;
case REVERSE:
- edgeListForPreKmer.reset();
+ edgeListForPreKmer.reset(KMER_SIZE);
edgeListForPreKmer.append(preReverseKmer);
outputNode.setFFList(edgeListForPreKmer);
break;
@@ -171,12 +171,12 @@
case FORWARD:
switch(nextKmerDir){
case FORWARD:
- edgeListForNextKmer.reset();
+ edgeListForNextKmer.reset(KMER_SIZE);
edgeListForNextKmer.append(nextForwardKmer);
outputNode.setFFList(edgeListForNextKmer);
break;
case REVERSE:
- edgeListForNextKmer.reset();
+ edgeListForNextKmer.reset(KMER_SIZE);
edgeListForNextKmer.append(nextReverseKmer);
outputNode.setFRList(edgeListForNextKmer);
break;
@@ -185,12 +185,12 @@
case REVERSE:
switch(nextKmerDir){
case FORWARD:
- edgeListForNextKmer.reset();
+ edgeListForNextKmer.reset(KMER_SIZE);
edgeListForNextKmer.append(nextForwardKmer);
outputNode.setRFList(edgeListForNextKmer);
break;
case REVERSE:
- edgeListForNextKmer.reset();
+ edgeListForNextKmer.reset(KMER_SIZE);
edgeListForNextKmer.append(nextReverseKmer);
outputNode.setRRList(edgeListForNextKmer);
break;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
index 625984d..6472f05 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -16,20 +16,20 @@
Reducer<KmerBytesWritable, NodeWritable, KmerBytesWritable, NodeWritable>{
private NodeWritable outputNode = new NodeWritable();
-
+ private NodeWritable tmpNode = new NodeWritable();
@Override
public void reduce(KmerBytesWritable key, Iterator<NodeWritable> values,
OutputCollector<KmerBytesWritable, NodeWritable> output,
Reporter reporter) throws IOException {
- outputNode.reset(GenomixMapper.KMER_SIZE);
+ outputNode.reset(0);
- //copy first item to outputNode
- if(values.hasNext()){
- NodeWritable tmpNode = values.next();
- outputNode.set(tmpNode);
- }
+// //copy first item to outputNode
+// if(values.hasNext()){
+// NodeWritable tmpNode = values.next();
+// outputNode.set(tmpNode);
+// }
while (values.hasNext()) {
- NodeWritable tmpNode = values.next();
+ tmpNode.set(values.next());
outputNode.getNodeIdList().appendList(tmpNode.getNodeIdList());
outputNode.getFFList().appendList(tmpNode.getFFList());
outputNode.getFRList().appendList(tmpNode.getFRList());
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
index 8aef9a8..4716072 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
@@ -14,6 +14,8 @@
import org.apache.hadoop.mapred.MiniMRCluster;
import org.junit.Test;
+import edu.uci.ics.genomix.hadoop.pmcommon.HadoopMiniClusterTest;
+
@SuppressWarnings("deprecation")
public class GraphBuildingTest {
@@ -43,7 +45,7 @@
public void TestMapKmerToNode() throws Exception {
GenomixDriver driver = new GenomixDriver();
- driver.run(HDFS_PATH, RESULT_PATH, 2, SIZE_KMER, READ_LENGTH, false, HADOOP_CONF_PATH);
+ driver.run(HDFS_PATH, RESULT_PATH, 0, SIZE_KMER, READ_LENGTH, true, HADOOP_CONF_PATH);
dumpResult();
}
@@ -51,9 +53,9 @@
FileSystem lfs = FileSystem.getLocal(new Configuration());
lfs.delete(new Path("build"), true);
System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+ dfsCluster = new MiniDFSCluster(conf, 1, true, null);
dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+ mrCluster = new MiniMRCluster(1, dfs.getUri().toString(), 1);
Path src = new Path(DATA_PATH);
Path dest = new Path(HDFS_PATH + "/");
@@ -75,5 +77,6 @@
Path src = new Path(RESULT_PATH);
Path dest = new Path(ACTUAL_RESULT_DIR);
dfs.copyToLocalFile(src, dest);
+ HadoopMiniClusterTest.copyResultsToLocal(RESULT_PATH, "test.txt", false, conf, true, dfs);
}
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
index d4ae5dd..0f2d714 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
@@ -69,12 +69,19 @@
Configuration conf) throws IOException {
copyResultsToLocal(hdfsSrcDir, localDestFile, resultsAreText, conf, true);
}
+
+ public static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
+ Configuration conf, boolean ignoreZeroOutputs) throws IOException {
+ copyResultsToLocal(hdfsSrcDir, localDestFile, resultsAreText,
+ conf, ignoreZeroOutputs, dfs);
+ }
+
/*
* Merge and copy a DFS directory to a local destination, converting to text if necessary.
* Also locally store the binary-formatted result if available.
*/
- protected static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
- Configuration conf, boolean ignoreZeroOutputs) throws IOException {
+ public static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
+ Configuration conf, boolean ignoreZeroOutputs, FileSystem dfs) throws IOException {
if (resultsAreText) {
// for text files, just concatenate them together
FileUtil.copyMerge(FileSystem.get(conf), new Path(hdfsSrcDir), FileSystem.getLocal(new Configuration()),