P4 path merge: pass test
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
index de27f30..88bb79c 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
@@ -56,11 +56,13 @@
}
public void append(KmerBytesWritable kmer){
- kmerByteSize = kmer.kmerByteSize;
- kmerlength = kmer.kmerlength;
- setSize((1 + valueCount) * kmerByteSize);
- System.arraycopy(kmer.getBytes(), 0, storage, offset + valueCount * kmerByteSize, kmerByteSize);
- valueCount += 1;
+ if(kmer != null){
+ kmerByteSize = kmer.kmerByteSize;
+ kmerlength = kmer.kmerlength;
+ setSize((1 + valueCount) * kmerByteSize);
+ System.arraycopy(kmer.getBytes(), 0, storage, offset + valueCount * kmerByteSize, kmerByteSize);
+ valueCount += 1;
+ }
}
/*
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
index 37c64aa..b4361ac 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
@@ -228,6 +228,15 @@
merge.set(kmer1);
merge.mergeWithRFKmer(i, kmer2);
Assert.assertEquals("GGCACAACAACCC", merge.toString());
+
+ String test1 = "CTA";
+ String test2 = "AGA";
+ KmerBytesWritable k1 = new KmerBytesWritable(3);
+ KmerBytesWritable k2 = new KmerBytesWritable(3);
+ k1.setByRead(test1.getBytes(), 0);
+ k2.setByRead(test2.getBytes(), 0);
+ k1.mergeWithRFKmer(3, k2);
+ Assert.assertEquals("CTAT", k1.toString()); // compare string forms, as the assertion above does
}
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2
new file mode 100644
index 0000000..0f501fe
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2
@@ -0,0 +1 @@
+1 AATA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/2~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3
new file mode 100644
index 0000000..b90246c
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3
@@ -0,0 +1 @@
+1 AATAG
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/3~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4
new file mode 100644
index 0000000..3f1cd5c
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4
@@ -0,0 +1 @@
+1 AATAGA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/4~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5
new file mode 100644
index 0000000..a720dc4
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5
@@ -0,0 +1 @@
+1 AATAGAA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/5~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6
new file mode 100644
index 0000000..7a95b7c
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6
@@ -0,0 +1 @@
+1 AATAGAAC
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/6~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7
new file mode 100644
index 0000000..ce4b8a8
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7
@@ -0,0 +1 @@
+1 AATAGAACT
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/7~
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8
new file mode 100644
index 0000000..3959d4d
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8
@@ -0,0 +1 @@
+1 AATAGAACTT
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8~
new file mode 100644
index 0000000..89ead1e
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/8~
@@ -0,0 +1 @@
+1 AATAGAACTTA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9 b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9
new file mode 100644
index 0000000..89ead1e
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9
@@ -0,0 +1 @@
+1 AATAGAACTTA
diff --git a/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9~ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9~
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/pathmerge_TestSet/9~
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
index 42c43f8..217e882 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
@@ -22,13 +22,13 @@
private JobConf conf = new JobConf();
private static final String ACTUAL_RESULT_DIR = "actual";
private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH = "data/webmap/8";
+ private static final String DATA_PATH = "data/webmap/pathmerge_TestSet/9";
private static final String HDFS_PATH = "/webmap";
private static final String RESULT_PATH = "/result";
// private static final int COUNT_REDUCER = 2;
private static final int SIZE_KMER = 3;
- private static final int READ_LENGTH = 10;
+ private static final int READ_LENGTH = 11;
private MiniDFSCluster dfsCluster;
private MiniMRCluster mrCluster;
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/2/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/2/part-00000
new file mode 100755
index 0000000..6be54c5
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/2/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/3/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/3/part-00000
new file mode 100755
index 0000000..6f2b1d8
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/3/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/4/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/4/part-00000
new file mode 100755
index 0000000..7fbe1a4
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/4/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/5/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/5/part-00000
new file mode 100755
index 0000000..1887e36
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/5/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/6/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/6/part-00000
new file mode 100755
index 0000000..72b4009
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/6/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/7/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/7/part-00000
new file mode 100755
index 0000000..394c8c9
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/7/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/PathMergeTestSet/9/part-00000 b/genomix/genomix-pregelix/data/PathMergeTestSet/9/part-00000
new file mode 100755
index 0000000..b7760c2
--- /dev/null
+++ b/genomix/genomix-pregelix/data/PathMergeTestSet/9/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
index c6a2c9a..5d06234 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
@@ -284,15 +284,19 @@
switch (neighborToDeleteDir & MessageFlag.DIR_MASK) {
case MessageFlag.DIR_FF:
this.getFFList().remove(nodeToDelete); //set(null);
+ this.getKmer().mergeWithFFKmer(kmerSize, kmer);
break;
case MessageFlag.DIR_FR:
this.getFRList().remove(nodeToDelete);
+ this.getKmer().mergeWithFRKmer(kmerSize, kmer);
break;
case MessageFlag.DIR_RF:
this.getRFList().remove(nodeToDelete);
+ this.getKmer().mergeWithRFKmer(kmerSize, kmer);
break;
case MessageFlag.DIR_RR:
this.getRRList().remove(nodeToDelete);
+ this.getKmer().mergeWithRRKmer(kmerSize, kmer);
break;
}
// TODO: remove switch below and replace with general direction merge
@@ -300,19 +304,15 @@
switch (neighborToMergeDir & MessageFlag.DIR_MASK) {
case MessageFlag.DIR_FF:
- this.getKmer().mergeWithFFKmer(kmerSize, kmer);
this.getFFList().append(nodeToAdd);
break;
case MessageFlag.DIR_FR:
- this.getKmer().mergeWithFRKmer(kmerSize, kmer);
this.getFRList().append(nodeToAdd);
break;
case MessageFlag.DIR_RF:
- this.getKmer().mergeWithRFKmer(kmerSize, kmer);
this.getRFList().append(nodeToAdd);
break;
case MessageFlag.DIR_RR:
- this.getKmer().mergeWithRRKmer(kmerSize, kmer);
this.getRRList().append(nodeToAdd);
break;
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
index c44fb53..937fa42 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
@@ -365,6 +365,8 @@
setSuccessorAdjMsg();
if(ifFlipWithPredecessor())
outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
outgoingMsg.setFlag(outFlag);
outgoingMsg.setNeighberNode(getVertexValue().getIncomingList());
outgoingMsg.setSourceVertexId(getVertexId());
@@ -376,6 +378,8 @@
setPredecessorAdjMsg();
if(ifFilpWithSuccessor())
outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
outgoingMsg.setFlag(outFlag);
outgoingMsg.setNeighberNode(getVertexValue().getOutgoingList());
outgoingMsg.setSourceVertexId(getVertexId());
@@ -397,6 +401,8 @@
setSuccessorAdjMsg();
if(ifFlipWithPredecessor())
outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
outgoingMsg.setFlag(outFlag);
outgoingMsg.setNeighberNode(getVertexValue().getIncomingList());
outgoingMsg.setSourceVertexId(getVertexId());
@@ -408,6 +414,8 @@
setPredecessorAdjMsg();
if(ifFilpWithSuccessor())
outgoingMsg.setFlip(true);
+ else
+ outgoingMsg.setFlip(false);
outgoingMsg.setFlag(outFlag);
outgoingMsg.setNeighberNode(getVertexValue().getOutgoingList());
outgoingMsg.setSourceVertexId(getVertexId());
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P4ForPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P4ForPathMergeVertex.java
index 6768db6..94428ca 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P4ForPathMergeVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P4ForPathMergeVertex.java
@@ -175,7 +175,7 @@
sendUpdateMsgToSuccessor();
}
}
- }else {
+ else {
// I'm a tail
if (hasNext && hasPrev) {
if ((!nextHead && !prevHead) && (curKmer.compareTo(nextKmer) < 0 && curKmer.compareTo(prevKmer) < 0)) {
@@ -197,6 +197,7 @@
}
}
}
+ }
}
else if (getSuperstep() % 4 == 0){
//update neighber
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
index e151123..bc08600 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
@@ -12,7 +12,6 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
import edu.uci.ics.genomix.type.KmerBytesWritable;
@@ -28,16 +27,14 @@
File srcPath = new File(strSrcDir);
for (File f : srcPath.listFiles((FilenameFilter) (new WildcardFileFilter("part*")))) {
SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, new Path(f.getAbsolutePath()), conf);
- //NodeWritable key = new NodeWritable(kmerSize);
- //NullWritable value = NullWritable.get();
- PositionWritable key = new PositionWritable();
+ KmerBytesWritable key = new KmerBytesWritable();
VertexValueWritable value = new VertexValueWritable();
while (reader.next(key, value)) {
if (key == null) {
break;
}
- bw.write(key.toString() + value.toString());
+ bw.write(key.toString() + "\t" + value.toString());
System.out.println(key.toString());
bw.newLine();
}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
index d2d3969..0e798fe 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
@@ -4,7 +4,6 @@
import java.io.FileOutputStream;
import java.io.IOException;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
import edu.uci.ics.genomix.pregelix.format.GraphCleanInputFormat;
import edu.uci.ics.genomix.pregelix.format.GraphCleanOutputFormat;
import edu.uci.ics.genomix.pregelix.format.InitialGraphCleanInputFormat;
@@ -19,6 +18,7 @@
import edu.uci.ics.genomix.pregelix.operator.pathmerge.P4ForPathMergeVertex;
import edu.uci.ics.genomix.pregelix.operator.tipremove.TipAddVertex;
import edu.uci.ics.genomix.pregelix.operator.tipremove.TipRemoveVertex;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.pregelix.api.job.PregelixJob;
public class JobGenerator {
@@ -83,7 +83,7 @@
job.setVertexInputFormatClass(InitialGraphCleanInputFormat.class);
job.setVertexOutputFormatClass(GraphCleanOutputFormat.class);
job.setDynamicVertexValueSize(true);
- job.setOutputKeyClass(PositionWritable.class);
+ job.setOutputKeyClass(KmerBytesWritable.class);
job.setOutputValueClass(VertexValueWritable.class);
job.getConfiguration().setInt(P4ForPathMergeVertex.KMER_SIZE, 3);
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
index 2e67da2..5aedeb7 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
@@ -45,20 +45,14 @@
public static final String PreFix = "data/PathMergeTestSet"; //"graphbuildresult";
public static final String[] TestDir = { PreFix + File.separator
- + "8"};
- //+ "tipremove/TipRemoveGraph/bin/fr_with_tip"};
- //+ "graphs/pathmerge/singleread"};
- //+ "bridgeadd/BridgeAddGraph/bin/tworeads"};
- /*+ "2", PreFix + File.separator
- + "3", PreFix + File.separator
- + "4", PreFix + File.separator
- + "5", PreFix + File.separator
- + "6", PreFix + File.separator
- + "7", PreFix + File.separator
- + "8", PreFix + File.separator
- + "9", PreFix + File.separator
- + "tworeads3", PreFix + File.separator
- + "tworeads_6"};*/
+// + "2", PreFix + File.separator
+// + "3", PreFix + File.separator
+// + "4", PreFix + File.separator
+// + "5", PreFix + File.separator
+// + "6", PreFix + File.separator
+// + "7", PreFix + File.separator
+// + "8", PreFix + File.separator
+ + "4"};
private static final String ACTUAL_RESULT_DIR = "data/actual/pathmerge";
private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
diff --git a/genomix/genomix-pregelix/src/test/resources/jobs/P4ForMergeGraph.xml b/genomix/genomix-pregelix/src/test/resources/jobs/P4ForMergeGraph.xml
index f20a863..597e5c3 100644
--- a/genomix/genomix-pregelix/src/test/resources/jobs/P4ForMergeGraph.xml
+++ b/genomix/genomix-pregelix/src/test/resources/jobs/P4ForMergeGraph.xml
@@ -58,7 +58,7 @@
<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
<property><name>fs.default.name</name><value>file:///</value></property>
-<property><name>mapred.output.key.class</name><value>edu.uci.ics.genomix.oldtype.PositionWritable</value></property>
+<property><name>mapred.output.key.class</name><value>edu.uci.ics.genomix.type.KmerBytesWritable</value></property>
<property><name>tasktracker.http.threads</name><value>40</value></property>
<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>