add path merge test case
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java
index 0934227..69352a1 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java
@@ -28,6 +28,8 @@
public LogAlgorithmMessageWritable(){
sourceVertexId = new VKmerBytesWritable(ThreeStepLogAlgorithmForPathMergeVertex.kmerSize);
chainVertexId = new VKmerBytesWritable(ThreeStepLogAlgorithmForPathMergeVertex.kmerSize);
+ adjMap = 0;
+ message = 0;
checkMessage = 0;
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java
index c1be540..057ab08 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java
@@ -91,8 +91,8 @@
return GeneCode.getSymbolFromBitMap(adjMap);
return GeneCode.getSymbolFromBitMap(adjMap) + "\t" +
getLengthOfMergeChain() + "\t" +
- mergeChain.toString() + "\t" +
- state;
+ mergeChain.toString();
+ //+ "\t" + state;
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/NaiveAlgorithmForPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/NaiveAlgorithmForPathMergeVertex.java
index 04b8525..6eb6717 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/NaiveAlgorithmForPathMergeVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/NaiveAlgorithmForPathMergeVertex.java
@@ -118,10 +118,11 @@
public void sendMsgToPathVertex(){
if(!msg.isRear()){
destVertexId.set(getDestVertexIdFromChain(msg.getChainVertexId(), msg.getAdjMap()));
+ msg.set(getVertexId(), msg.getChainVertexId(), msg.getHeadVertexId(), (byte)0, msg.isRear());
}else{
destVertexId.set(msg.getHeadVertexId());
+ msg.set(msg.getSourceVertexId(), msg.getChainVertexId(), msg.getHeadVertexId(), (byte)0, msg.isRear());
}
- msg.set(getVertexId(), msg.getChainVertexId(), msg.getHeadVertexId(), (byte)0, msg.isRear());
sendMsg(destVertexId,msg);
}
/**
@@ -144,7 +145,7 @@
}else{// is Rear
chainVertexId.set(msg.getSourceVertexId());
getVertexValue().set(GraphVertexOperation.updateRightNeighberByVertexId(getVertexValue().getAdjMap(), chainVertexId, kmerSize),
- State.START_VERTEX, msg.getChainVertexId());
+ State.FINAL_VERTEX, msg.getChainVertexId());
setVertexValue(getVertexValue());
//String source = msg.getChainVertexId().toString();
//System.out.print("");
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/TwoStepLogAlgorithmForPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/TwoStepLogAlgorithmForPathMergeVertex.java
index ca5d56f..b613ec1 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/TwoStepLogAlgorithmForPathMergeVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/TwoStepLogAlgorithmForPathMergeVertex.java
@@ -275,7 +275,6 @@
*/
public void sendMsgToPathVertex(Iterator<LogAlgorithmMessageWritable> msgIterator){
if(getSuperstep() == 3){
- //msg.reset();
sendMsgToPathVertex(getVertexId(), getVertexValue().getAdjMap());
}
else{
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java
index c7349dd..55ddcff 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java
@@ -25,7 +25,7 @@
Path p = new Path("output");
//Path p2 = new Path("data/result");
- Path outFile = new Path("output");
+ Path outFile = new Path("output2");
SequenceFile.Reader reader;
SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
outFile, KmerBytesWritable.class, KmerCountValue.class,
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSmallFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSmallFile.java
index d97a2fd..c17abc9 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSmallFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSmallFile.java
@@ -41,8 +41,8 @@
// TODO Auto-generated method stub
Path dir = new Path("data");
Path inFile = new Path(dir, "part-0");
- Path outFile = new Path(dir, "part-0-out-5000000");
- generateNumOfLinesFromBigFile(inFile,outFile,5000000);
+ Path outFile = new Path(dir, "part-0-out-20000000");
+ generateNumOfLinesFromBigFile(inFile,outFile,20000000);
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
index 0729b96..249c293 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
@@ -10,19 +10,20 @@
import org.apache.hadoop.io.SequenceFile;
import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.pregelix.type.State;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.KmerCountValue;
public class GenerateTextFile {
public static void generateFromPathmergeResult() throws IOException{
- BufferedWriter bw = new BufferedWriter(new FileWriter("text/naive_LongPath"));
+ BufferedWriter bw = new BufferedWriter(new FileWriter("output2"));
Configuration conf = new Configuration();
FileSystem fileSys = FileSystem.get(conf);
for(int i = 0; i < 2; i++){
Path path = new Path("output/part-" + i);
SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf);
- KmerBytesWritable key = new KmerBytesWritable(5);
+ KmerBytesWritable key = new KmerBytesWritable(55);
ValueStateWritable value = new ValueStateWritable();
while(reader.next(key, value)){
@@ -37,6 +38,76 @@
}
bw.close();
}
+    /**
+     * Writes to the local file {@code "naive_text_" + maxLength} the text form of every value in the
+     * two final_naive part files whose merge-chain length is not -1 and is at most {@code maxLength}.
+     *
+     * @param maxLength largest merge-chain length to keep (inclusive)
+     * @throws IOException if a part file cannot be read or the text file cannot be written
+     */
+    public static void generateSpecificLengthChainFromNaivePathmergeResult(int maxLength) throws IOException{
+        BufferedWriter bw = new BufferedWriter(new FileWriter("naive_text_" + maxLength));
+        try{
+            Configuration conf = new Configuration();
+            FileSystem fileSys = FileSystem.get(conf);
+            for(int i = 0; i < 2; i++){
+                Path path = new Path("/home/anbangx/genomix_result/final_naive/part-" + i);
+                SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf);
+                try{
+                    KmerBytesWritable key = new KmerBytesWritable(55);
+                    ValueStateWritable value = new ValueStateWritable();
+                    while(reader.next(key, value)){
+                        if(value.getLengthOfMergeChain() != -1 && value.getLengthOfMergeChain() <= maxLength){
+                            bw.write(value.toString());
+                            bw.newLine();
+                        }
+                    }
+                }finally{
+                    // release the part file even when reading or writing failed
+                    reader.close();
+                }
+            }
+        }finally{
+            // always flush and release the text file
+            bw.close();
+        }
+    }
+
+    /**
+     * Writes to the local file {@code "log_text_" + maxLength} one {@code key<TAB>value} line per record
+     * of the two improvelog2 part files that passes the length filter and is in State.FINAL_VERTEX.
+     *
+     * @param maxLength largest merge-chain length to keep (inclusive); length -1 is always skipped
+     * @throws IOException if a part file cannot be read or the text file cannot be written
+     */
+    public static void generateSpecificLengthChainFromLogPathmergeResult(int maxLength) throws IOException{
+        BufferedWriter bw = new BufferedWriter(new FileWriter("log_text_" + maxLength));
+        try{
+            Configuration conf = new Configuration();
+            FileSystem fileSys = FileSystem.get(conf);
+            for(int i = 0; i < 2; i++){
+                Path path = new Path("/home/anbangx/genomix_result/improvelog2/part-" + i);
+                SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf);
+                try{
+                    KmerBytesWritable key = new KmerBytesWritable(55);
+                    ValueStateWritable value = new ValueStateWritable();
+                    while(reader.next(key, value)){
+                        if(value.getLengthOfMergeChain() != -1 && value.getLengthOfMergeChain() <= maxLength
+                                && value.getState() == State.FINAL_VERTEX){
+                            bw.write(key.toString() + "\t" + value.toString());
+                            bw.newLine();
+                        }
+                    }
+                }finally{
+                    // release the part file even when reading or writing failed
+                    reader.close();
+                }
+            }
+        }finally{
+            // always flush and release the text file
+            bw.close();
+        }
+    }
public static void generateFromGraphbuildResult() throws IOException{
BufferedWriter bw = new BufferedWriter(new FileWriter("textfile"));
Configuration conf = new Configuration();
@@ -63,6 +134,8 @@
public static void main(String[] args) throws IOException {
generateFromPathmergeResult();
//generateFromGraphbuildResult();
+        //generateSpecificLengthChainFromNaivePathmergeResult(68);
+        //generateSpecificLengthChainFromLogPathmergeResult(68);
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ProcessFinalResult.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ProcessFinalResult.java
index a07cff2..358b99d 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ProcessFinalResult.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ProcessFinalResult.java
@@ -14,8 +14,8 @@
*/
public static void main(String[] args) throws Exception {
DecimalFormat df = new DecimalFormat("0.00");
- BufferedReader br = new BufferedReader(new FileReader("log2_unfinite"));
- BufferedWriter bw = new BufferedWriter(new FileWriter("report3"));
+ BufferedReader br = new BufferedReader(new FileReader("log2_unfinite_improve"));
+ BufferedWriter bw = new BufferedWriter(new FileWriter("report4"));
String line;
int i = 0;
double totalTime = 0;
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/GraphVertexOperation.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/GraphVertexOperation.java
index 5cb0c2b..1b4217f 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/GraphVertexOperation.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/GraphVertexOperation.java
@@ -4,18 +4,6 @@
import edu.uci.ics.genomix.type.VKmerBytesWritable;
public class GraphVertexOperation {
-
- /**
- * generate the valid data(byte[]) from BytesWritable
- */
- public static byte[] generateValidDataFromBytesWritable(VKmerBytesWritable bw){
- byte[] wholeBytes = bw.getBytes();
- int validNum = bw.getLength();
- byte[] validBytes = new byte[validNum];
- for(int i = 0; i < validNum; i++)
- validBytes[i] = wholeBytes[i];
- return validBytes;
- }
/**
* Single Vertex: in-degree = out-degree = 1
* @param vertexValue
@@ -49,7 +37,7 @@
public static byte updateRightNeighberByVertexId(byte oldVertexValue, VKmerBytesWritable neighberVertex, int k){
byte geneCode = neighberVertex.getGeneCodeAtPosition(k-1);
- byte newBit = GeneCode.getAdjBit(geneCode);
+ byte newBit = GeneCode.getBitMapFromGeneCode(geneCode); //getAdjBit
return (byte) ((byte)(oldVertexValue & 0xF0) | (byte) (newBit & 0x0F));
}
/**
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/CompareTest.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/CompareTest.java
new file mode 100644
index 0000000..2c7c7e6
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/CompareTest.java
@@ -0,0 +1,32 @@
+package edu.uci.ics.genomix.pregelix.pathmerge;
+
+import java.io.File;
+
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.core.util.TestUtils;
+
+/**
+ * Hands the {@code naive-sort} and {@code log-sort} files from the path-merge chain output
+ * directory to {@code TestUtils.compareWithResult}.
+ */
+public class CompareTest {
+    // Keep the trailing separator: CHAIN_OUTPUT is built by plain string concatenation, so
+    // without it the directory would resolve to "testcase/pathmergechain".
+    public static final String PATH_TO_TESTSTORE = "testcase/pathmerge/";
+    public static final String CHAIN_OUTPUT = PATH_TO_TESTSTORE + "chain";
+
+    /**
+     * Passes both chain files to the comparison helper.
+     * NOTE(review): nothing in this class creates them; presumably they are sorted copies of the
+     * {@code naive} and {@code log} files written by MergePathTest -- confirm.
+     *
+     * @throws Exception whatever {@code TestUtils.compareWithResult} throws
+     */
+    @Test
+    public void test() throws Exception {
+        File naive = new File(CHAIN_OUTPUT + "/naive-sort");
+        File log = new File(CHAIN_OUTPUT + "/log-sort");
+        TestUtils.compareWithResult(naive, log);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/MergePathTest.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/MergePathTest.java
new file mode 100644
index 0000000..98eb03a
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/pathmerge/MergePathTest.java
@@ -0,0 +1,130 @@
+package edu.uci.ics.genomix.pregelix.pathmerge;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.pregelix.type.State;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+/**
+ * Dumps path-merge results stored as sequence files into text files under
+ * {@link #PATH_TO_TESTSTORE}, one {@code length<TAB>mergeChain} line per kept record.
+ */
+public class MergePathTest {
+    public static final String PATH_TO_TESTSTORE = "testcase/pathmerge/";
+    public static final String NAIVE_DATA_INPUT = "genomix_result/pathmerge/final_naive";
+    public static final String LOG_DATA_INPUT = "genomix_result/pathmerge/final_log";
+    public static final String TEXT_OUTPUT = PATH_TO_TESTSTORE + "textfile";
+    public static final String CHAIN_OUTPUT = PATH_TO_TESTSTORE + "chain";
+
+    /** Number of {@code part-<i>} files read from each input directory. */
+    private static int nc = 4;
+    /** Size handed to the {@code KmerBytesWritable} that receives each record key. */
+    private static int kmerSize = 55;
+    /** Longest merge chain kept in the files written under {@link #CHAIN_OUTPUT}. */
+    private static int maxLength = 68;
+
+    /**
+     * Recreates the output directories, then writes the unfiltered and the length-limited dumps
+     * for both the naive and the log result sets.
+     *
+     * @throws Exception if a directory cannot be prepared or a dump fails
+     */
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(PATH_TO_TESTSTORE));
+        FileUtils.cleanDirectory(new File(PATH_TO_TESTSTORE));
+        FileUtils.forceMkdir(new File(TEXT_OUTPUT));
+        FileUtils.cleanDirectory(new File(TEXT_OUTPUT));
+        FileUtils.forceMkdir(new File(CHAIN_OUTPUT));
+        FileUtils.cleanDirectory(new File(CHAIN_OUTPUT));
+        generateTextFromPathmergeResult(NAIVE_DATA_INPUT, TEXT_OUTPUT, "/naive");
+        generateTextFromPathmergeResult(LOG_DATA_INPUT, TEXT_OUTPUT, "/log");
+        generateSpecificLengthChainFromNaivePathmergeResult(NAIVE_DATA_INPUT, CHAIN_OUTPUT, maxLength);
+        generateSpecificLengthChainFromLogPathmergeResult(LOG_DATA_INPUT, CHAIN_OUTPUT, maxLength);
+    }
+
+    /**
+     * Writes every record whose merge-chain length is not -1 to {@code outputDir + fileName}.
+     *
+     * @param input directory holding the {@code part-<i>} sequence files
+     * @param outputDir directory of the text file
+     * @param fileName text file name, appended verbatim to {@code outputDir}
+     * @throws IOException if reading a part file or writing the text file fails
+     */
+    public static void generateTextFromPathmergeResult(String input, String outputDir, String fileName) throws IOException{
+        // Integer.MAX_VALUE disables the length cap (assumes chain lengths are ints).
+        writeChains(input, new File(outputDir + fileName), Integer.MAX_VALUE, false);
+    }
+
+    /**
+     * Writes to {@code output + "/naive"} the records whose merge-chain length is not -1 and is
+     * at most {@code maxLength}.
+     *
+     * @param input directory holding the {@code part-<i>} sequence files
+     * @param output directory of the text file
+     * @param maxLength largest merge-chain length to keep (inclusive)
+     * @throws IOException if reading a part file or writing the text file fails
+     */
+    public static void generateSpecificLengthChainFromNaivePathmergeResult(String input, String output, int maxLength) throws IOException{
+        writeChains(input, new File(output + "/naive"), maxLength, false);
+    }
+
+    /**
+     * Writes to {@code output + "/log"} the records whose merge-chain length is not -1 and is at
+     * most {@code maxLength}, and whose state equals {@code State.FINAL_VERTEX}.
+     *
+     * @param input directory holding the {@code part-<i>} sequence files
+     * @param output directory of the text file
+     * @param maxLength largest merge-chain length to keep (inclusive)
+     * @throws IOException if reading a part file or writing the text file fails
+     */
+    public static void generateSpecificLengthChainFromLogPathmergeResult(String input, String output, int maxLength) throws IOException{
+        writeChains(input, new File(output + "/log"), maxLength, true);
+    }
+
+    /**
+     * Shared dump loop: reads {@code part-0 .. part-(nc-1)} under {@code input} and writes one
+     * {@code length<TAB>mergeChain} line per kept record. Writer and readers are closed in
+     * {@code finally} blocks so a failure part-way through does not leak open handles.
+     */
+    private static void writeChains(String input, File outFile, int maxLength, boolean finalVertexOnly) throws IOException{
+        BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
+        try{
+            Configuration conf = new Configuration();
+            FileSystem fileSys = FileSystem.get(conf);
+            for(int i = 0; i < nc; i++){
+                Path path = new Path(input + "/part-" + i);
+                SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, path, conf);
+                try{
+                    KmerBytesWritable key = new KmerBytesWritable(kmerSize);
+                    ValueStateWritable value = new ValueStateWritable();
+                    while(reader.next(key, value)){
+                        if(value.getLengthOfMergeChain() == -1 || value.getLengthOfMergeChain() > maxLength){
+                            continue;
+                        }
+                        if(finalVertexOnly && value.getState() != State.FINAL_VERTEX){
+                            continue;
+                        }
+                        bw.write(value.getLengthOfMergeChain() + "\t" + value.getMergeChain().toString());
+                        bw.newLine();
+                    }
+                }finally{
+                    reader.close();
+                }
+            }
+        }finally{
+            bw.close();
+        }
+    }
+}