Fix some bugs in the log algorithm
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
index 757d8f8..9947d1c 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
@@ -45,9 +45,11 @@
* The succeed node and precursor node will be stored in vertexValue and we don't use edgeValue.
* The details about message are in edu.uci.ics.pregelix.example.io.MessageWritable.
*/
-public class LogAlgorithmForMergeGraphVertex extends Vertex<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable>{
+public class LogAlgorithmForMergeGraphVertex extends Vertex<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable>{
public static final String KMER_SIZE = "LogAlgorithmForMergeGraphVertex.kmerSize";
+ public static final String ITERATIONS = "MergeGraphVertex.iteration";
public static int kmerSize = -1;
+ private int maxIteration = -1;
private byte[] tmpVertexId;
private byte[] tmpDestVertexId;
@@ -60,18 +62,15 @@
/**
* Log Algorithm for path merge graph
*/
-
/**
- * Load KmerSize
+ * Load KmerSize, MaxIteration
*/
- public LogAlgorithmForMergeGraphVertex(){
-
- }
-
@Override
public void compute(Iterator<LogAlgorithmMessageWritable> msgIterator) {
if(kmerSize == -1)
kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ if (maxIteration < 0)
+ maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 100);
tmpVertexId = GraphVertexOperation.generateValidDataFromBytesWritable(getVertexId());
tmpVal = getVertexValue();
if (getSuperstep() == 1) {
@@ -103,12 +102,12 @@
tmpVal.setState(State.MID_VERTEX);
setVertexValue(tmpVal);
}
- if(!GraphVertexOperation.isHeadVertex(tmpVal.getValue())
+ /*if(!GraphVertexOperation.isHeadVertex(tmpVal.getValue())
&& !GraphVertexOperation.isRearVertex(tmpVal.getValue())
&& !GraphVertexOperation.isRearVertex(tmpVal.getValue()))
- voteToHalt();
+ voteToHalt();*/
}
- else if(getSuperstep() == 2){
+ else if(getSuperstep() == 2 && getSuperstep() <= maxIteration){
while(msgIterator.hasNext()){
if(!GraphVertexOperation.isPathVertex(tmpVal.getValue())){
msgIterator.next();
@@ -116,7 +115,8 @@
}
else{
tmpMsg = msgIterator.next();
- if(tmpMsg.getMessage() == Message.START && tmpVal.getState() == State.MID_VERTEX){
+ if(tmpMsg.getMessage() == Message.START &&
+ (tmpVal.getState() == State.MID_VERTEX || tmpVal.getState() == State.END_VERTEX)){
tmpVal.setState(State.START_VERTEX);
setVertexValue(tmpVal);
}
@@ -131,7 +131,7 @@
}
}
//head node sends message to path node
- else if(getSuperstep()%3 == 0){
+ else if(getSuperstep()%3 == 0 && getSuperstep() <= maxIteration){
if(getSuperstep() == 3){
tmpMsg = new LogAlgorithmMessageWritable();
if(Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F)) == -1)
@@ -167,7 +167,7 @@
else{
tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, lastKmer,
0, lastKmer.length,
- Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F))); //tmpMsg.getNeighberInfo()
+ Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F)));
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
if(tmpVal.getState() == State.START_VERTEX){
tmpMsg.setMessage(Message.START);
@@ -186,7 +186,7 @@
}
//path node sends message back to head node
- else if(getSuperstep()%3 == 1){
+ else if(getSuperstep()%3 == 1 && getSuperstep() <= maxIteration){
if(msgIterator.hasNext()){
tmpMsg = msgIterator.next();
int message = tmpMsg.getMessage();
@@ -217,15 +217,16 @@
}
destVertexId.set(tmpMsg.getSourceVertexId(), 0, tmpMsg.getSourceVertexId().length);
sendMsg(destVertexId,tmpMsg);
- voteToHalt();
+ //voteToHalt();
}
else{
if(getVertexValue().getState() != State.START_VERTEX
- && getVertexValue().getState() != State.END_VERTEX && getVertexValue().getState() != State.FINAL_DELETE)
+ && getVertexValue().getState() != State.END_VERTEX && getVertexValue().getState() != State.FINAL_DELETE){
deleteVertex(getVertexId()); //killSelf because it doesn't receive any message
+ }
}
}
- else if(getSuperstep()%3 == 2){
+ else if(getSuperstep()%3 == 2 && getSuperstep() <= maxIteration){
if(tmpVal.getState() == State.TODELETE)
deleteVertex(getVertexId()); //killSelf
else{
@@ -275,7 +276,6 @@
voteToHalt();
}
}
-
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
index 0f244dc..067b70d 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
@@ -63,7 +63,7 @@
* @throws
*/
/**
- * Load KmerSize
+ * Load KmerSize, MaxIteration
*/
@Override
public void compute(Iterator<MessageWritable> msgIterator) {
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphOutputFormat.java
index aa81066..24fca7b 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphOutputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphOutputFormat.java
@@ -7,11 +7,13 @@
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import edu.uci.ics.genomix.pregelix.GraphVertexOperation;
import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexOutputFormat;
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.io.VertexWriter;
import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
import edu.uci.ics.genomix.pregelix.type.State;
+import edu.uci.ics.genomix.type.Kmer;
public class LogAlgorithmForMergeGraphOutputFormat extends
BinaryVertexOutputFormat<BytesWritable, ValueStateWritable, NullWritable> {
@@ -37,7 +39,8 @@
@Override
public void writeVertex(Vertex<BytesWritable, ValueStateWritable, NullWritable, ?> vertex) throws IOException,
InterruptedException {
- if(vertex.getVertexValue().getState() != State.FINAL_DELETE)
+ if(vertex.getVertexValue().getState() != State.FINAL_DELETE
+ && vertex.getVertexValue().getState() != State.END_VERTEX)
getRecordWriter().write(vertex.getVertexId(),vertex.getVertexValue());
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java
index 9acda4e..f101d7f 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/ValueStateWritable.java
@@ -5,6 +5,7 @@
import org.apache.hadoop.io.WritableComparable;
import edu.uci.ics.genomix.pregelix.type.State;
+import edu.uci.ics.genomix.type.Kmer;
public class ValueStateWritable implements WritableComparable<ValueStateWritable> {
@@ -63,6 +64,8 @@
value = in.readByte();
state = in.readInt();
lengthOfMergeChain = in.readInt();
+        if(lengthOfMergeChain < 0) // a negative length cannot size mergeChain: reject the record instead of printing a blank line
+            throw new java.io.IOException("Negative lengthOfMergeChain: " + lengthOfMergeChain);
if(lengthOfMergeChain != 0){
mergeChain = new byte[(lengthOfMergeChain-1)/4 + 1];
in.readFully(mergeChain);
@@ -86,4 +89,14 @@
return 0;
}
+    @Override
+    public String toString() {
+        // Tab-separated fields: bitmap symbol, then (only when a merge chain exists) length, chain and state.
+        String symbol = Kmer.GENE_CODE.getSymbolFromBitMap(value);
+        if(lengthOfMergeChain == 0)
+            return symbol;
+        return symbol + "\t" + lengthOfMergeChain + "\t"
+                + Kmer.recoverKmerFrom(lengthOfMergeChain, mergeChain, 0, mergeChain.length) + "\t" + state;
+    }
+
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
new file mode 100644
index 0000000..8ab6990
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
@@ -0,0 +1,50 @@
+package edu.uci.ics.genomix.pregelix.sequencefile;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.type.Kmer;
+
+/** Dumps SequenceFile parts of (BytesWritable key, ValueStateWritable value) records to a text file. */
+public class GenerateTextFile {
+    /**
+     * Writes one "kmer TAB value" line to text/naive_CyclePath for every record in
+     * output/part-0 and output/part-1; keys are decoded with a hard-coded k-mer length of 5.
+     * @throws IOException if a part file cannot be read or the text file cannot be written
+     */
+    public static void generate() throws IOException{
+        BufferedWriter bw = new BufferedWriter(new FileWriter("text/naive_CyclePath"));
+        try {
+            Configuration conf = new Configuration();
+            FileSystem fileSys = FileSystem.get(conf);
+            for(int i = 0; i < 2; i++){
+                SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, new Path("output/part-" + i), conf);
+                try {
+                    BytesWritable key = new BytesWritable();
+                    ValueStateWritable value = new ValueStateWritable();
+                    while(reader.next(key, value)){
+                        bw.write(Kmer.recoverKmerFrom(5, key.getBytes(), 0, key.getLength()) + "\t" + value.toString());
+                        bw.newLine();
+                    }
+                } finally {
+                    reader.close(); // release the part file even if a record fails to decode
+                }
+            }
+        } finally {
+            bw.close(); // flush and close the text file even if reading fails
+        }
+    }
+
+    /** Command-line entry point; the arguments are ignored. */
+    public static void main(String[] args) throws IOException {
+        generate();
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
index 53340dc..763d46e 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
@@ -71,7 +71,7 @@
job.setOutputValueClass(ValueStateWritable.class);
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
- job.getConfiguration().setInt(LogAlgorithmForMergeGraphVertex.KMER_SIZE, 55);
+ job.getConfiguration().setInt(LogAlgorithmForMergeGraphVertex.KMER_SIZE, 5);
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
@@ -86,8 +86,8 @@
public static void main(String[] args) throws IOException {
// TODO Auto-generated method stub
//genLoadGraph();
- genMergeGraph();
- //genLogAlgorithmForMergeGraph();
+ //genMergeGraph();
+ genLogAlgorithmForMergeGraph();
//genSequenceLoadGraph();
//genBasicBinaryLoadGraph();
}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java
index ba0f4c3..8d08f2b 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java
@@ -40,7 +40,7 @@
private static final String PATH_TO_ONLY = "src/test/resources/only.txt";
private static final String FILE_EXTENSION_OF_RESULTS = "result";
- private static final String DATA_PATH = "data/input/part-1-out-500000";// sequenceShortFileMergeTest
+ private static final String DATA_PATH = "data/result/BridgePath";// sequenceShortFileMergeTest
private static final String HDFS_PATH = "/webmap/";
private static final String HYRACKS_APP_NAME = "pregelix";
diff --git a/genomix/genomix-pregelix/text/BridgePath/log_BridgePath b/genomix/genomix-pregelix/text/BridgePath/log_BridgePath
new file mode 100644
index 0000000..cab1281
--- /dev/null
+++ b/genomix/genomix-pregelix/text/BridgePath/log_BridgePath
@@ -0,0 +1,6 @@
+TTCCA T|C
+CCGTG CT|
+TCCAC T|CT
+CCACC T|G 9 CCACCCCGT 5
+TTTCC |A
+CCACT T|G 9 CCACTCCGT 5
diff --git a/genomix/genomix-pregelix/text/BridgePath/naive_BridgePath b/genomix/genomix-pregelix/text/BridgePath/naive_BridgePath
new file mode 100644
index 0000000..c669dba
--- /dev/null
+++ b/genomix/genomix-pregelix/text/BridgePath/naive_BridgePath
@@ -0,0 +1,6 @@
+TTCCA T|C 5 TTCCA 1
+CCGTG CT|
+TCCAC T|CT
+CCACC T|G 9 CCACCCCGT 1
+TTTCC |A
+CCACT T|C
diff --git a/genomix/genomix-pregelix/text/CyclePath/log_CyclePath b/genomix/genomix-pregelix/text/CyclePath/log_CyclePath
new file mode 100644
index 0000000..d0ee84f
--- /dev/null
+++ b/genomix/genomix-pregelix/text/CyclePath/log_CyclePath
@@ -0,0 +1,3 @@
+GCAAC |T
+AACTT C|T 12 AACTTCATCAAC 5
+CAACT GT|T
diff --git a/genomix/genomix-pregelix/text/CyclePath/naive_CyclePath b/genomix/genomix-pregelix/text/CyclePath/naive_CyclePath
new file mode 100644
index 0000000..5cdb7c2
--- /dev/null
+++ b/genomix/genomix-pregelix/text/CyclePath/naive_CyclePath
@@ -0,0 +1,3 @@
+GCAAC |T
+AACTT C|T 12 AACTTCATCAAC 1
+CAACT GT|T
diff --git a/genomix/genomix-pregelix/text/LongPath/log_LongPath b/genomix/genomix-pregelix/text/LongPath/log_LongPath
new file mode 100644
index 0000000..98cb21e
--- /dev/null
+++ b/genomix/genomix-pregelix/text/LongPath/log_LongPath
@@ -0,0 +1,3 @@
+GCCTC G|G 15 GCCTCAGTACGCCCG 5
+CCCGG G|
+GGCCT |C
diff --git a/genomix/genomix-pregelix/text/LongPath/naive_LongPath b/genomix/genomix-pregelix/text/LongPath/naive_LongPath
new file mode 100644
index 0000000..c1a472e
--- /dev/null
+++ b/genomix/genomix-pregelix/text/LongPath/naive_LongPath
@@ -0,0 +1,3 @@
+GCCTC G|G 15 GCCTCAGTACGCCCG 1
+CCCGG G|
+GGCCT |C
diff --git a/genomix/genomix-pregelix/text/Path/log_Path b/genomix/genomix-pregelix/text/Path/log_Path
new file mode 100644
index 0000000..3a46528
--- /dev/null
+++ b/genomix/genomix-pregelix/text/Path/log_Path
@@ -0,0 +1,3 @@
+GCCTC G|G 10 GCCTCAGTAC 5
+GGCCT |C
+GTACG A|
diff --git a/genomix/genomix-pregelix/text/Path/naive_Path b/genomix/genomix-pregelix/text/Path/naive_Path
new file mode 100644
index 0000000..b8d2aeb
--- /dev/null
+++ b/genomix/genomix-pregelix/text/Path/naive_Path
@@ -0,0 +1,3 @@
+GCCTC G|G 10 GCCTCAGTAC 1
+GGCCT |C
+GTACG A|
diff --git a/genomix/genomix-pregelix/text/SimplePath/log_SimplePath b/genomix/genomix-pregelix/text/SimplePath/log_SimplePath
new file mode 100644
index 0000000..5c149ac
--- /dev/null
+++ b/genomix/genomix-pregelix/text/SimplePath/log_SimplePath
@@ -0,0 +1,9 @@
+CGGCA G|A 8 CGGCAAGA 5
+AGCAC C|
+AAGAC |A
+GCGGC |A
+GCATC C|
+ATATC |G
+TATCG A|C 8 TATCGCAT 5
+AAGAA C|
+AGACA A|C 8 AGACAGCA 5
diff --git a/genomix/genomix-pregelix/text/SimplePath/naive_SimplePath b/genomix/genomix-pregelix/text/SimplePath/naive_SimplePath
new file mode 100644
index 0000000..cf53cc8
--- /dev/null
+++ b/genomix/genomix-pregelix/text/SimplePath/naive_SimplePath
@@ -0,0 +1,9 @@
+CGGCA G|A 8 CGGCAAGA 1
+AGCAC C|
+AAGAC |A
+GCGGC |A
+GCATC C|
+ATATC |G
+TATCG A|C 8 TATCGCAT 1
+AAGAA C|
+AGACA A|C 8 AGACAGCA 1
diff --git a/genomix/genomix-pregelix/text/SinglePath/log_SinglePath b/genomix/genomix-pregelix/text/SinglePath/log_SinglePath
new file mode 100644
index 0000000..f1371ec
--- /dev/null
+++ b/genomix/genomix-pregelix/text/SinglePath/log_SinglePath
@@ -0,0 +1,3 @@
+ACAGT A|
+GACAA A|T 8 GACAACAG 5
+AGACA |A
diff --git a/genomix/genomix-pregelix/text/SinglePath/naive_SinglePath b/genomix/genomix-pregelix/text/SinglePath/naive_SinglePath
new file mode 100644
index 0000000..b736667
--- /dev/null
+++ b/genomix/genomix-pregelix/text/SinglePath/naive_SinglePath
@@ -0,0 +1,3 @@
+ACAGT A|
+GACAA A|T 8 GACAACAG 1
+AGACA |A
diff --git a/genomix/genomix-pregelix/text/TreePath/log_TreePath b/genomix/genomix-pregelix/text/TreePath/log_TreePath
new file mode 100644
index 0000000..0b9f198
--- /dev/null
+++ b/genomix/genomix-pregelix/text/TreePath/log_TreePath
@@ -0,0 +1,9 @@
+CAGTA T|AC
+AGTAC C|G 10 AGTACGCCCG 5
+ATCCC T|
+GCCTC G|A 8 GCCTCAGT 5
+CCCGG G|
+GGCCT |CG
+AGTAA C|C 10 AGTAACTAAA 5
+TAAAC C|
+GCCTG G|C 12 GCCTGGCTATCC 5
diff --git a/genomix/genomix-pregelix/text/TreePath/naive_TreePath b/genomix/genomix-pregelix/text/TreePath/naive_TreePath
new file mode 100644
index 0000000..39dcbaa
--- /dev/null
+++ b/genomix/genomix-pregelix/text/TreePath/naive_TreePath
@@ -0,0 +1,9 @@
+CAGTA T|AC
+AGTAC C|G 10 AGTACGCCCG 1
+ATCCC T|
+GCCTC G|A 8 GCCTCAGT 1
+CCCGG G|
+GGCCT |CG
+AGTAA C|C 10 AGTAACTAAA 1
+TAAAC C|
+GCCTG G|C 12 GCCTGGCTATCC 1