add test cases for path merge and other data clean patterns
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index d4dab00..54abda3 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -57,6 +57,16 @@
reverseReverseList.set(RRList);
kmer.set(kmer);
}
+
+ public void set(PositionWritable nodeID, PositionListWritable FFList, PositionListWritable FRList,
+ PositionListWritable RFList, PositionListWritable RRList, KmerBytesWritable kmer){ // overwrites all six fields of this node in one call
+ this.nodeID.set(nodeID);
+ this.forwardForwardList.set(FFList);
+ this.forwardReverseList.set(FRList);
+ this.reverseForwardList.set(RFList);
+ this.reverseReverseList.set(RRList);
+ this.kmer.set(kmer); // "this." is required here: the parameter shadows the field of the same name
+ }
public void setNodeID(PositionWritable ref) {
this.setNodeID(ref.getReadID(), ref.getPosInRead());
diff --git a/genomix/genomix-pregelix/data/TipAddGraph/txt/.test.crc b/genomix/genomix-pregelix/data/TipAddGraph/txt/.test.crc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TipAddGraph/txt/.test.crc
diff --git a/genomix/genomix-pregelix/data/TipAddGraph/txt/test b/genomix/genomix-pregelix/data/TipAddGraph/txt/test
new file mode 100755
index 0000000..5c36d0a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/TipAddGraph/txt/test
@@ -0,0 +1,7 @@
+(1,5) ([(1,6)] [] [] [(1,4)] GAA)
+(2,1) ([] [] [] [(1,4)] )
+(1,1) ([(1,2)] [] [] [] AAT)
+(1,2) ([(1,3)] [] [] [(1,1)] ATA)
+(1,4) ([(1,5),(2,1)] [] [] [(1,3)] AGA)
+(1,3) ([(1,4)] [] [] [(1,2)] TAG)
+(1,6) ([] [] [] [(1,5)] AAC)
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/read.txt b/genomix/genomix-pregelix/data/graphbuild.test/read.txt
new file mode 100755
index 0000000..bd5f3c4
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/read.txt
@@ -0,0 +1 @@
+1 AATAGAAC
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/repeat.txt b/genomix/genomix-pregelix/data/graphbuild.test/repeat.txt
new file mode 100755
index 0000000..64e3de8
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/repeat.txt
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 TAGAACTG
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/repeat2.txt b/genomix/genomix-pregelix/data/graphbuild.test/repeat2.txt
new file mode 100755
index 0000000..c065d06
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/repeat2.txt
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 TAGAAGAT
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/tip.txt b/genomix/genomix-pregelix/data/graphbuild.test/tip.txt
new file mode 100755
index 0000000..62a6fd7
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/tip.txt
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 GAAGGTCC
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt b/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt
new file mode 100755
index 0000000..62a6fd7
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/tworeads.txt
@@ -0,0 +1,2 @@
+1 AATAGAAC
+2 GAAGGTCC
diff --git a/genomix/genomix-pregelix/data/input/read/part-0 b/genomix/genomix-pregelix/data/input/read/part-0
new file mode 100755
index 0000000..6595dfe
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/read/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/read/part-1 b/genomix/genomix-pregelix/data/input/read/part-1
new file mode 100755
index 0000000..072d5f9
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/read/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/read/part-2 b/genomix/genomix-pregelix/data/input/read/part-2
new file mode 100755
index 0000000..3d132ac
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/read/part-2
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/read/part-3 b/genomix/genomix-pregelix/data/input/read/part-3
new file mode 100755
index 0000000..df12383
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/read/part-3
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/test/part-0 b/genomix/genomix-pregelix/data/input/test/part-0
new file mode 100755
index 0000000..0b15880
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/test/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/test/part-1 b/genomix/genomix-pregelix/data/input/test/part-1
new file mode 100755
index 0000000..b68421a
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/test/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/test/part-2 b/genomix/genomix-pregelix/data/input/test/part-2
new file mode 100755
index 0000000..9098ad0
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/test/part-2
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/test/part-3 b/genomix/genomix-pregelix/data/input/test/part-3
new file mode 100755
index 0000000..9cfb024
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/test/part-3
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/test/result.graphbuild.txt.bin b/genomix/genomix-pregelix/data/input/test/result.graphbuild.txt.bin
deleted file mode 100644
index 4865aeb..0000000
--- a/genomix/genomix-pregelix/data/input/test/result.graphbuild.txt.bin
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/tipremove/part-0 b/genomix/genomix-pregelix/data/input/tipremove/part-0
new file mode 100755
index 0000000..a10ab8b
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/tipremove/part-0
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/tipremove/part-1 b/genomix/genomix-pregelix/data/input/tipremove/part-1
new file mode 100755
index 0000000..20a3078
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/tipremove/part-1
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/tipremove/part-2 b/genomix/genomix-pregelix/data/input/tipremove/part-2
new file mode 100755
index 0000000..aed9265
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/tipremove/part-2
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/tipremove/part-3 b/genomix/genomix-pregelix/data/input/tipremove/part-3
new file mode 100755
index 0000000..8af7223
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/tipremove/part-3
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/tipremove/part-4 b/genomix/genomix-pregelix/data/input/tipremove/part-4
new file mode 100755
index 0000000..2fd77a1
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/tipremove/part-4
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/tipremove/part-5 b/genomix/genomix-pregelix/data/input/tipremove/part-5
new file mode 100755
index 0000000..7eb1649
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/tipremove/part-5
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/tipremove/part-6 b/genomix/genomix-pregelix/data/input/tipremove/part-6
new file mode 100755
index 0000000..8ae2314
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/tipremove/part-6
Binary files differ
diff --git a/genomix/genomix-pregelix/data/input/tipremove/part-7 b/genomix/genomix-pregelix/data/input/tipremove/part-7
new file mode 100755
index 0000000..d2b9342
--- /dev/null
+++ b/genomix/genomix-pregelix/data/input/tipremove/part-7
Binary files differ
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexOutputFormat.java
new file mode 100644
index 0000000..efe41d6
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryDataCleanVertexOutputFormat.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.genomix.pregelix.api.io.binary;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+
+/**
+ * Abstract base class for vertex output formats that write binary output
+ * through Hadoop's SequenceFileOutputFormat.
+ *
+ * @param <I>
+ * Vertex index value
+ * @param <V>
+ * Vertex value
+ * @param <E>
+ * Edge value
+ */
+@SuppressWarnings("rawtypes")
+public abstract class BinaryDataCleanVertexOutputFormat<I extends WritableComparable, V extends Writable, E extends Writable>
+ extends VertexOutputFormat<I, V, E> {
+ /** SequenceFileOutputFormat that checkOutputSpecs and getOutputCommitter delegate to */
+ protected SequenceFileOutputFormat binaryOutputFormat = new SequenceFileOutputFormat();
+
+ /**
+ * Abstract vertex writer to be subclassed for a specific vertex output.
+ * It wraps a RecordWriter of (PositionWritable, ValueStateWritable) pairs and
+ * does not implement writeVertex itself; that is left to the subclass.
+ *
+ * @param <I>
+ * Vertex index value
+ * @param <V>
+ * Vertex value
+ * @param <E>
+ * Edge value
+ */
+ public static abstract class BinaryVertexWriter<I extends WritableComparable, V extends Writable, E extends Writable>
+ implements VertexWriter<I, V, E> {
+ /** Context passed to initialize */
+ private TaskAttemptContext context;
+ /** Wrapped record writer; closed by close() */
+ private final RecordWriter<PositionWritable, ValueStateWritable> lineRecordWriter;
+
+ /**
+ * Wraps the given record writer.
+ *
+ * @param lineRecordWriter
+ * Record writer to wrap. NOTE(review): presumably obtained from binaryOutputFormat - confirm in subclasses
+ */
+ public BinaryVertexWriter(RecordWriter<PositionWritable, ValueStateWritable> lineRecordWriter) {
+ this.lineRecordWriter = lineRecordWriter;
+ }
+
+ @Override
+ public void initialize(TaskAttemptContext context) throws IOException {
+ this.context = context;
+ }
+
+ @Override
+ public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+ lineRecordWriter.close(context);
+ }
+
+ /**
+ * Get the wrapped record writer.
+ *
+ * @return Record writer to be used for writing.
+ */
+ public RecordWriter<PositionWritable, ValueStateWritable> getRecordWriter() {
+ return lineRecordWriter;
+ }
+
+ /**
+ * Get the context.
+ *
+ * @return Context passed to initialize.
+ */
+ public TaskAttemptContext getContext() {
+ return context;
+ }
+ }
+
+ @Override
+ public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
+ binaryOutputFormat.checkOutputSpecs(context);
+ }
+
+ @Override
+ public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
+ return binaryOutputFormat.getOutputCommitter(context);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexOutputFormat.java
index 8fbd1ce..eb78fff 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexOutputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/api/io/binary/BinaryVertexOutputFormat.java
@@ -2,6 +2,7 @@
import java.io.IOException;
+import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.JobContext;
@@ -10,8 +11,7 @@
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
-import edu.uci.ics.genomix.type.PositionWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
import edu.uci.ics.pregelix.api.io.VertexWriter;
@@ -49,7 +49,7 @@
/** Context passed to initialize */
private TaskAttemptContext context;
/** Internal line record writer */
- private final RecordWriter<PositionWritable, ValueStateWritable> lineRecordWriter;
+ private final RecordWriter<NodeWritable, NullWritable> lineRecordWriter;
/**
* Initialize with the LineRecordWriter.
@@ -57,7 +57,7 @@
* @param lineRecordWriter
* Line record writer from SequenceFileOutputFormat
*/
- public BinaryVertexWriter(RecordWriter<PositionWritable, ValueStateWritable> lineRecordWriter) {
+ public BinaryVertexWriter(RecordWriter<NodeWritable, NullWritable> lineRecordWriter) {
this.lineRecordWriter = lineRecordWriter;
}
@@ -76,7 +76,7 @@
*
* @return Record writer to be used for writing.
*/
- public RecordWriter<PositionWritable, ValueStateWritable> getRecordWriter() {
+ public RecordWriter<NodeWritable, NullWritable> getRecordWriter() {
return lineRecordWriter;
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/DataCleanOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/DataCleanOutputFormat.java
index 40abd3e..2f2bd5c 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/DataCleanOutputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/DataCleanOutputFormat.java
@@ -6,14 +6,14 @@
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexOutputFormat;
+import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryDataCleanVertexOutputFormat;
import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
import edu.uci.ics.genomix.type.PositionWritable;
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.io.VertexWriter;
public class DataCleanOutputFormat extends
- BinaryVertexOutputFormat<PositionWritable, ValueStateWritable, NullWritable> {
+ BinaryDataCleanVertexOutputFormat<PositionWritable, ValueStateWritable, NullWritable> {
@Override
public VertexWriter<PositionWritable, ValueStateWritable, NullWritable> createVertexWriter(
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForPathMergeOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForPathMergeOutputFormat.java
index 8a04292..a26c075 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForPathMergeOutputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForPathMergeOutputFormat.java
@@ -10,7 +10,7 @@
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.io.VertexWriter;
import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
-import edu.uci.ics.genomix.pregelix.type.State;
+import edu.uci.ics.genomix.type.NodeWritable;
import edu.uci.ics.genomix.type.PositionWritable;
public class LogAlgorithmForPathMergeOutputFormat extends
@@ -20,7 +20,7 @@
public VertexWriter<PositionWritable, ValueStateWritable, NullWritable> createVertexWriter(
TaskAttemptContext context) throws IOException, InterruptedException {
@SuppressWarnings("unchecked")
- RecordWriter<PositionWritable, ValueStateWritable> recordWriter = binaryOutputFormat.getRecordWriter(context);
+ RecordWriter<NodeWritable, NullWritable> recordWriter = binaryOutputFormat.getRecordWriter(context);
return new BinaryLoadGraphVertexWriter(recordWriter);
}
@@ -29,17 +29,20 @@
*/
public static class BinaryLoadGraphVertexWriter extends
BinaryVertexWriter<PositionWritable, ValueStateWritable, NullWritable> {
-
- public BinaryLoadGraphVertexWriter(RecordWriter<PositionWritable, ValueStateWritable> lineRecordWriter) {
+ private NodeWritable node = new NodeWritable();
+ private NullWritable nul = NullWritable.get();
+
+ public BinaryLoadGraphVertexWriter(RecordWriter<NodeWritable, NullWritable> lineRecordWriter) {
super(lineRecordWriter);
}
@Override
public void writeVertex(Vertex<PositionWritable, ValueStateWritable, NullWritable, ?> vertex)
throws IOException, InterruptedException {
- if (vertex.getVertexValue().getState() != State.END_VERTEX) {
- getRecordWriter().write(vertex.getVertexId(), vertex.getVertexValue());
- }
+ node.set(vertex.getVertexId(), vertex.getVertexValue().getFFList(),
+ vertex.getVertexValue().getFRList(), vertex.getVertexValue().getRFList(),
+ vertex.getVertexValue().getRRList(), vertex.getVertexValue().getMergeChain());
+ getRecordWriter().write(node, nul);
}
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/NaiveAlgorithmForPathMergeOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/NaiveAlgorithmForPathMergeOutputFormat.java
index fe3b12d..7e157bb 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/NaiveAlgorithmForPathMergeOutputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/NaiveAlgorithmForPathMergeOutputFormat.java
@@ -8,6 +8,7 @@
import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexOutputFormat;
import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
import edu.uci.ics.genomix.type.PositionWritable;
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.io.VertexWriter;
@@ -19,7 +20,7 @@
public VertexWriter<PositionWritable, ValueStateWritable, NullWritable> createVertexWriter(
TaskAttemptContext context) throws IOException, InterruptedException {
@SuppressWarnings("unchecked")
- RecordWriter<PositionWritable, ValueStateWritable> recordWriter = binaryOutputFormat.getRecordWriter(context);
+ RecordWriter<NodeWritable, NullWritable> recordWriter = binaryOutputFormat.getRecordWriter(context);
return new BinaryLoadGraphVertexWriter(recordWriter);
}
@@ -28,14 +29,20 @@
*/
public static class BinaryLoadGraphVertexWriter extends
BinaryVertexWriter<PositionWritable, ValueStateWritable, NullWritable> {
- public BinaryLoadGraphVertexWriter(RecordWriter<PositionWritable, ValueStateWritable> lineRecordWriter) {
+ private NodeWritable node = new NodeWritable();
+ private NullWritable nullWritable = NullWritable.get();
+
+ public BinaryLoadGraphVertexWriter(RecordWriter<NodeWritable, NullWritable> lineRecordWriter) {
super(lineRecordWriter);
}
@Override
public void writeVertex(Vertex<PositionWritable, ValueStateWritable, NullWritable, ?> vertex)
throws IOException, InterruptedException {
- getRecordWriter().write(vertex.getVertexId(), vertex.getVertexValue());
+ node.set(vertex.getVertexId(), vertex.getVertexValue().getFFList(),
+ vertex.getVertexValue().getFRList(), vertex.getVertexValue().getRFList(),
+ vertex.getVertexValue().getRRList(), vertex.getVertexValue().getMergeChain());
+ getRecordWriter().write(node, nullWritable);
}
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/NaiveAlgorithmForPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/NaiveAlgorithmForPathMergeVertex.java
index 722206a..3d0d395 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/NaiveAlgorithmForPathMergeVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/NaiveAlgorithmForPathMergeVertex.java
@@ -139,6 +139,14 @@
outgoingMsg.setMessage(Message.END);
sendMsgToAllPreviousNodes(getVertexValue());
}
+ if (VertexUtil.isHeadWithoutIndegree(getVertexValue())){
+ outgoingMsg.setMessage(Message.START);
+ sendMsg(getVertexId(), outgoingMsg); //send to itself
+ }
+ if (VertexUtil.isRearWithoutOutdegree(getVertexValue())){
+ outgoingMsg.setMessage(Message.END);
+ sendMsg(getVertexId(), outgoingMsg); //send to itself
+ }
}
/**
@@ -146,7 +154,9 @@
*/
public void initState(Iterator<MessageWritable> msgIterator) {
while (msgIterator.hasNext()) {
- if (!VertexUtil.isPathVertex(getVertexValue())) {
+ if (!VertexUtil.isPathVertex(getVertexValue())
+ || !VertexUtil.isHeadWithoutIndegree(getVertexValue())
+ || !VertexUtil.isRearWithoutOutdegree(getVertexValue())) {
msgIterator.next();
voteToHalt();
} else {
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipAddVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipAddVertex.java
new file mode 100644
index 0000000..2966bb1
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipAddVertex.java
@@ -0,0 +1,108 @@
+package edu.uci.ics.genomix.pregelix.operator.tipremove;
+
+import java.util.Iterator;
+import org.apache.hadoop.io.NullWritable;
+
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.genomix.pregelix.client.Client;
+import edu.uci.ics.genomix.pregelix.format.DataCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.DataCleanOutputFormat;
+import edu.uci.ics.genomix.pregelix.io.MessageWritable;
+import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
+
+/*
+ * vertexId: PositionWritable
+ * vertexValue: ValueStateWritable
+ * edgeValue: NullWritable
+ * message: MessageWritable
+ *
+ * DNA:
+ * A: 00
+ * C: 01
+ * G: 10
+ * T: 11
+ *
+ * successor node
+ * A 00000001 1
+ * G 00000010 2
+ * C 00000100 4
+ * T 00001000 8
+ * predecessor node
+ * A 00010000 16
+ * G 00100000 32
+ * C 01000000 64
+ * T 10000000 128
+ * NOTE(review): the bit encodings above and the example below look inherited from a k-mer keyed vertex; confirm they still apply here.
+ * For example, ONE LINE in input file: 00,01,10 0001,0010,
+ * That means that vertexId is ACG, its successor node is A and its predecessor node is C.
+ * The successor node and predecessor node will be stored in vertexValue and we don't use edgeValue.
+ * The details about message are in edu.uci.ics.genomix.pregelix.io.MessageWritable.
+ */
+/**
+ * Adds a tip to the graph: at superstep 1, vertex (1,4) appends (2,1) to its FF list and a new vertex (2,1) whose RR list holds (1,4) is added.
+ */
+public class TipAddVertex extends
+ Vertex<PositionWritable, ValueStateWritable, NullWritable, MessageWritable> {
+ public static final String KMER_SIZE = "TipAddVertex.kmerSize";
+ public static int kmerSize = -1;
+
+ /**
+ * Reads kmerSize from the job configuration (default 5) if it has not been set yet
+ */
+ public void initVertex() {
+ if (kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void compute(Iterator<MessageWritable> msgIterator) {
+ initVertex();
+ if(getSuperstep() == 1){
+ if(getVertexId().getReadID() == 1 && getVertexId().getPosInRead() == 4){
+ getVertexValue().getFFList().append(2, (byte)1); // link (1,4) forward to the new tip (2,1)
+
+ //add tip vertex
+ @SuppressWarnings("rawtypes")
+ Vertex vertex = (Vertex) BspUtils.createVertex(getContext().getConfiguration());
+ vertex.getMsgList().clear();
+ vertex.getEdges().clear();
+ PositionWritable vertexId = new PositionWritable();
+ ValueStateWritable vertexValue = new ValueStateWritable();
+ /**
+ * set the id of the new tip vertex: (2,1)
+ */
+ vertexId.set(2, (byte)1);
+ vertex.setVertexId(vertexId);
+ /**
+ * set the vertex value: the tip's RR list holds (1,4)
+ */
+ PositionListWritable plist = new PositionListWritable();
+ plist.append(new PositionWritable(1, (byte)4));
+ vertexValue.setRRList(plist);
+ vertex.setVertexValue(vertexValue);
+
+ addVertex(vertexId, vertex);
+ }
+ }
+ voteToHalt();
+ }
+
+ /**
+ * Runs the TipAddVertex job with the binary DataClean input and output formats.
+ */
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(TipAddVertex.class.getSimpleName());
+ job.setVertexClass(TipAddVertex.class);
+ job.setVertexInputFormatClass(DataCleanInputFormat.class);
+ job.setVertexOutputFormatClass(DataCleanOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
+ job.setOutputKeyClass(PositionWritable.class);
+ job.setOutputValueClass(ValueStateWritable.class);
+ Client.run(args, job);
+ }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipRemoveVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipRemoveVertex.java
index 4a174ec..0d5e518 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipRemoveVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/tipremove/TipRemoveVertex.java
@@ -55,6 +55,8 @@
private MessageWritable incomingMsg = new MessageWritable();
private MessageWritable outgoingMsg = new MessageWritable();
+ Iterator<PositionWritable> iterator;
+ PositionWritable pos = new PositionWritable();
/**
* initiate kmerSize, length
*/
@@ -62,7 +64,7 @@
if (kmerSize == -1)
kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
if(length == -1)
- length = getContext().getConfiguration().getInt(LENGTH, kmerSize + 5);
+ length = getContext().getConfiguration().getInt(LENGTH, kmerSize); //kmerSize + 5
outgoingMsg.reset();
}
@@ -71,7 +73,7 @@
initVertex();
if(getSuperstep() == 1){
if(VertexUtil.isIncomingTipVertex(getVertexValue())){
- if(getVertexValue().getLengthOfMergeChain() > length){
+ if(getVertexValue().getLengthOfMergeChain() >= length){
if(getVertexValue().getFFList().getCountOfPosition() > 0)
outgoingMsg.setMessage(AdjMessage.FROMFF);
else if(getVertexValue().getFRList().getCountOfPosition() > 0)
@@ -100,12 +102,44 @@
incomingMsg = msgIterator.next();
if(incomingMsg.getMessage() == AdjMessage.FROMFF){
//remove incomingMsg.getSourceId from RR positionList
+ iterator = getVertexValue().getRRList().iterator();
+ while(iterator.hasNext()){
+ pos = iterator.next();
+ if(pos.equals(incomingMsg.getSourceVertexId())){
+ iterator.remove();
+ break;
+ }
+ }
} else if(incomingMsg.getMessage() == AdjMessage.FROMFR){
- //remove incomingMsg.getSourceId from RF positionList
+ //remove incomingMsg.getSourceId from RF positionList
+ iterator = getVertexValue().getRFList().iterator();
+ while(iterator.hasNext()){
+ pos = iterator.next();
+ if(pos.equals(incomingMsg.getSourceVertexId())){
+ iterator.remove();
+ break;
+ }
+ }
} else if(incomingMsg.getMessage() == AdjMessage.FROMRF){
- //remove incomingMsg.getSourceId from FR positionList
+ //remove incomingMsg.getSourceId from FR positionList
+ iterator = getVertexValue().getFRList().iterator();
+ while(iterator.hasNext()){
+ pos = iterator.next();
+ if(pos.equals(incomingMsg.getSourceVertexId())){
+ iterator.remove();
+ break;
+ }
+ }
} else{ //incomingMsg.getMessage() == AdjMessage.FROMRR
- //remove incomingMsg.getSourceId from FF positionList
+ //remove incomingMsg.getSourceId from FF positionList
+ iterator = getVertexValue().getFFList().iterator();
+ while(iterator.hasNext()){
+ pos = iterator.next();
+ if(pos.equals(incomingMsg.getSourceVertexId())){
+ iterator.remove();
+ break;
+ }
+ }
}
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
index 45609c6..1dd8757 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateTextFile.java
@@ -15,6 +15,7 @@
import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
import edu.uci.ics.genomix.pregelix.type.State;
import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
public class GenerateTextFile {
@@ -27,14 +28,17 @@
File srcPath = new File(strSrcDir);
for (File f : srcPath.listFiles((FilenameFilter) (new WildcardFileFilter("part*")))) {
SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, new Path(f.getAbsolutePath()), conf);
- KmerBytesWritable key = new KmerBytesWritable(kmerSize);
+ //NodeWritable key = new NodeWritable(kmerSize);
+ //NullWritable value = NullWritable.get();
+ PositionWritable key = new PositionWritable();
ValueStateWritable value = new ValueStateWritable();
while (reader.next(key, value)) {
- if (key == null || value == null) {
+ if (key == null) {
break;
}
- bw.write(key.toString() + "\t" + value.toString());
+ bw.write(key.toString() + value.toString());
+ System.out.println(key.toString());
bw.newLine();
}
reader.close();
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/VertexUtil.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/VertexUtil.java
index 772690d..7962c67 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/VertexUtil.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/util/VertexUtil.java
@@ -19,7 +19,7 @@
* @param vertexValue
*/
public static boolean isHeadVertex(ValueStateWritable value) {
- return value.outDegree() > 0 && !isPathVertex(value);
+ return value.outDegree() > 0 && !isPathVertex(value) && !isHeadWithoutIndegree(value);
}
/**
@@ -28,10 +28,24 @@
* @param vertexValue
*/
public static boolean isRearVertex(ValueStateWritable value) {
- return value.inDegree() > 0 && !isPathVertex(value);
+ return value.inDegree() > 0 && !isPathVertex(value) && !isRearWithoutOutdegree(value);
}
/**
+ * Head Vertex without indegree: indegree = 0, outdegree = 1
+ */
+ public static boolean isHeadWithoutIndegree(ValueStateWritable value){
+ return value.inDegree() == 0 && value.outDegree() == 1; // no incoming edge and exactly one outgoing edge
+ }
+
+ /**
+ * Rear Vertex without outdegree: true only when indegree = 1 and outdegree = 0
+ */
+ public static boolean isRearWithoutOutdegree(ValueStateWritable value){
+ return value.inDegree() == 1 && value.outDegree() == 0; // exactly one incoming edge and no outgoing edge
+ }
+
+ /**
* check if mergeChain is cycle
*/
public static boolean isCycle(KmerBytesWritable kmer, KmerBytesWritable mergeChain, int kmerSize) {
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
index 16b2794..9f59282 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
@@ -4,6 +4,8 @@
import java.io.FileOutputStream;
import java.io.IOException;
+import edu.uci.ics.genomix.pregelix.format.DataCleanInputFormat;
+import edu.uci.ics.genomix.pregelix.format.DataCleanOutputFormat;
import edu.uci.ics.genomix.pregelix.format.LogAlgorithmForPathMergeInputFormat;
import edu.uci.ics.genomix.pregelix.format.LogAlgorithmForPathMergeOutputFormat;
import edu.uci.ics.genomix.pregelix.format.NaiveAlgorithmForPathMergeInputFormat;
@@ -12,6 +14,8 @@
import edu.uci.ics.genomix.pregelix.operator.pathmerge.LogAlgorithmForPathMergeVertex;
import edu.uci.ics.genomix.pregelix.operator.pathmerge.NaiveAlgorithmForPathMergeVertex;
import edu.uci.ics.genomix.pregelix.operator.pathmerge.P3ForPathMergeVertex;
+import edu.uci.ics.genomix.pregelix.operator.tipremove.TipAddVertex;
+import edu.uci.ics.genomix.pregelix.operator.tipremove.TipRemoveVertex;
import edu.uci.ics.genomix.type.PositionWritable;
import edu.uci.ics.pregelix.api.job.PregelixJob;
@@ -22,12 +26,12 @@
private static void generateNaiveAlgorithmForMergeGraphJob(String jobName, String outputPath) throws IOException {
PregelixJob job = new PregelixJob(jobName);
job.setVertexClass(NaiveAlgorithmForPathMergeVertex.class);
- job.setVertexInputFormatClass(NaiveAlgorithmForPathMergeInputFormat.class);
- job.setVertexOutputFormatClass(NaiveAlgorithmForPathMergeOutputFormat.class);
+ job.setVertexInputFormatClass(DataCleanInputFormat.class); // replaces NaiveAlgorithmForPathMergeInputFormat
+ job.setVertexOutputFormatClass(DataCleanOutputFormat.class); // replaces NaiveAlgorithmForPathMergeOutputFormat
job.setDynamicVertexValueSize(true);
job.setOutputKeyClass(PositionWritable.class);
job.setOutputValueClass(ValueStateWritable.class);
- job.getConfiguration().setInt(NaiveAlgorithmForPathMergeVertex.KMER_SIZE, 5);
+ job.getConfiguration().setInt(NaiveAlgorithmForPathMergeVertex.KMER_SIZE, 3); // k-mer size lowered from 5 to 3
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
@@ -44,7 +48,7 @@
job.setDynamicVertexValueSize(true);
job.setOutputKeyClass(PositionWritable.class);
job.setOutputValueClass(ValueStateWritable.class);
- job.getConfiguration().setInt(LogAlgorithmForPathMergeVertex.KMER_SIZE, 5);
+ job.getConfiguration().setInt(LogAlgorithmForPathMergeVertex.KMER_SIZE, 3);
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
@@ -70,11 +74,75 @@
         generateP3ForMergeGraphJob("P3ForMergeGraph", outputBase
                 + "P3ForMergeGraph.xml");
     }
+
+    /**
+     * Configures a PregelixJob that runs TipAddVertex and writes its configuration to an XML file.
+     *
+     * @param jobName name passed to the PregelixJob constructor
+     * @param outputPath path of the XML file the configuration is written to
+     * @throws IOException if the XML file cannot be created or written
+     */
+    private static void generateTipAddGraphJob(String jobName, String outputPath) throws IOException {
+        PregelixJob job = new PregelixJob(jobName);
+        job.setVertexClass(TipAddVertex.class);
+        job.setVertexInputFormatClass(NaiveAlgorithmForPathMergeInputFormat.class);
+        job.setVertexOutputFormatClass(NaiveAlgorithmForPathMergeOutputFormat.class);
+        job.setDynamicVertexValueSize(true);
+        job.setOutputKeyClass(PositionWritable.class);
+        job.setOutputValueClass(ValueStateWritable.class);
+        job.getConfiguration().setInt(TipAddVertex.KMER_SIZE, 3);
+        // Close the stream ourselves so the file handle is released even if writeXml throws.
+        FileOutputStream out = new FileOutputStream(new File(outputPath));
+        try {
+            job.getConfiguration().writeXml(out);
+        } finally {
+            out.close();
+        }
+    }
+
+    /** Writes the TipAddGraph job configuration to outputBase + "TipAddGraph.xml". */
+    private static void genTipAddGraph() throws IOException {
+        generateTipAddGraphJob("TipAddGraph", outputBase
+                + "TipAddGraph.xml");
+    }
+
+    /**
+     * Configures a PregelixJob that runs TipRemoveVertex and writes its configuration to an XML file.
+     *
+     * @param jobName name passed to the PregelixJob constructor
+     * @param outputPath path of the XML file the configuration is written to
+     * @throws IOException if the XML file cannot be created or written
+     */
+    private static void generateTipRemoveGraphJob(String jobName, String outputPath) throws IOException {
+        PregelixJob job = new PregelixJob(jobName);
+        job.setVertexClass(TipRemoveVertex.class);
+        job.setVertexInputFormatClass(DataCleanInputFormat.class);
+        job.setVertexOutputFormatClass(DataCleanOutputFormat.class);
+        job.setDynamicVertexValueSize(true);
+        job.setOutputKeyClass(PositionWritable.class);
+        job.setOutputValueClass(ValueStateWritable.class);
+        job.getConfiguration().setInt(TipRemoveVertex.KMER_SIZE, 3);
+        // Close the stream ourselves so the file handle is released even if writeXml throws.
+        FileOutputStream out = new FileOutputStream(new File(outputPath));
+        try {
+            job.getConfiguration().writeXml(out);
+        } finally {
+            out.close();
+        }
+    }
+
+    /** Writes the TipRemoveGraph job configuration to outputBase + "TipRemoveGraph.xml". */
+    private static void genTipRemoveGraph() throws IOException {
+        generateTipRemoveGraphJob("TipRemoveGraph", outputBase
+                + "TipRemoveGraph.xml");
+    }
 
     public static void main(String[] args) throws IOException {
         genNaiveAlgorithmForMergeGraph();
         //genLogAlgorithmForMergeGraph();
         //genP3ForMergeGraph();
+        //genTipAddGraph();
+        //genTipRemoveGraph();
     }
 
 }
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestCase.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestCase.java
index f25ad57..4405134 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestCase.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestCase.java
@@ -74,7 +74,7 @@
private void compareResults() throws Exception {
dfs.copyToLocalFile(FileOutputFormat.getOutputPath(job), new Path(resultFileDir));
- GenerateTextFile.generateFromPathmergeResult(5, resultFileDir, textFileDir);
+ GenerateTextFile.generateFromPathmergeResult(3, resultFileDir, textFileDir); // NOTE(review): first argument is presumably the k-mer size (JobGenerator now sets KMER_SIZE to 3) - confirm
}
public String toString() {
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
index a8f5a81..49a5ed0 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
@@ -45,7 +45,7 @@
public static final String PreFix = "data/input"; //"graphbuildresult";
public static final String[] TestDir = { PreFix + File.separator
- + "test"};/*, PreFix + File.separator
+ + "tipremove"};/*, PreFix + File.separator
/*+ "CyclePath"};, PreFix + File.separator
+ "SimplePath", PreFix + File.separator
+ "SinglePath", PreFix + File.separator
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java
index adf1706..c5aa0eb 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java
@@ -45,12 +45,12 @@
@SuppressWarnings("deprecation")
public class JobRunStepByStepTest {
- private static final int KmerSize = 5;
- private static final int ReadLength = 9;
+ private static final int KmerSize = 3;
+ private static final int ReadLength = 8;
private static final String ACTUAL_RESULT_DIR = "actual";
private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
- private static final String DATA_INPUT_PATH = "data/graphbuild.test/text.txt";
+ private static final String DATA_INPUT_PATH = "data/graphbuild.test/tworeads.txt";
private static final String HDFS_INPUT_PATH = "/webmap";
private static final String HDFS_OUTPUT_PATH = "/webmap_result";
@@ -163,10 +163,10 @@
BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
for (int i = 0; i < numPartitionPerMachine * numberOfNC; i++) {
String partname = "/part-" + i;
- // FileUtil.copy(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH
- // + partname), FileSystem.getLocal(new Configuration()),
- // new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + partname),
- // false, conf);
+ FileUtil.copy(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH
+ + partname), FileSystem.getLocal(new Configuration()),
+ new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + partname),
+ false, conf);
Path path = new Path(HDFS_OUTPUT_PATH + partname);
FileSystem dfs = FileSystem.get(conf);
diff --git a/genomix/genomix-pregelix/src/test/resources/jobs/NaiveAlgorithmForMergeGraph.xml b/genomix/genomix-pregelix/src/test/resources/jobs/NaiveAlgorithmForMergeGraph.xml
new file mode 100644
index 0000000..39ed64e
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/jobs/NaiveAlgorithmForMergeGraph.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>NaiveAlgorithmForPathMergeVertex.kmerSize</name><value>3</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>mapred.output.value.class</name><value>edu.uci.ics.genomix.pregelix.io.ValueStateWritable</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>NaiveAlgorithmForMergeGraph</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>mapred.output.key.class</name><value>edu.uci.ics.genomix.type.PositionWritable</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.genomix.pregelix.operator.pathmerge.NaiveAlgorithmForPathMergeVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.genomix.pregelix.format.DataCleanOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.genomix.pregelix.format.DataCleanInputFormat</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>pregelix.incStateLength</name><value>true</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file