Add 3 tips test cases for Hyracks and add a sequence file node writer
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java
new file mode 100644
index 0000000..b77ee2c
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hyracks.newgraph.io;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.SequenceFile.Writer;
+import org.apache.hadoop.mapred.JobConf;
+import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.AssembleKeyIntoNodeOperator;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
+
+/**
+ * Factory for tuple writers that store the node field of every tuple in a Hadoop
+ * {@link SequenceFile}, using the node as the record key and {@link NullWritable} as the value.
+ */
+@SuppressWarnings("deprecation")
+public class NodeSequenceWriterFactory implements ITupleWriterFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Index of the tuple field holding the node, taken from {@link AssembleKeyIntoNodeOperator}. */
+    public static final int OutputNodeField = AssembleKeyIntoNodeOperator.OutputNodeField;
+
+    private final int kmerSize;
+    private final ConfFactory confFactory;
+
+    /**
+     * @param conf job configuration; the k-mer length is read from {@link GenomixJobConf#KMER_LENGTH},
+     *            falling back to {@link GenomixJobConf#DEFAULT_KMERLEN}
+     * @throws HyracksDataException if the configuration cannot be wrapped in a {@link ConfFactory}
+     */
+    public NodeSequenceWriterFactory(JobConf conf) throws HyracksDataException {
+        this.confFactory = new ConfFactory(conf);
+        this.kmerSize = conf.getInt(GenomixJobConf.KMER_LENGTH, GenomixJobConf.DEFAULT_KMERLEN);
+    }
+
+    /** Writes tuples as (node, null) records through a SequenceFile writer created in {@link #open}. */
+    public class TupleWriter implements ITupleWriter {
+
+        ConfFactory cf;
+        Writer writer = null;
+        // Reused for every tuple; write() re-points it at the current tuple via setAsReference.
+        NodeWritable node = new NodeWritable();
+
+        public TupleWriter(ConfFactory confFactory) {
+            this.cf = confFactory;
+        }
+
+        /**
+         * Creates an uncompressed SequenceFile writer on top of the given stream.
+         *
+         * @param output target stream; it is cast to {@link FSDataOutputStream}, so any other
+         *            implementation fails with a ClassCastException
+         */
+        @Override
+        public void open(DataOutput output) throws HyracksDataException {
+            try {
+                writer = SequenceFile.createWriter(cf.getConf(), (FSDataOutputStream) output, NodeWritable.class,
+                        NullWritable.class, CompressionType.NONE, null);
+            } catch (IOException e) {
+                throw new HyracksDataException(e);
+            }
+        }
+
+        /** Points the shared node at the tuple's node field and appends it as the record key. */
+        @Override
+        public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
+            node.setAsReference(tuple.getFieldData(OutputNodeField), tuple.getFieldStart(OutputNodeField));
+            try {
+                writer.append(node, NullWritable.get());
+            } catch (IOException e) {
+                throw new HyracksDataException(e);
+            }
+        }
+
+        /**
+         * Closes the SequenceFile writer created in open(), if any, and drops the reference to it.
+         * The output stream itself was supplied by the caller and is not closed explicitly here.
+         */
+        @Override
+        public void close(DataOutput output) throws HyracksDataException {
+            if (writer == null) {
+                return; // open() never ran or failed, so there is nothing to release
+            }
+            try {
+                writer.close();
+            } catch (IOException e) {
+                throw new HyracksDataException(e);
+            } finally {
+                writer = null;
+            }
+        }
+    }
+
+    /** Sets the global k-mer length, then returns a new writer that shares this factory's configuration. */
+    @Override
+    public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
+        KmerBytesWritable.setGlobalKmerLength(kmerSize);
+        return new TupleWriter(confFactory);
+    }
+}
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
index 3861ac9..5b5ef25 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
@@ -32,11 +32,11 @@
@SuppressWarnings("deprecation")
public class JobRun {
private static final int KmerSize = 3;
- private static final int ReadLength = 5;
+ private static final int ReadLength = 6;
private static final String ACTUAL_RESULT_DIR = "actual";
private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
- private static final String DATA_INPUT_PATH = "src/test/resources/data/lastesttest/LowSplitRepeat.txt";
+ private static final String DATA_INPUT_PATH = "src/test/resources/data/lastesttest/Tips4.txt";
private static final String HDFS_INPUT_PATH = "/webmap";
private static final String HDFS_OUTPUT_PATH = "/webmap_result";
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips1.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips1.txt
new file mode 100644
index 0000000..1e16d68
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips1.txt
@@ -0,0 +1,2 @@
+1 CAGCCA
+2 GCCGTA
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips2.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips2.txt
new file mode 100644
index 0000000..8109730
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips2.txt
@@ -0,0 +1,2 @@
+1 ACAGCG
+2 GGCGAA
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips3.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips3.txt
new file mode 100644
index 0000000..a672034
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips3.txt
@@ -0,0 +1,2 @@
+1 CAGCCT
+2 CAGCCA
diff --git a/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips4.txt b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips4.txt
new file mode 100644
index 0000000..499e8e6
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/data/lastesttest/Tips4.txt
@@ -0,0 +1,2 @@
+1 CAGGCA
+2 CAGGCC