Merge branch 'jianfeng/genomix' into jianfeng/genomix-reverse
Conflicts:
genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
genomix/genomix-hyracks/src/test/resources/expected/result_after_readIDAggreage
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java
index 9fb2d04..00409ef 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java
@@ -4,6 +4,7 @@
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer;
@@ -59,7 +60,7 @@
@Override
public void open(DataOutput output) throws HyracksDataException {
try {
- writer = SequenceFile.createWriter(cf.getConf(), (FSDataOutputStream) output, NodeWritable.class, null,
+ writer = SequenceFile.createWriter(cf.getConf(), (FSDataOutputStream) output, NodeWritable.class, NullWritable.class,
CompressionType.NONE, null);
} catch (IOException e) {
throw new HyracksDataException(e);
@@ -85,7 +86,7 @@
tuple.getFieldData(InputKmerBytesField), tuple.getFieldStart(InputKmerBytesField));
try {
- writer.append(node, null);
+ writer.append(node, NullWritable.get());
} catch (IOException e) {
throw new HyracksDataException(e);
}
@@ -93,8 +94,6 @@
@Override
public void close(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
-
}
}
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
index 68258d7..07cfab4 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
@@ -15,6 +15,7 @@
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
@@ -26,7 +27,7 @@
import edu.uci.ics.genomix.hyracks.driver.Driver;
import edu.uci.ics.genomix.hyracks.driver.Driver.Plan;
import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
@SuppressWarnings("deprecation")
public class JobRunStepByStepTest {
@@ -59,10 +60,10 @@
@Test
public void TestAll() throws Exception {
-// TestReader();
-// TestGroupbyKmer();
-// TestMapKmerToRead();
-// TestGroupByReadID();
+ TestReader();
+ TestGroupbyKmer();
+ TestMapKmerToRead();
+ TestGroupByReadID();
TestEndToEnd();
}
@@ -97,7 +98,8 @@
}
public void TestEndToEnd() throws Exception {
- conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
+ //conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
+ conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
cleanUpReEntry();
conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
driver.runJob(new GenomixJobConf(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
@@ -187,18 +189,14 @@
}
SequenceFile.Reader reader = new SequenceFile.Reader(dfs, path, conf);
- // KmerBytesWritable key = (KmerBytesWritable)
- // ReflectionUtils.newInstance(reader.getKeyClass(), conf);
- KmerBytesWritable key = new KmerBytesWritable(conf.getInt(GenomixJobConf.KMER_LENGTH, KmerSize));
- // KmerCountValue value = (KmerCountValue)
- // ReflectionUtils.newInstance(reader.getValueClass(), conf);
- KmerBytesWritable value = null;
- while (reader.next(key, value)) {
- if (key == null || value == null) {
+ NodeWritable node = new NodeWritable(conf.getInt(GenomixJobConf.KMER_LENGTH, KmerSize));
+ NullWritable value = NullWritable.get();
+ while (reader.next(node, value)) {
+ if (node == null) {
break;
}
- bw.write(key.toString() + "\t" + value.toString());
- System.out.println(key.toString() + "\t" + value.toString());
+ bw.write(node.toString() );
+ System.out.println(node.toString());
bw.newLine();
}
reader.close();
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
index 6e6a504..d22bd0c 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
@@ -164,6 +164,9 @@
}
private static boolean containStrings(String lineExpected, String actualLine, int[] poslistField) {
+// if (lineExpected.equals(actualLine)){
+// return true;
+// }
String[] fieldsExp = lineExpected.split("\\\t");
String[] fieldsAct = actualLine.split("\\\t");
if (fieldsAct.length != fieldsExp.length) {