Change KMerSequenceWriterFactory to write through NonSyncWriter instead of SequenceFile.Writer
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@3000 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/KMerSequenceWriterFactory.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/KMerSequenceWriterFactory.java
index c8f7e63..88ae57d 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/KMerSequenceWriterFactory.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/KMerSequenceWriterFactory.java
@@ -4,12 +4,9 @@
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.mapred.JobConf;
+import edu.uci.ics.genomix.dataflow.util.NonSyncWriter;
import edu.uci.ics.genomix.type.KmerCountValue;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
@@ -33,7 +30,8 @@
}
ConfFactory cf;
- Writer writer = null;
+ //Writer writer = null;
+ NonSyncWriter writer = null;
KmerCountValue reEnterCount = new KmerCountValue();
/**
@@ -44,9 +42,10 @@
throws HyracksDataException {
try {
if (writer == null) {
- writer = SequenceFile.createWriter(cf.getConf(),
- (FSDataOutputStream) output, BytesWritable.class,
- BytesWritable.class, CompressionType.NONE, null);
+ writer = new NonSyncWriter((FSDataOutputStream) output);
+// writer = SequenceFile.createWriter(cf.getConf(),
+// (FSDataOutputStream) output, BytesWritable.class,
+// BytesWritable.class, CompressionType.NONE, null);
}
byte[] kmer = tuple.getFieldData(0);
int keyStart = tuple.getFieldStart(0);
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/util/NonSyncWriter.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/util/NonSyncWriter.java
new file mode 100644
index 0000000..24c4113
--- /dev/null
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/util/NonSyncWriter.java
@@ -0,0 +1,26 @@
+package edu.uci.ics.genomix.dataflow.util;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.io.SequenceFile.ValueBytes;
+
+/** Appends length-prefixed raw key/value records to a stream; it writes no header or sync marker. */
+public class NonSyncWriter {
+    private final FSDataOutputStream sink;
+
+    /** @param output stream that receives the records; this class never flushes or closes it */
+    public NonSyncWriter(FSDataOutputStream output) {
+        this.sink = output;
+    }
+
+    /** Writes: int record length, int key length, key bytes, then the uncompressed value bytes. */
+    public void appendRaw(byte[] keyData, int keyOffset, int keyLength,
+            ValueBytes val) throws IOException {
+        final int recordLength = keyLength + val.getSize(); // key length plus val.getSize()
+        sink.writeInt(recordLength);
+        sink.writeInt(keyLength);
+        sink.write(keyData, keyOffset, keyLength);
+        val.writeUncompressedBytes(sink);
+    }
+}
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/type/Kmer.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/type/Kmer.java
index 7dd310c..8957d39 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/type/Kmer.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/type/Kmer.java
@@ -104,6 +104,14 @@
}
return strKmer.toString();
}
+
+    /** Returns the byte count for a kmer of length k: for non-negative k, k / 4 rounded up. */
+    public static int getByteNumFromK(int k) {
+        // Integer quotient, plus one extra byte when k is not a multiple of 4.
+        final int fullBytes = k / 4;
+        final boolean hasPartialByte = (k % 4) != 0;
+        return hasPartialByte ? fullBytes + 1 : fullBytes;
+    }
/**
* Compress Kmer into bytes array AATAG will compress as [0 0 0 G][A T A A]
@@ -116,7 +124,7 @@
* @return initialed kmer array
*/
public static byte[] CompressKmer(int k, byte[] array, int start) {
- final int byteNum = (byte) Math.ceil((double) k / 4.0);
+ final int byteNum = getByteNumFromK(k);
byte[] bytes = new byte[byteNum];
byte l = 0;