Add ReadsKeyValueParserFactory for the new graph, used for reading from HDFS
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
index be65d55..6a5ad7b 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
@@ -15,7 +15,6 @@
package edu.uci.ics.genomix.type;
-import javax.management.RuntimeErrorException;
public class GeneCode {
public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/IntermediateNodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/IntermediateNodeWritable.java
index 4684a06..3f59e32 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/IntermediateNodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/IntermediateNodeWritable.java
@@ -16,34 +16,34 @@
private KmerListWritable forwardReverseList;
private KmerListWritable reverseForwardList;
private KmerListWritable reverseReverseList;
- private long uuid;
+ private PositionWritable uniqueKey;
public IntermediateNodeWritable(){
forwardForwardList = new KmerListWritable();
forwardReverseList = new KmerListWritable();
reverseForwardList = new KmerListWritable();
reverseReverseList = new KmerListWritable();
- uuid = 0L;
+ uniqueKey = new PositionWritable();
}
public IntermediateNodeWritable(KmerListWritable FFList, KmerListWritable FRList,
- KmerListWritable RFList, KmerListWritable RRList, long uuid) {
+ KmerListWritable RFList, KmerListWritable RRList, PositionWritable uniqueKey) {
this();
- set(FFList, FRList, RFList, RRList, uuid);
+ set(FFList, FRList, RFList, RRList, uniqueKey);
}
public void set(IntermediateNodeWritable node){
set(node.forwardForwardList, node.forwardReverseList, node.reverseForwardList,
- node.reverseReverseList, node.uuid);
+ node.reverseReverseList, node.uniqueKey);
}
public void set(KmerListWritable FFList, KmerListWritable FRList,
- KmerListWritable RFList, KmerListWritable RRList, long uuid) {
+ KmerListWritable RFList, KmerListWritable RRList, PositionWritable uniqueKey) {
this.forwardForwardList.set(FFList);
this.forwardReverseList.set(FRList);
this.reverseForwardList.set(RFList);
this.reverseReverseList.set(RRList);
- this.uuid = uuid;
+ this.uniqueKey.set(uniqueKey);
}
public void reset(int kmerSize) {
@@ -51,7 +51,7 @@
forwardReverseList.reset();
reverseForwardList.reset();
reverseReverseList.reset();
- uuid = 0;
+ uniqueKey.reset();
}
public KmerListWritable getFFList() {
@@ -59,15 +59,15 @@
}
public void setFFList(KmerListWritable forwardForwardList) {
- this.forwardForwardList = forwardForwardList;
+ this.forwardForwardList.set(forwardForwardList);
}
- public KmerListWritable getFReList() {
+ public KmerListWritable getFRList() {
return forwardReverseList;
}
public void setFRList(KmerListWritable forwardReverseList) {
- this.forwardReverseList = forwardReverseList;
+ this.forwardReverseList.set(forwardReverseList);
}
public KmerListWritable getRFList() {
@@ -75,7 +75,7 @@
}
public void setRFList(KmerListWritable reverseForwardList) {
- this.reverseForwardList = reverseForwardList;
+ this.reverseForwardList.set(reverseForwardList);
}
public KmerListWritable getRRList() {
@@ -83,16 +83,24 @@
}
public void setRRList(KmerListWritable reverseReverseList) {
- this.reverseReverseList = reverseReverseList;
+ this.reverseReverseList.set(reverseReverseList);
}
- @Override
+ public PositionWritable getUniqueKey() {
+ return uniqueKey;
+ }
+
+ public void setUniqueKey(PositionWritable uniqueKey) {
+ this.uniqueKey.set(uniqueKey);
+ }
+
+ @Override
public void readFields(DataInput in) throws IOException {
this.forwardForwardList.readFields(in);
this.forwardReverseList.readFields(in);
this.reverseForwardList.readFields(in);
this.reverseReverseList.readFields(in);
- this.uuid = in.readLong();
+ this.uniqueKey.readFields(in);
}
@Override
@@ -101,18 +109,18 @@
this.forwardReverseList.write(out);
this.reverseForwardList.write(out);
this.reverseReverseList.write(out);
- out.writeLong(this.uuid);
+ this.uniqueKey.write(out);
}
@Override
public int compareTo(IntermediateNodeWritable other) {
// TODO Auto-generated method stub
- return this.uuid > other.uuid ? 1 : ((this.uuid == other.uuid) ? 0 : -1);
+ return this.uniqueKey.compareTo(other.uniqueKey);
}
@Override
public int hashCode() {
- return Long.valueOf(this.uuid).hashCode();
+ return this.uniqueKey.hashCode();
}
@Override
@@ -122,7 +130,7 @@
return (this.forwardForwardList.equals(nw.forwardForwardList)
&& this.forwardReverseList.equals(nw.forwardReverseList)
&& this.reverseForwardList.equals(nw.reverseForwardList)
- && this.reverseReverseList.equals(nw.reverseReverseList) && (this.uuid == nw.uuid));
+ && this.reverseReverseList.equals(nw.reverseReverseList) && (this.uniqueKey.equals(nw.uniqueKey)));
}
return false;
}
@@ -135,7 +143,7 @@
sbuilder.append(forwardReverseList.toString()).append('\t');
sbuilder.append(reverseForwardList.toString()).append('\t');
sbuilder.append(reverseReverseList.toString()).append('\t');
- sbuilder.append(uuid).append(')');
+ sbuilder.append(uniqueKey.toString()).append(')');
return sbuilder.toString();
}
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
index 4240b1d..6c9dfe4 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
@@ -131,14 +131,14 @@
@Override
public void readFields(DataInput in) throws IOException {
this.valueCount = in.readInt();
- setSize(valueCount * PositionWritable.LENGTH);
- in.readFully(storage, offset, valueCount * PositionWritable.LENGTH);
+ setSize(valueCount * KMER_LENGTH);
+ in.readFully(storage, offset, valueCount * KMER_LENGTH);
}
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(valueCount);
- out.write(storage, offset, valueCount * PositionWritable.LENGTH);
+ out.write(storage, offset, valueCount * KMER_LENGTH);
}
public int getCountOfPosition() {
@@ -152,4 +152,8 @@
public int getStartOffset() {
return offset;
}
+
+ public int getLength() {
+ return valueCount * KMER_LENGTH;
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
index 06d7d9c..1079677 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
@@ -57,6 +57,11 @@
this.offset = offset;
}
+ public void reset(){
+ storage = new byte[LENGTH];
+ offset = 0;
+ }
+
public long getUUID(){
return Marshal.getLong(storage, offset);
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
index 2b7dd81..97bb54a 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
@@ -24,11 +24,10 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
-import edu.uci.ics.genomix.type.GeneCode;
import edu.uci.ics.genomix.type.IntermediateNodeWritable;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.KmerListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
@@ -48,7 +47,6 @@
public static final int OutputPosition = 1;
public static final int OutputKmerListField = 2;
- private final boolean bReversed;
private final int readLength;
private final int kmerSize;
@@ -56,7 +54,6 @@
null });
public ReadsKeyValueParserFactory(int readlength, int k, boolean bGenerateReversed) {
- bReversed = bGenerateReversed;
this.readLength = readlength;
this.kmerSize = k;
}
@@ -72,9 +69,10 @@
private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
private KmerBytesWritable nextKmer = new KmerBytesWritable(kmerSize);
- private PositionReference pos = new PositionReference();
+ private PositionWritable uniqueKey = new PositionWritable();
private KmerListWritable kmerList = new KmerListWritable();
private IntermediateNodeWritable interMediateNode = new IntermediateNodeWritable();
+ private byte mateId = 0;
@Override
public void parse(LongWritable key, Text value, IFrameWriter writer) throws HyracksDataException {
@@ -111,37 +109,37 @@
nextKmer.set(kmer);
nextKmer.shiftKmerWithNextChar(array[kmerSize]);
kmerList.append(nextKmer);
+ uniqueKey.set(mateId, readID, 1);
+ interMediateNode.setUniqueKey(uniqueKey);
interMediateNode.setFFList(kmerList);
- InsertToFrame(kmer, readID, 1, writer);
+ InsertToFrame(kmer, interMediateNode, writer);
/** middle kmer */
for (int i = kmerSize; i < array.length; i++) {
kmer.shiftKmerWithNextChar(array[i]);
- InsertToFrame(kmer, readID, i - kmerSize + 2, writer);
- }
-
- if (bReversed) {
- /** first kmer */
- kmer.setByReadReverse(array, 0);
- InsertToFrame(kmer, readID, -1, writer);
- /** middle kmer */
- for (int i = kmerSize; i < array.length; i++) {
- kmer.shiftKmerWithPreCode(GeneCode.getPairedCodeFromSymbol(array[i]));
- InsertToFrame(kmer, readID, -(i - kmerSize + 2), writer);
- }
+ nextKmer.set(kmer);
+ nextKmer.shiftKmerWithNextChar(array[i+1]);
+ kmerList.append(nextKmer);
+ uniqueKey.set(mateId, readID, i - kmerSize + 2);
+ interMediateNode.setUniqueKey(uniqueKey);
+ interMediateNode.setFFList(kmerList);
+ InsertToFrame(kmer, interMediateNode, writer);
}
}
- private void InsertToFrame(KmerBytesWritable kmer, int readID, int posInRead, IFrameWriter writer) {
+ private void InsertToFrame(KmerBytesWritable kmer, IntermediateNodeWritable node, IFrameWriter writer) {
try {
- if (Math.abs(posInRead) > 127) {
- throw new IllegalArgumentException("Position id is beyond 127 at " + readID);
+ if (Math.abs(node.getUniqueKey().getPosId()) > 32768) {
+ throw new IllegalArgumentException("Position id is beyond 32768 at " + node.getUniqueKey().getReadId());
}
tupleBuilder.reset();
tupleBuilder.addField(kmer.getBytes(), kmer.getOffset(), kmer.getLength());
- pos.set(readID, (byte) posInRead);
- tupleBuilder.addField(pos.getByteArray(), pos.getStartOffset(), pos.getLength());
-
+ tupleBuilder.addField(node.getFFList().getByteArray(), node.getFFList().getStartOffset(), node.getFFList().getLength());
+ tupleBuilder.addField(node.getFRList().getByteArray(), node.getFRList().getStartOffset(), node.getFRList().getLength());
+ tupleBuilder.addField(node.getRFList().getByteArray(), node.getRFList().getStartOffset(), node.getRFList().getLength());
+ tupleBuilder.addField(node.getRRList().getByteArray(), node.getRRList().getStartOffset(), node.getRRList().getLength());
+ tupleBuilder.addField(node.getUniqueKey().getByteArray(), node.getUniqueKey().getStartOffset(), node.getUniqueKey().getLength());
+
if (!outputAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
tupleBuilder.getSize())) {
FrameUtils.flushFrame(outputBuffer, writer);