increase default frameSize to 128K
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
index 0f791a1..eab197e 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
@@ -18,6 +18,9 @@
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -31,11 +34,12 @@
public class MergeKmerAggregateFactory implements IAggregatorDescriptorFactory {
private static final long serialVersionUID = 1L;
-
+ private static final Log LOG = LogFactory.getLog(MergeKmerAggregateFactory.class);
@Override
public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
throws HyracksDataException {
+ final int frameSize = ctx.getFrameSize();
return new IAggregatorDescriptor() {
private PositionReference position = new PositionReference();
@@ -90,6 +94,9 @@
DataOutput fieldOutput = tupleBuilder.getDataOutput();
ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
try {
+ if (inputVal.getLength() > frameSize/2){
+ LOG.warn("MergeKmer: output data size is too big: " + inputVal.getLength());
+ }
fieldOutput.write(inputVal.getByteArray(), inputVal.getStartOffset(), inputVal.getLength());
tupleBuilder.addFieldEndOffset();
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
index ff9da9c..974dead 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
@@ -7,7 +7,6 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import edu.uci.ics.genomix.hyracks.driver.Driver;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
@@ -26,7 +25,7 @@
private static final long serialVersionUID = 1L;
private final int ValidPosCount;
- private static final Log LOG = LogFactory.getLog(Driver.class);
+ private static final Log LOG = LogFactory.getLog(MergeReadIDAggregateFactory.class);
public MergeReadIDAggregateFactory(int readLength, int kmerLength) {
ValidPosCount = getPositionCount(readLength, kmerLength);
@@ -196,6 +195,12 @@
LOG.warn("MergeReadID on read:" + readID + " is of size: " + totalSize + ", current frameSize:"
+ frameSize + "\n Recommendate to enlarge the FrameSize");
}
+ if (totalSize > frameSize){
+ for (StackTraceElement ste : Thread.currentThread().getStackTrace()) {
+ System.out.println(ste);
+ }
+ throw new HyracksDataException("Data is too long");
+ }
} catch (IOException e) {
throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
index 2d420c3..1f12bb5 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
@@ -50,7 +50,7 @@
public static final int DEFAULT_KMERLEN = 21;
public static final int DEFAULT_READLEN = 124;
- public static final int DEFAULT_FRAME_SIZE = 32768;
+ public static final int DEFAULT_FRAME_SIZE = 128*1024;
public static final int DEFAULT_FRAME_LIMIT = 4096;
public static final int DEFAULT_TABLE_SIZE = 10485767;
public static final long DEFAULT_GROUPBY_HYBRID_INPUTSIZE = 154000000L;