Added test for LSMBTree tuple writers and references. Found and fixed a bug where antimatter tuples used more space than necessary.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_tree@1085 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/util/TupleUtils.java b/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/util/TupleUtils.java
index df3ee67..8c482cb 100644
--- a/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/util/TupleUtils.java
+++ b/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/util/TupleUtils.java
@@ -84,6 +84,19 @@
return strBuilder.toString();
}
+    /**
+     * Deserializes the leading fields of the given tuple into Java objects.
+     * <p>
+     * Only the first {@code min(tuple.getFieldCount(), fields.length)} fields are decoded, so the
+     * tuple and the serde array may differ in length.
+     *
+     * @param tuple
+     *            tuple whose field bytes are decoded
+     * @param fields
+     *            serdes in field order; {@code fields[i]} decodes field {@code i}
+     * @return one deserialized object per decoded field, in field order
+     * @throws HyracksDataException
+     *             declared by the signature; presumably raised by a serde on bytes it cannot
+     *             decode
+     */
+    public static Object[] deserializeTuple(ITupleReference tuple, ISerializerDeserializer[] fields)
+            throws HyracksDataException {
+        final int fieldCount = Math.min(tuple.getFieldCount(), fields.length);
+        final Object[] values = new Object[fieldCount];
+        int fieldIdx = 0;
+        while (fieldIdx < fieldCount) {
+            // Expose only this field's byte range to the serde.
+            DataInput fieldInput = new DataInputStream(new ByteArrayInputStream(tuple.getFieldData(fieldIdx),
+                    tuple.getFieldStart(fieldIdx), tuple.getFieldLength(fieldIdx)));
+            values[fieldIdx] = fields[fieldIdx].deserialize(fieldInput);
+            fieldIdx++;
+        }
+        return values;
+    }
+
public static ITupleReference copyTuple(ITupleReference tuple) throws HyracksDataException {
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(tuple.getFieldCount());
for (int i = 0; i < tuple.getFieldCount(); i++) {
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DataGenThread.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DataGenThread.java
index b06b7e7..150ca2e 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DataGenThread.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DataGenThread.java
@@ -6,9 +6,6 @@
import java.util.concurrent.LinkedBlockingQueue;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.DoubleSerializerDeserializer;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
/**
* Quick & dirty data generator for performance testing.
@@ -19,7 +16,6 @@
private final int maxNumBatches;
private final int maxOutstandingBatches;
private int numBatches;
- private final boolean sorted;
private final Random rnd;
// maxOutstandingBatches pre-created tuple-batches for populating the queue.
@@ -29,13 +25,9 @@
public DataGenThread(int maxNumBatches, int batchSize, int maxOutstandingBatches, int numConsumers, ISerializerDeserializer[] fieldSerdes, int payloadSize, int rndSeed, boolean sorted) {
this.maxNumBatches = maxNumBatches;
this.maxOutstandingBatches = maxOutstandingBatches;
- this.sorted = sorted;
rnd = new Random(rndSeed);
tupleBatches = new TupleBatch[maxOutstandingBatches];
- IFieldValueGenerator[] fieldGens = new IFieldValueGenerator[fieldSerdes.length];
- for (int i = 0; i < fieldSerdes.length; i++) {
- fieldGens[i] = getFieldGenFromSerde(fieldSerdes[i]);
- }
+ IFieldValueGenerator[] fieldGens = DataGenUtils.getFieldGensFromSerdes(fieldSerdes, rnd, sorted);
for (int i = 0; i < maxOutstandingBatches; i++) {
tupleBatches[i] = new TupleBatch(batchSize, fieldGens, fieldSerdes, payloadSize);
}
@@ -64,37 +56,4 @@
}
}
}
-
- public IFieldValueGenerator getFieldGenFromSerde(ISerializerDeserializer serde) {
- if (serde instanceof IntegerSerializerDeserializer) {
- if (sorted) {
- return new SortedIntegerFieldValueGenerator();
- } else {
- return new IntegerFieldValueGenerator(rnd);
- }
- } else if (serde instanceof FloatSerializerDeserializer) {
- if (sorted) {
- return new SortedFloatFieldValueGenerator();
- } else {
- return new FloatFieldValueGenerator(rnd);
- }
- } else if (serde instanceof DoubleSerializerDeserializer) {
- if (sorted) {
- return new SortedDoubleFieldValueGenerator();
- } else {
- return new DoubleFieldValueGenerator(rnd);
- }
- }
- System.out.println("NULL");
- //if (serde instanceof Integer64SerializerDeserializer) {
- // throw new UnsupportedOperationException("Binary comparator factory for Integer64 not implemented.");
- //}
- //if (serde instanceof FloatSerializerDeserializer) {
- // return FloatBinaryComparatorFactory.INSTANCE;
- //}
- //if (serde instanceof DoubleSerializerDeserializer) {
- // return DoubleBinaryComparatorFactory.INSTANCE;
- //}
- return null;
- }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DataGenUtils.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DataGenUtils.java
new file mode 100644
index 0000000..fdbaa3e
--- /dev/null
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DataGenUtils.java
@@ -0,0 +1,84 @@
+package edu.uci.ics.hyracks.storage.am.common.datagen;
+
+import java.util.Random;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.DoubleSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+
+/**
+ * Static helpers that choose an {@link IFieldValueGenerator} for a field from the concrete
+ * type of its serializer/deserializer.
+ */
+@SuppressWarnings("rawtypes")
+public class DataGenUtils {
+    /**
+     * Picks the value generator that matches the given serde.
+     * <p>
+     * Integer, float and double serdes map to their {@code Sorted*} generator when
+     * {@code sorted} is true and to the {@link Random}-backed generator otherwise. UTF8 string
+     * serdes always map to a {@link StringFieldValueGenerator} with maximum length 20, so
+     * {@code sorted} has no effect on them.
+     *
+     * @param serde
+     *            serde whose runtime type selects the generator
+     * @param rnd
+     *            random source passed to the non-sorted generators
+     * @param sorted
+     *            whether numeric fields use the sorted generator variants
+     * @return the matching generator, or {@code null} (after printing "NULL" to stdout) if the
+     *         serde is none of the four types handled here
+     */
+    public static IFieldValueGenerator getFieldGenFromSerde(ISerializerDeserializer serde, Random rnd, boolean sorted) {
+        if (serde instanceof IntegerSerializerDeserializer) {
+            if (!sorted) {
+                return new IntegerFieldValueGenerator(rnd);
+            }
+            return new SortedIntegerFieldValueGenerator();
+        }
+        if (serde instanceof FloatSerializerDeserializer) {
+            if (!sorted) {
+                return new FloatFieldValueGenerator(rnd);
+            }
+            return new SortedFloatFieldValueGenerator();
+        }
+        if (serde instanceof DoubleSerializerDeserializer) {
+            if (!sorted) {
+                return new DoubleFieldValueGenerator(rnd);
+            }
+            return new SortedDoubleFieldValueGenerator();
+        }
+        if (serde instanceof UTF8StringSerializerDeserializer) {
+            return new StringFieldValueGenerator(20, rnd);
+        }
+        // No generator exists for this serde type: report it on stdout and hand back null.
+        System.out.println("NULL");
+        return null;
+    }
+
+    /**
+     * Builds one generator per serde by calling
+     * {@link #getFieldGenFromSerde(ISerializerDeserializer, Random, boolean)} on each element.
+     *
+     * @param serdes
+     *            serdes describing the fields, in field order
+     * @param rnd
+     *            random source shared by all generators that need one
+     * @param sorted
+     *            forwarded unchanged to the per-field selection
+     * @return array parallel to {@code serdes}; an entry is {@code null} where the serde type is
+     *         unsupported
+     */
+    public static IFieldValueGenerator[] getFieldGensFromSerdes(ISerializerDeserializer[] serdes, Random rnd, boolean sorted) {
+        final int fieldCount = serdes.length;
+        final IFieldValueGenerator[] generators = new IFieldValueGenerator[fieldCount];
+        int field = 0;
+        while (field < fieldCount) {
+            generators[field] = getFieldGenFromSerde(serdes[field], rnd, sorted);
+            field++;
+        }
+        return generators;
+    }
+}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
new file mode 100644
index 0000000..0218542
--- /dev/null
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
@@ -0,0 +1,51 @@
+package edu.uci.ics.hyracks.storage.am.common.datagen;
+
+import java.util.Random;
+
+/**
+ * Generates pseudo-random strings made of hexadecimal digits.
+ * <p>
+ * Every value is drawn from the hex rendering of a random double's bit pattern, so a value is
+ * never longer than that rendering (at most 16 characters), even when {@code maxLen} is larger.
+ */
+public class StringFieldValueGenerator implements IFieldValueGenerator<String> {
+    private int maxLen;
+    private final Random rnd;
+
+    /**
+     * @param maxLen
+     *            upper bound on the length of generated strings
+     * @param rnd
+     *            random source consumed by {@link #next()}
+     */
+    public StringFieldValueGenerator(int maxLen, Random rnd) {
+        this.maxLen = maxLen;
+        this.rnd = rnd;
+    }
+
+    /** Replaces the upper bound on the length of strings generated from now on. */
+    public void setMaxLength(int maxLen) {
+        this.maxLen = maxLen;
+    }
+
+    /**
+     * Produces the next random string.
+     *
+     * @return a string of hex digits whose length is the smaller of {@code maxLen} and the
+     *         length of the hex rendering used as character pool; empty if {@code maxLen <= 0}
+     */
+    @Override
+    public String next() {
+        // Character pool: hex digits of a random double's 64-bit pattern (never empty).
+        String pool = Long.toHexString(Double.doubleToLongBits(rnd.nextDouble()));
+        int poolLen = pool.length();
+        int outLen = Math.min(poolLen, maxLen);
+        StringBuilder strBuilder = new StringBuilder();
+        for (int i = 0; i < outLen; i++) {
+            // Reduce modulo poolLen before taking the absolute value: Math.abs(Integer.MIN_VALUE)
+            // is negative, so abs-then-modulo could produce a negative index.
+            strBuilder.append(pool.charAt(Math.abs(rnd.nextInt() % poolLen)));
+        }
+        return strBuilder.toString();
+    }
+}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
index 3f88dce..8266387 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
@@ -319,7 +319,9 @@
}
diskBTree.endBulkLoad(bulkLoadCtx);
resetMemBTree();
- diskBTrees.addFirst(diskBTree);
+ synchronized (diskBTrees) {
+ diskBTrees.addFirst(diskBTree);
+ }
}
private void resetMemBTree() throws HyracksDataException {
@@ -385,7 +387,7 @@
// If includeMemBTree is false, then it is possible that a concurrent
// flush adds another on-disk BTree.
// Since this mode is only used for merging trees, it doesn't really
- // matter if the merge excludes the new on-disk BTree,
+ // matter if the merge excludes the new on-disk BTree.
List<BTree> diskBTreesSnapshot = new ArrayList<BTree>();
AtomicInteger localSearcherRefCount = null;
synchronized (diskBTrees) {
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTupleWriter.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTupleWriter.java
index 4dc478f..dd8d2b9 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTupleWriter.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTupleWriter.java
@@ -21,16 +21,26 @@
import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriter;
public class LSMBTreeTupleWriter extends TypeAwareTupleWriter {
- private final boolean isDelete;
+ private final boolean isAntimatter;
private final int numKeyFields;
- public LSMBTreeTupleWriter(ITypeTraits[] typeTraits, int numKeyFields, boolean isDelete) {
+ public LSMBTreeTupleWriter(ITypeTraits[] typeTraits, int numKeyFields, boolean isAntimatter) {
super(typeTraits);
this.numKeyFields = numKeyFields;
- this.isDelete = isDelete;
+ this.isAntimatter = isAntimatter;
}
@Override
+    public int bytesRequired(ITupleReference tuple) {
+        if (!isAntimatter) {
+            // Matter tuples are sized by the inherited computation over the whole tuple.
+            return super.bytesRequired(tuple);
+        }
+        // Antimatter tuples are written with key fields only, so size just those fields.
+        return super.bytesRequired(tuple, 0, numKeyFields);
+    }
+
+ @Override
public ITreeIndexTupleReference createTupleReference() {
return new LSMBTreeTupleReference(typeTraits, numKeyFields);
}
@@ -50,7 +60,7 @@
@Override
public int writeTuple(ITupleReference tuple, byte[] targetBuf, int targetOff) {
int bytesWritten = -1;
- if (isDelete) {
+ if (isAntimatter) {
bytesWritten = super.writeTupleFields(tuple, 0, numKeyFields, targetBuf, targetOff);
setAntimatterBit(targetBuf, targetOff);
} else {
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
new file mode 100644
index 0000000..47f4c03
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.btree.tuples;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.util.SerdeUtils;
+import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
+import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenUtils;
+import edu.uci.ics.hyracks.storage.am.common.datagen.IFieldValueGenerator;
+
+@SuppressWarnings("rawtypes")
+public class LSMBTreeTuplesTest {
+
+ private final Random rnd = new Random(50);
+
+ private ByteBuffer writeTuple(ITupleReference tuple, LSMBTreeTupleWriter tupleWriter) {
+ // Write the tuple into a buffer sized exactly by bytesRequired(), so it can be read back later.
+ int bytesRequired = tupleWriter.bytesRequired(tuple);
+ byte[] bytes = new byte[bytesRequired];
+ ByteBuffer targetBuf = ByteBuffer.wrap(bytes);
+ tupleWriter.writeTuple(tuple, bytes, 0);
+ return targetBuf;
+ }
+
+ private void testLSMBTreeTuple(ISerializerDeserializer[] maxFieldSerdes) throws HyracksDataException {
+ // Create a tuple with all maxFieldSerdes.length fields; its tuple reference is narrowed later via setFieldCount().
+ ITypeTraits[] maxTypeTraits = SerdeUtils.serdesToTypeTraits(maxFieldSerdes);
+ IFieldValueGenerator[] maxFieldGens = DataGenUtils.getFieldGensFromSerdes(maxFieldSerdes, rnd, false);
+ // Generate a tuple with random field values.
+ Object[] maxFields = new Object[maxFieldSerdes.length];
+ for (int j = 0; j < maxFieldSerdes.length; j++) {
+ maxFields[j] = maxFieldGens[j].next();
+ }
+
+ // Run test for varying number of fields and keys.
+ for (int numKeyFields = 1; numKeyFields < maxFieldSerdes.length; numKeyFields++) {
+ // Create tuples with varying number of fields, and try to interpret their bytes with the lsmBTreeTuple.
+ for (int numFields = numKeyFields; numFields <= maxFieldSerdes.length; numFields++) {
+ // Create and write tuple to bytes using an LSMBTreeTupleWriter.
+ LSMBTreeTupleWriter maxMatterTupleWriter = new LSMBTreeTupleWriter(maxTypeTraits, numKeyFields, false);
+ ITupleReference maxTuple = TupleUtils.createTuple(maxFieldSerdes, (Object[])maxFields);
+ ByteBuffer maxMatterBuf = writeTuple(maxTuple, maxMatterTupleWriter);
+ // Tuple reference should work for both matter and antimatter tuples (doesn't matter which writer creates it).
+ LSMBTreeTupleReference maxLsmBTreeTuple = (LSMBTreeTupleReference) maxMatterTupleWriter.createTupleReference();
+
+ ISerializerDeserializer[] fieldSerdes = Arrays.copyOfRange(maxFieldSerdes, 0, numFields);
+ ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
+ IFieldValueGenerator[] fieldGens = DataGenUtils.getFieldGensFromSerdes(fieldSerdes, rnd, false);
+ // Generate a tuple with random field values.
+ Object[] fields = new Object[numFields];
+ for (int j = 0; j < numFields; j++) {
+ fields[j] = fieldGens[j].next();
+ }
+ // Create and write tuple to bytes using an LSMBTreeTupleWriter.
+ ITupleReference tuple = TupleUtils.createTuple(fieldSerdes, (Object[])fields);
+ LSMBTreeTupleWriter matterTupleWriter = new LSMBTreeTupleWriter(typeTraits, numKeyFields, false);
+ LSMBTreeTupleWriter antimatterTupleWriter = new LSMBTreeTupleWriter(typeTraits, numKeyFields, true);
+ ByteBuffer matterBuf = writeTuple(tuple, matterTupleWriter);
+ ByteBuffer antimatterBuf = writeTuple(tuple, antimatterTupleWriter);
+
+ // The antimatter buf should only contain keys, sanity check the size.
+ if (numFields != numKeyFields) {
+ assertTrue(antimatterBuf.array().length < matterBuf.array().length);
+ }
+
+ // Tuple reference should work for both matter and antimatter tuples (doesn't matter which writer creates it).
+ LSMBTreeTupleReference lsmBTreeTuple = (LSMBTreeTupleReference) matterTupleWriter.createTupleReference();
+
+ // Use LSMBTree tuple reference to interpret the written tuples.
+ // Repeat the block inside to test that repeated resetting to matter/antimatter tuples works.
+ for (int r = 0; r < 4; r++) {
+
+ // Check matter tuple with lsmBTreeTuple.
+ lsmBTreeTuple.resetByTupleOffset(matterBuf, 0);
+ assertEquals(numFields, lsmBTreeTuple.getFieldCount());
+ assertFalse(lsmBTreeTuple.isAntimatter());
+ Object[] deserMatterTuple = TupleUtils.deserializeTuple(lsmBTreeTuple, fieldSerdes);
+ for (int j = 0; j < numFields; j++) {
+ assertEquals(fields[j], deserMatterTuple[j]);
+ }
+
+ // Check antimatter tuple with lsmBTreeTuple.
+ lsmBTreeTuple.resetByTupleOffset(antimatterBuf, 0);
+ // Should only contain keys.
+ assertEquals(numKeyFields, lsmBTreeTuple.getFieldCount());
+ assertTrue(lsmBTreeTuple.isAntimatter());
+ Object[] deserAntimatterTuple = TupleUtils.deserializeTuple(lsmBTreeTuple, fieldSerdes);
+ for (int j = 0; j < numKeyFields; j++) {
+ assertEquals(fields[j], deserAntimatterTuple[j]);
+ }
+
+ // Check matter tuple with maxLsmBTreeTuple.
+ // We should be able to manually set a prefix of the fields
+ // (of the type traits passed to the tuple writer's constructor).
+ maxLsmBTreeTuple.setFieldCount(numFields);
+ maxLsmBTreeTuple.resetByTupleOffset(matterBuf, 0);
+ assertEquals(numFields, maxLsmBTreeTuple.getFieldCount());
+ assertFalse(maxLsmBTreeTuple.isAntimatter());
+ Object[] maxDeserMatterTuple = TupleUtils.deserializeTuple(maxLsmBTreeTuple, fieldSerdes);
+ for (int j = 0; j < numFields; j++) {
+ assertEquals(fields[j], maxDeserMatterTuple[j]);
+ }
+
+ // Check antimatter tuple with maxLsmBTreeTuple.
+ maxLsmBTreeTuple.resetByTupleOffset(antimatterBuf, 0);
+ // Should only contain keys (numKeyFields of the current iteration, as for lsmBTreeTuple above).
+ assertEquals(numKeyFields, maxLsmBTreeTuple.getFieldCount());
+ assertTrue(maxLsmBTreeTuple.isAntimatter());
+ Object[] maxDeserAntimatterTuple = TupleUtils.deserializeTuple(maxLsmBTreeTuple, fieldSerdes);
+ for (int j = 0; j < numKeyFields; j++) {
+ assertEquals(fields[j], maxDeserAntimatterTuple[j]);
+ }
+
+ // Resetting maxLsmBTreeTuple should set its field count to
+ // maxFieldSerdes.length, based on its type traits.
+ maxLsmBTreeTuple.resetByTupleOffset(maxMatterBuf, 0);
+ assertEquals(maxFieldSerdes.length, maxLsmBTreeTuple.getFieldCount());
+ assertFalse(maxLsmBTreeTuple.isAntimatter());
+ Object[] maxMaxMatterTuple = TupleUtils.deserializeTuple(maxLsmBTreeTuple, maxFieldSerdes);
+ for (int j = 0; j < maxFieldSerdes.length; j++) {
+ assertEquals(maxFields[j], maxMaxMatterTuple[j]);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testLSMBTreeTuple() throws HyracksDataException {
+ ISerializerDeserializer[] intFields = new IntegerSerializerDeserializer[] {
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE };
+ testLSMBTreeTuple(intFields);
+
+ ISerializerDeserializer[] stringFields = new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE };
+ testLSMBTreeTuple(stringFields);
+
+ ISerializerDeserializer[] mixedFields = new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE };
+ testLSMBTreeTuple(mixedFields);
+ }
+}