Merged hyracks_lsm_tree r2782:r2822.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_experiments@2824 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/InsertPipelineExample.java b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/InsertPipelineExample.java
index d3bb3f4..86c11d3 100644
--- a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/InsertPipelineExample.java
+++ b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/InsertPipelineExample.java
@@ -151,8 +151,8 @@
// create operator descriptor
TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsert = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, recDesc, storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits,
- primaryComparatorFactories, primaryFieldPermutation, IndexOperation.INSERT, dataflowHelperFactory, null,
- NoOpOperationCallbackFactory.INSTANCE);
+ primaryComparatorFactories, null, primaryFieldPermutation, IndexOperation.INSERT,
+ dataflowHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, primaryInsert, splitNCs);
// prepare insertion into secondary index
@@ -175,8 +175,8 @@
// create operator descriptor
TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsert = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, recDesc, storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, secondaryFieldPermutation, IndexOperation.INSERT, dataflowHelperFactory, null,
- NoOpOperationCallbackFactory.INSTANCE);
+ secondaryComparatorFactories, null, secondaryFieldPermutation, IndexOperation.INSERT,
+ dataflowHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, secondaryInsert, splitNCs);
// end the insert pipeline at this sink operator
diff --git a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
index e55882a..105fe9b 100644
--- a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
+++ b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
@@ -151,8 +151,8 @@
IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
IIndexDataflowHelperFactory dataflowHelperFactory = new BTreeDataflowHelperFactory();
TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManager, lcManagerProvider, btreeSplitProvider, typeTraits, comparatorFactories,
- fieldPermutation, 0.7f, false, dataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
+ storageManager, lcManagerProvider, btreeSplitProvider, typeTraits, comparatorFactories, null,
+ fieldPermutation, 0.7f, false, 1000L, dataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
// distribute the records from the datagen via hashing to the bulk load
diff --git a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/PrimaryIndexSearchExample.java b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
index 5402adb..afc3487 100644
--- a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
+++ b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
@@ -141,8 +141,8 @@
IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
IIndexDataflowHelperFactory dataflowHelperFactory = new BTreeDataflowHelperFactory();
BTreeSearchOperatorDescriptor btreeSearchOp = new BTreeSearchOperatorDescriptor(spec, recDesc, storageManager,
- lcManagerProvider, btreeSplitProvider, typeTraits, comparatorFactories, lowKeyFields, highKeyFields,
- true, true, dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
+ lcManagerProvider, btreeSplitProvider, typeTraits, comparatorFactories, null, lowKeyFields,
+ highKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, btreeSearchOp, splitNCs);
// have each node print the results of its respective B-Tree
diff --git a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
index 3a4e59f..0c6ab8e 100644
--- a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
+++ b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
@@ -143,8 +143,8 @@
int[] fieldPermutation = { 1, 0 };
IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManager, lcManagerProvider, btreeSplitProvider, secondaryTypeTraits, comparatorFactories,
- fieldPermutation, 0.7f, false, dataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
+ storageManager, lcManagerProvider, btreeSplitProvider, secondaryTypeTraits, comparatorFactories, null,
+ fieldPermutation, 0.7f, false, 1000L, dataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
// connect the ops
diff --git a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/SecondaryIndexSearchExample.java b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
index a09ff4f..e2c0be9 100644
--- a/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
+++ b/hyracks-examples/btree-example/btreeclient/src/main/java/edu/uci/ics/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
@@ -168,7 +168,7 @@
IIndexDataflowHelperFactory dataflowHelperFactory = new BTreeDataflowHelperFactory();
BTreeSearchOperatorDescriptor secondarySearchOp = new BTreeSearchOperatorDescriptor(spec, secondaryRecDesc,
storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- searchComparatorFactories, secondaryLowKeyFields, secondaryHighKeyFields, true, true,
+ searchComparatorFactories, null, secondaryLowKeyFields, secondaryHighKeyFields, true, true,
dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, secondarySearchOp, splitNCs);
@@ -184,7 +184,7 @@
IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- primaryLowKeyFields, primaryHighKeyFields, true, true, dataflowHelperFactory, false,
+ null, primaryLowKeyFields, primaryHighKeyFields, true, true, dataflowHelperFactory, false,
NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, primarySearchOp, splitNCs);
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
index 739f107..0af1193 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
@@ -75,6 +75,7 @@
protected final ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
protected final int primaryKeyFieldCount = 1;
protected final IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[primaryKeyFieldCount];
+ protected final int[] primaryBloomFilterKeyFields = new int[primaryKeyFieldCount];
protected final RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
@@ -90,6 +91,7 @@
protected final ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
protected final int secondaryKeyFieldCount = 2;
protected final IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[secondaryKeyFieldCount];
+ protected final int[] secondaryBloomFilterKeyFields = new int[secondaryKeyFieldCount];
protected final RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
@@ -122,12 +124,15 @@
primaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[5] = UTF8StringPointable.TYPE_TRAITS;
primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
+ primaryBloomFilterKeyFields[0] = 0;
// field, type and key declarations for secondary indexes
secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
secondaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
+ secondaryBloomFilterKeyFields[0] = 0;
+ secondaryBloomFilterKeyFields[1] = 1;
}
protected abstract IIndexDataflowHelperFactory createDataFlowHelperFactory();
@@ -137,7 +142,8 @@
TransientLocalResourceFactoryProvider localResourceFactoryProvider = new TransientLocalResourceFactoryProvider();
TreeIndexCreateOperatorDescriptor primaryCreateOp = new TreeIndexCreateOperatorDescriptor(spec, storageManager,
lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- dataflowHelperFactory, localResourceFactoryProvider, NoOpOperationCallbackFactory.INSTANCE);
+ primaryBloomFilterKeyFields, dataflowHelperFactory, localResourceFactoryProvider,
+ NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryCreateOp, NC1_ID);
spec.addRoot(primaryCreateOp);
runTest(spec);
@@ -172,7 +178,8 @@
int[] fieldPermutation = { 0, 1, 2, 4, 5, 7 };
TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- fieldPermutation, 0.7f, true, dataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
+ primaryBloomFilterKeyFields, fieldPermutation, 0.7f, true, 1000L, dataflowHelperFactory,
+ NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
@@ -188,8 +195,8 @@
TransientLocalResourceFactoryProvider localResourceFactoryProvider = new TransientLocalResourceFactoryProvider();
TreeIndexCreateOperatorDescriptor secondaryCreateOp = new TreeIndexCreateOperatorDescriptor(spec,
storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, dataflowHelperFactory, localResourceFactoryProvider,
- NoOpOperationCallbackFactory.INSTANCE);
+ secondaryComparatorFactories, secondaryBloomFilterKeyFields, dataflowHelperFactory,
+ localResourceFactoryProvider, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryCreateOp, NC1_ID);
spec.addRoot(secondaryCreateOp);
runTest(spec);
@@ -220,7 +227,7 @@
// scan primary index
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
+ primaryBloomFilterKeyFields, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
@@ -234,8 +241,8 @@
int[] fieldPermutation = { 3, 0 };
TreeIndexBulkLoadOperatorDescriptor secondaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, fieldPermutation, 0.7f, true, dataflowHelperFactory,
- NoOpOperationCallbackFactory.INSTANCE);
+ secondaryComparatorFactories, secondaryBloomFilterKeyFields, fieldPermutation, 0.7f, true, 1000L,
+ dataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeBulkLoad, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
@@ -272,16 +279,16 @@
int[] primaryFieldPermutation = { 0, 1, 2, 4, 5, 7 };
TreeIndexInsertUpdateDeleteOperatorDescriptor primaryBtreeInsertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, ordersDesc, storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits,
- primaryComparatorFactories, primaryFieldPermutation, pipelineOperation, dataflowHelperFactory, null,
- NoOpOperationCallbackFactory.INSTANCE);
+ primaryComparatorFactories, primaryBloomFilterKeyFields, primaryFieldPermutation, pipelineOperation,
+ dataflowHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeInsertOp, NC1_ID);
// first secondary index
int[] fieldPermutationB = { 4, 0 };
TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, ordersDesc, storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, fieldPermutationB, pipelineOperation, dataflowHelperFactory, null,
- NoOpOperationCallbackFactory.INSTANCE);
+ secondaryComparatorFactories, secondaryBloomFilterKeyFields, fieldPermutationB, pipelineOperation,
+ dataflowHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryInsertOp, NC1_ID);
NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
index f1c6877..7e1c42a 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
@@ -70,8 +70,8 @@
int[] highKeyFields = null; // + infinity
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
- storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits,
- primaryComparatorFactories, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
+ storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
+ primaryBloomFilterKeyFields, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
index 1a793fa..64bc657 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
@@ -76,7 +76,7 @@
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
+ primaryBloomFilterKeyFields, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexStatsOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexStatsOperatorTest.java
index ef93a13..c8d95ce 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexStatsOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreePrimaryIndexStatsOperatorTest.java
@@ -45,7 +45,7 @@
TreeIndexStatsOperatorDescriptor primaryStatsOp = new TreeIndexStatsOperatorDescriptor(spec, storageManager,
lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- dataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
+ primaryBloomFilterKeyFields, dataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryStatsOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
createTempFile().getAbsolutePath()) });
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
index 33bd22e..5a600a6 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
@@ -80,8 +80,8 @@
// search secondary index
BTreeSearchOperatorDescriptor secondaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec,
secondaryRecDesc, storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, secondaryLowKeyFields, secondaryHighKeyFields, true, true,
- dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
+ secondaryComparatorFactories, secondaryBloomFilterKeyFields, secondaryLowKeyFields,
+ secondaryHighKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeSearchOp, NC1_ID);
// second field from the tuples coming from secondary index
@@ -92,8 +92,8 @@
// search primary index
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- primaryLowKeyFields, primaryHighKeyFields, true, true, dataflowHelperFactory, false,
- NoOpOperationCallbackFactory.INSTANCE);
+ primaryBloomFilterKeyFields, primaryLowKeyFields, primaryHighKeyFields, true, true,
+ dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
index 999789a..e3005cf 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
@@ -79,8 +79,8 @@
// search secondary index
BTreeSearchOperatorDescriptor secondaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec,
secondaryRecDesc, storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, secondaryLowKeyFields, secondaryHighKeyFields, true, true,
- dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
+ secondaryComparatorFactories, secondaryBloomFilterKeyFields, secondaryLowKeyFields,
+ secondaryHighKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeSearchOp, NC1_ID);
int[] primaryLowKeyFields = { 1 }; // second field from the tuples
@@ -91,8 +91,8 @@
// search primary index
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- primaryLowKeyFields, primaryHighKeyFields, true, true, dataflowHelperFactory, false,
- NoOpOperationCallbackFactory.INSTANCE);
+ primaryBloomFilterKeyFields, primaryLowKeyFields, primaryHighKeyFields, true, true,
+ dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
index ebb5496..758e926 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
@@ -65,8 +65,8 @@
// search secondary index
BTreeSearchOperatorDescriptor secondaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec,
secondaryRecDesc, storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, secondaryLowKeyFields, secondaryHighKeyFields, true, true,
- dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
+ secondaryComparatorFactories, secondaryBloomFilterKeyFields, secondaryLowKeyFields,
+ secondaryHighKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeSearchOp, NC1_ID);
// second field from the tuples coming from secondary index
@@ -77,8 +77,8 @@
// search primary index
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- primaryLowKeyFields, primaryHighKeyFields, true, true, dataflowHelperFactory, false,
- NoOpOperationCallbackFactory.INSTANCE);
+ primaryBloomFilterKeyFields, primaryLowKeyFields, primaryHighKeyFields, true, true,
+ dataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/invertedindex/AbstractfWordInvertedIndexTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/invertedindex/AbstractfWordInvertedIndexTest.java
index 05b7a26..808afac 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/invertedindex/AbstractfWordInvertedIndexTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/invertedindex/AbstractfWordInvertedIndexTest.java
@@ -155,7 +155,7 @@
JobSpecification spec = new JobSpecification();
TransientLocalResourceFactoryProvider localResourceFactoryProvider = new TransientLocalResourceFactoryProvider();
TreeIndexCreateOperatorDescriptor primaryCreateOp = new TreeIndexCreateOperatorDescriptor(spec, storageManager,
- lcManagerProvider, primaryFileSplitProvider, primaryTypeTraits, primaryComparatorFactories,
+ lcManagerProvider, primaryFileSplitProvider, primaryTypeTraits, primaryComparatorFactories, null,
btreeDataflowHelperFactory, localResourceFactoryProvider, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryCreateOp, NC1_ID);
spec.addRoot(primaryCreateOp);
@@ -199,7 +199,7 @@
int[] fieldPermutation = { 0, 1 };
TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
storageManager, lcManagerProvider, primaryFileSplitProvider, primaryTypeTraits,
- primaryComparatorFactories, fieldPermutation, 0.7f, true, btreeDataflowHelperFactory,
+ primaryComparatorFactories, null, fieldPermutation, 0.7f, true, 1000L, btreeDataflowHelperFactory,
NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);
return primaryBtreeBulkLoad;
@@ -226,8 +226,8 @@
int[] highKeyFields = null; // + infinity
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
storageManager, lcManagerProvider, primaryFileSplitProvider, primaryTypeTraits,
- primaryComparatorFactories, lowKeyFields, highKeyFields, true, true, btreeDataflowHelperFactory, false,
- NoOpOperationCallbackFactory.INSTANCE);
+ primaryComparatorFactories, null, lowKeyFields, highKeyFields, true, true, btreeDataflowHelperFactory,
+ false, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
return primaryBtreeSearchOp;
}
@@ -273,7 +273,7 @@
private IOperatorDescriptor createInvertedIndexBulkLoadOp(JobSpecification spec, int[] fieldPermutation) {
LSMInvertedIndexBulkLoadOperatorDescriptor invIndexBulkLoadOp = new LSMInvertedIndexBulkLoadOperatorDescriptor(
- spec, fieldPermutation, true, storageManager, btreeFileSplitProvider, lcManagerProvider,
+ spec, fieldPermutation, true, 1000L, storageManager, btreeFileSplitProvider, lcManagerProvider,
tokenTypeTraits, tokenComparatorFactories, invListsTypeTraits, invListsComparatorFactories,
tokenizerFactory, invertedIndexDataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, invIndexBulkLoadOp, NC1_ID);
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
index fce6e69..912ab0e 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
@@ -18,7 +18,6 @@
import edu.uci.ics.hyracks.control.nc.io.IOManager;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.lsm.btree.dataflow.LSMBTreeDataflowHelperFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.dataflow.AbstractLSMIndexDataflowHelper;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.ConstantMergePolicyProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.SynchronousSchedulerProvider;
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexSearchOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexSearchOperatorTest.java
index 6f8f647..c1b1cd8 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexSearchOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexSearchOperatorTest.java
@@ -25,7 +25,7 @@
protected ITreeIndexOperatorTestHelper createTestHelper() throws HyracksException {
return new LSMBTreeOperatorTestHelper(TestStorageManagerComponentHolder.getIOManager());
}
-
+
@Override
protected IIndexDataflowHelperFactory createDataFlowHelperFactory() {
return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory();
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
index 428b354..c4a63df 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
@@ -183,7 +183,7 @@
JobSpecification spec = new JobSpecification();
TransientLocalResourceFactoryProvider localResourceFactoryProvider = new TransientLocalResourceFactoryProvider();
TreeIndexCreateOperatorDescriptor primaryCreateOp = new TreeIndexCreateOperatorDescriptor(spec, storageManager,
- lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
+ lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories, null,
btreeDataflowHelperFactory, localResourceFactoryProvider, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryCreateOp, NC1_ID);
spec.addRoot(primaryCreateOp);
@@ -223,7 +223,8 @@
int[] fieldPermutation = { 0, 1, 2, 4, 5, 7, 9, 10, 11, 12 };
TreeIndexBulkLoadOperatorDescriptor primaryBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- fieldPermutation, 0.7f, false, btreeDataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
+ null, fieldPermutation, 0.7f, false, 1000L, btreeDataflowHelperFactory,
+ NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBulkLoad, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
@@ -239,7 +240,7 @@
TransientLocalResourceFactoryProvider localResourceFactoryProvider = new TransientLocalResourceFactoryProvider();
TreeIndexCreateOperatorDescriptor secondaryCreateOp = new TreeIndexCreateOperatorDescriptor(spec,
storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, rtreeDataflowHelperFactory, localResourceFactoryProvider,
+ secondaryComparatorFactories, null, rtreeDataflowHelperFactory, localResourceFactoryProvider,
NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryCreateOp, NC1_ID);
spec.addRoot(secondaryCreateOp);
@@ -271,7 +272,7 @@
// scan primary index
BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
- lowKeyFields, highKeyFields, true, true, btreeDataflowHelperFactory, false,
+ null, lowKeyFields, highKeyFields, true, true, btreeDataflowHelperFactory, false,
NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primarySearchOp, NC1_ID);
@@ -279,7 +280,7 @@
int[] fieldPermutation = { 6, 7, 8, 9, 0 };
TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, fieldPermutation, 0.7f, false, rtreeDataflowHelperFactory,
+ secondaryComparatorFactories, null, fieldPermutation, 0.7f, false, 1000L, rtreeDataflowHelperFactory,
NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBulkLoad, NC1_ID);
@@ -320,15 +321,15 @@
int[] primaryFieldPermutation = { 0, 1, 2, 4, 5, 7, 9, 10, 11, 12 };
TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, ordersDesc, storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits,
- primaryComparatorFactories, primaryFieldPermutation, IndexOperation.INSERT, btreeDataflowHelperFactory,
- null, NoOpOperationCallbackFactory.INSTANCE);
+ primaryComparatorFactories, null, primaryFieldPermutation, IndexOperation.INSERT,
+ btreeDataflowHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryInsertOp, NC1_ID);
// secondary index
int[] secondaryFieldPermutation = { 9, 10, 11, 12, 0 };
TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, ordersDesc, storageManager, lcManagerProvider, secondarySplitProvider, secondaryTypeTraits,
- secondaryComparatorFactories, secondaryFieldPermutation, IndexOperation.INSERT,
+ secondaryComparatorFactories, null, secondaryFieldPermutation, IndexOperation.INSERT,
rtreeDataflowHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryInsertOp, NC1_ID);
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexInsertOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexInsertOperatorTest.java
index 417b074..8e68bcf 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexInsertOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexInsertOperatorTest.java
@@ -97,9 +97,9 @@
// search primary index
BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
- storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits,
- primaryComparatorFactories, primaryLowKeyFields, primaryHighKeyFields, true, true,
- btreeDataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
+ storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
+ null, primaryLowKeyFields, primaryHighKeyFields, true, true, btreeDataflowHelperFactory, false,
+ NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primarySearchOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
@@ -118,11 +118,12 @@
@Override
protected IIndexDataflowHelperFactory createDataFlowHelperFactory(
IPrimitiveValueProviderFactory[] secondaryValueProviderFactories, RTreePolicyType rtreePolicyType,
- IBinaryComparatorFactory[] btreeComparatorFactories, ILinearizeComparatorFactory linearizerCmpFactory) throws TreeIndexException {
+ IBinaryComparatorFactory[] btreeComparatorFactories, ILinearizeComparatorFactory linearizerCmpFactory)
+ throws TreeIndexException {
return ((RTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory(secondaryValueProviderFactories,
rtreePolicyType, null);
}
-
+
@Override
public void cleanup() throws Exception {
destroyPrimaryIndex();
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexSearchOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexSearchOperatorTest.java
index 8356c4d..5da122c 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexSearchOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexSearchOperatorTest.java
@@ -96,9 +96,9 @@
// search primary index
BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
- storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits,
- primaryComparatorFactories, primaryLowKeyFields, primaryHighKeyFields, true, true,
- btreeDataflowHelperFactory, false, NoOpOperationCallbackFactory.INSTANCE);
+ storageManager, lcManagerProvider, primarySplitProvider, primaryTypeTraits, primaryComparatorFactories,
+ null, primaryLowKeyFields, primaryHighKeyFields, true, true, btreeDataflowHelperFactory, false,
+ NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primarySearchOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
@@ -117,7 +117,8 @@
@Override
protected IIndexDataflowHelperFactory createDataFlowHelperFactory(
IPrimitiveValueProviderFactory[] secondaryValueProviderFactories, RTreePolicyType rtreePolicyType,
- IBinaryComparatorFactory[] btreeComparatorFactories, ILinearizeComparatorFactory linearizerCmpFactory) throws TreeIndexException {
+ IBinaryComparatorFactory[] btreeComparatorFactories, ILinearizeComparatorFactory linearizerCmpFactory)
+ throws TreeIndexException {
return ((RTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory(secondaryValueProviderFactories,
rtreePolicyType, null);
}
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexStatsOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexStatsOperatorTest.java
index cdc09be..83be1d9 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexStatsOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/rtree/RTreeSecondaryIndexStatsOperatorTest.java
@@ -50,7 +50,7 @@
JobSpecification spec = new JobSpecification();
TreeIndexStatsOperatorDescriptor secondaryStatsOp = new TreeIndexStatsOperatorDescriptor(spec, storageManager,
- lcManagerProvider, secondarySplitProvider, secondaryTypeTraits, secondaryComparatorFactories,
+ lcManagerProvider, secondarySplitProvider, secondaryTypeTraits, secondaryComparatorFactories, null,
rtreeDataflowHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryStatsOp, NC1_ID);
IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
@@ -70,7 +70,7 @@
return ((RTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory(secondaryValueProviderFactories,
rtreePolicyType, null);
}
-
+
@Override
public void cleanup() throws Exception {
destroyPrimaryIndex();
diff --git a/hyracks-storage-am-bloomfilter/pom.xml b/hyracks-storage-am-bloomfilter/pom.xml
new file mode 100644
index 0000000..dab96f9
--- /dev/null
+++ b/hyracks-storage-am-bloomfilter/pom.xml
@@ -0,0 +1,42 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-storage-am-bloomfilter</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+
+ <parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.6</source>
+ <target>1.6</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-storage-am-common</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomCalculations.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomCalculations.java
new file mode 100644
index 0000000..9c9a7be
--- /dev/null
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomCalculations.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter.impls;
+
+/**
+ * This class has been taken from cassandra source code with minor modifications.
+ */
+
+/**
+ * The following calculations are taken from:
+ * http://www.cs.wisc.edu/~cao/papers/summary-cache/node8.html
+ * "Bloom Filters - the math"
+ * This class's static methods are meant to facilitate the use of the Bloom
+ * Filter class by helping to choose correct values of 'bits per element' and
+ * 'number of hash functions, k'.
+ */
+public class BloomCalculations {
+
+ private static final int minBuckets = 2;
+ private static final int minK = 1;
+
+ /**
+ * In the following table, the row 'i' shows false positive rates if i buckets
+ * per element are used. Column 'j' shows false positive rates if j hash
+ * functions are used. The first row is 'i=0', the first column is 'j=0'.
+ * Each cell (i,j) the false positive rate determined by using i buckets per
+ * element and j hash functions.
+ */
+ static final double[][] probs = new double[][] {
+ { 1.0 }, // dummy row representing 0 buckets per element
+ { 1.0, 1.0 }, // dummy row representing 1 buckets per element
+ { 1.0, 0.393, 0.400 },
+ { 1.0, 0.283, 0.237, 0.253 },
+ { 1.0, 0.221, 0.155, 0.147, 0.160 },
+ { 1.0, 0.181, 0.109, 0.092, 0.092, 0.101 }, // 5
+ { 1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 0.0638 },
+ { 1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364 },
+ { 1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229 },
+ { 1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145 },
+ { 1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846 }, // 10
+ { 1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509 },
+ { 1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314 },
+ { 1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194 },
+ { 1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012 },
+ { 1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744 }, // 15
+ { 1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459 },
+ { 1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302,
+ 0.000287, 0.000284 },
+ { 1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198,
+ 0.000183, 0.000176 },
+ { 1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132,
+ 0.000118, 0.000111, 0.000109 },
+ { 1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05,
+ 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05 } // 20
+ }; // the first column is a dummy column representing K=0.
+
+ /**
+ * The optimal number of hashes for a given number of bits per element.
+ * These values are automatically calculated from the data above.
+ */
+ private static final int[] optKPerBuckets = new int[probs.length];
+
+ static {
+ for (int i = 0; i < probs.length; i++) {
+ double min = Double.MAX_VALUE;
+ double[] prob = probs[i];
+ for (int j = 0; j < prob.length; j++) {
+ if (prob[j] < min) {
+ min = prob[j];
+ optKPerBuckets[i] = Math.max(minK, j);
+ }
+ }
+ }
+ }
+
+ /**
+ * Given the number of buckets that can be used per element, return a
+ * specification that minimizes the false positive rate.
+ *
+ * @param bucketsPerElement
+ * The number of buckets per element for the filter.
+ * @return A spec that minimizes the false positive rate.
+ */
+ public static BloomFilterSpecification computeBloomSpec(int bucketsPerElement) {
+ assert bucketsPerElement >= 1;
+ assert bucketsPerElement <= probs.length - 1;
+ return new BloomFilterSpecification(optKPerBuckets[bucketsPerElement], bucketsPerElement);
+ }
+
+ /**
+ * Given a maximum tolerable false positive probability, compute a Bloom
+ * specification which will give less than the specified false positive rate,
+ * but minimize the number of buckets per element and the number of hash
+ * functions used. Because bandwidth (and therefore total bitvector size)
+ * is considered more expensive than computing power, preference is given
+ * to minimizing buckets per element rather than number of hash functions.
+ *
+ * @param maxBucketsPerElement
+ * The maximum number of buckets available for the filter.
+ * @param maxFalsePosProb
+ * The maximum tolerable false positive rate.
+ * @return A Bloom Specification which would result in a false positive rate
+ * less than specified by the function call
+ * @throws UnsupportedOperationException
+ * if a filter satisfying the parameters cannot be met
+ */
+ public static BloomFilterSpecification computeBloomSpec(int maxBucketsPerElement, double maxFalsePosProb) {
+ assert maxBucketsPerElement >= 1;
+ assert maxBucketsPerElement <= probs.length - 1;
+ int maxK = probs[maxBucketsPerElement].length - 1;
+
+ // Handle the trivial cases
+ if (maxFalsePosProb >= probs[minBuckets][minK]) {
+ return new BloomFilterSpecification(2, optKPerBuckets[2]);
+ }
+ if (maxFalsePosProb < probs[maxBucketsPerElement][maxK]) {
+ throw new UnsupportedOperationException(String.format("Unable to satisfy %s with %s buckets per element",
+ maxFalsePosProb, maxBucketsPerElement));
+ }
+
+ // First find the minimal required number of buckets:
+ int bucketsPerElement = 2;
+ int K = optKPerBuckets[2];
+ while (probs[bucketsPerElement][K] > maxFalsePosProb) {
+ bucketsPerElement++;
+ K = optKPerBuckets[bucketsPerElement];
+ }
+ // Now that the number of buckets is sufficient, see if we can relax K
+ // without losing too much precision.
+ while (probs[bucketsPerElement][K - 1] <= maxFalsePosProb) {
+ K--;
+ }
+
+ return new BloomFilterSpecification(K, bucketsPerElement);
+ }
+
+ /**
+ * Calculates the maximum number of buckets per element that this implementation
+ * can support. Crucially, it will lower the bucket count if necessary to meet
+ * BitSet's size restrictions.
+ */
+ public static int maxBucketsPerElement(long numElements) {
+ numElements = Math.max(1, numElements);
+ double v = Long.MAX_VALUE / (double) numElements;
+ if (v < 1.0) {
+ throw new UnsupportedOperationException("Cannot compute probabilities for " + numElements + " elements.");
+ }
+ return Math.min(BloomCalculations.probs.length - 1, (int) v);
+ }
+}
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
new file mode 100644
index 0000000..0e796b0
--- /dev/null
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
@@ -0,0 +1,269 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter.impls;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
+import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+
+public class BloomFilter {
+
+ private final static int METADATA_PAGE_ID = 0;
+ private final static int NUM_PAGES_OFFSET = 0; // 0
+ private final static int NUM_HASHES_USED_OFFSET = NUM_PAGES_OFFSET + 4; // 4
+ private final static int NUM_ELEMENTS_OFFSET = NUM_HASHES_USED_OFFSET + 4; // 8
+ private final static int NUM_BITS_OFFSET = NUM_ELEMENTS_OFFSET + 8; // 12
+
+ private final IBufferCache bufferCache;
+ private final IFileMapProvider fileMapProvider;
+ private final FileReference file;
+ private final int[] keyFields;
+ private int fileId = -1;
+ private boolean isActivated = false;
+
+ private int numPages;
+ private int numHashes;
+ private long numElements;
+ private long numBits;
+ private int numBitsPerPage;
+
+ private final ArrayList<ICachedPage> bloomFilterPages = new ArrayList<ICachedPage>();
+ private final static long SEED = 0L;
+
+ public BloomFilter(IBufferCache bufferCache, IFileMapProvider fileMapProvider, FileReference file, int[] keyFields)
+ throws HyracksDataException {
+ this.bufferCache = bufferCache;
+ this.fileMapProvider = fileMapProvider;
+ this.file = file;
+ this.keyFields = keyFields;
+ numBitsPerPage = bufferCache.getPageSize() * Byte.SIZE;
+ }
+
+ public int getFileId() {
+ return fileId;
+ }
+
+ public FileReference getFileReference() {
+ return file;
+ }
+
+ public int getNumPages() throws HyracksDataException {
+ if (!isActivated) {
+ throw new HyracksDataException("The bloom filter is not activated.");
+ }
+ return numPages;
+ }
+
+ public long getNumElements() throws HyracksDataException {
+ if (!isActivated) {
+ throw new HyracksDataException("The bloom filter is not activated.");
+ }
+ return numElements;
+ }
+
+ public boolean contains(ITupleReference tuple, long[] hashes) {
+ MurmurHash128Bit.hash3_x64_128(tuple, keyFields, SEED, hashes);
+ for (int i = 0; i < numHashes; ++i) {
+ long hash = Math.abs((hashes[0] + (long) i * hashes[1]) % numBits);
+
+ ByteBuffer buffer = bloomFilterPages.get((int) (hash / numBitsPerPage)).getBuffer();
+ int byteIndex = (int) (hash % numBitsPerPage) >> 3; // divide by 8
+ byte b = buffer.get(byteIndex);
+ int bitIndex = (int) (hash % numBitsPerPage) & 0x07; // mod 8
+
+ if (!((b & (1L << bitIndex)) != 0)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void prepareFile() throws HyracksDataException {
+ boolean fileIsMapped = false;
+ synchronized (fileMapProvider) {
+ fileIsMapped = fileMapProvider.isMapped(file);
+ if (!fileIsMapped) {
+ bufferCache.createFile(file);
+ }
+ fileId = fileMapProvider.lookupFileId(file);
+ try {
+ // Also creates the file if it doesn't exist yet.
+ bufferCache.openFile(fileId);
+ } catch (HyracksDataException e) {
+ // Revert state of buffer cache since file failed to open.
+ if (!fileIsMapped) {
+ bufferCache.deleteFile(fileId, false);
+ }
+ throw e;
+ }
+ }
+ }
+
+ public synchronized void create() throws HyracksDataException {
+ if (isActivated) {
+ throw new HyracksDataException("Failed to create the bloom filter since it is activated.");
+ }
+ prepareFile();
+ ICachedPage metaPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, METADATA_PAGE_ID), true);
+ metaPage.acquireWriteLatch();
+ metaPage.getBuffer().putInt(NUM_PAGES_OFFSET, 0);
+ metaPage.getBuffer().putInt(NUM_HASHES_USED_OFFSET, 0);
+ metaPage.getBuffer().putLong(NUM_ELEMENTS_OFFSET, 0L);
+ metaPage.getBuffer().putLong(NUM_BITS_OFFSET, 0L);
+ metaPage.releaseWriteLatch();
+ bufferCache.unpin(metaPage);
+ bufferCache.closeFile(fileId);
+ }
+
+ public synchronized void activate() throws HyracksDataException {
+ if (isActivated) {
+ return;
+ }
+
+ prepareFile();
+ readBloomFilterMetaData();
+
+ int currentPageId = 1;
+ while (currentPageId <= numPages) {
+ ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), false);
+ bloomFilterPages.add(page);
+ ++currentPageId;
+ }
+ isActivated = true;
+ }
+
+ private void readBloomFilterMetaData() throws HyracksDataException {
+ ICachedPage metaPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, METADATA_PAGE_ID), false);
+ metaPage.acquireReadLatch();
+ numPages = metaPage.getBuffer().getInt(NUM_PAGES_OFFSET);
+ numHashes = metaPage.getBuffer().getInt(NUM_HASHES_USED_OFFSET);
+ numElements = metaPage.getBuffer().getLong(NUM_ELEMENTS_OFFSET);
+ numBits = metaPage.getBuffer().getLong(NUM_BITS_OFFSET);
+ metaPage.releaseReadLatch();
+ bufferCache.unpin(metaPage);
+ }
+
+ public synchronized void deactivate() throws HyracksDataException {
+ if (!isActivated) {
+ return;
+ }
+
+ for (int i = 0; i < numPages; ++i) {
+ bufferCache.unpin(bloomFilterPages.get(i));
+ }
+ bloomFilterPages.clear();
+ bufferCache.closeFile(fileId);
+ isActivated = false;
+ }
+
+ public synchronized void destroy() throws HyracksDataException {
+ if (isActivated) {
+ throw new HyracksDataException("Failed to destroy the bloom filter since it is activated.");
+ }
+
+ file.delete();
+ if (fileId == -1) {
+ return;
+ }
+ bufferCache.deleteFile(fileId, false);
+ fileId = -1;
+ }
+
+ public IIndexBulkLoader createBuilder(long numElements, int numHashes, int numBitsPerElement)
+ throws HyracksDataException {
+ return new BloomFilterBuilder(numElements, numHashes, numBitsPerElement);
+ }
+
+ public class BloomFilterBuilder implements IIndexBulkLoader {
+ private final long[] hashes = new long[2];
+
+ private final long numElements;
+ private final int numHashes;
+ private final long numBits;
+ private final int numPages;
+
+ public BloomFilterBuilder(long numElements, int numHashes, int numBitsPerElement) throws HyracksDataException {
+ if (!isActivated) {
+ throw new HyracksDataException("Failed to create the bloom filter builder since it is not activated.");
+ }
+
+ this.numElements = numElements;
+ this.numHashes = numHashes;
+ numBits = numElements * numBitsPerElement;
+ long tmp = (long) Math.ceil(numBits / (double) numBitsPerPage);
+ if (tmp > Integer.MAX_VALUE) {
+ throw new HyracksDataException("Cannot create a bloom filter with his huge number of pages.");
+ }
+ numPages = (int) tmp;
+ if (numElements > 0) {
+ persistBloomFilterMetaData();
+ readBloomFilterMetaData();
+ int currentPageId = 1;
+ while (currentPageId <= numPages) {
+ ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
+ page.acquireWriteLatch();
+ bloomFilterPages.add(page);
+ ++currentPageId;
+ }
+ }
+ }
+
+ private void persistBloomFilterMetaData() throws HyracksDataException {
+ ICachedPage metaPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, METADATA_PAGE_ID), false);
+ metaPage.acquireWriteLatch();
+ metaPage.getBuffer().putInt(NUM_PAGES_OFFSET, numPages);
+ metaPage.getBuffer().putInt(NUM_HASHES_USED_OFFSET, numHashes);
+ metaPage.getBuffer().putLong(NUM_ELEMENTS_OFFSET, numElements);
+ metaPage.getBuffer().putLong(NUM_BITS_OFFSET, numBits);
+ metaPage.releaseWriteLatch();
+ bufferCache.unpin(metaPage);
+ }
+
+ @Override
+ public void add(ITupleReference tuple) throws IndexException, HyracksDataException {
+ MurmurHash128Bit.hash3_x64_128(tuple, keyFields, SEED, hashes);
+ for (int i = 0; i < numHashes; ++i) {
+ long hash = Math.abs((hashes[0] + (long) i * hashes[1]) % numBits);
+
+ ByteBuffer buffer = bloomFilterPages.get((int) (hash / numBitsPerPage)).getBuffer();
+ int byteIndex = (int) (hash % numBitsPerPage) >> 3; // divide by 8
+ byte b = buffer.get(byteIndex);
+ int bitIndex = (int) (hash % numBitsPerPage) & 0x07; // mod 8
+ b = (byte) (b | (1 << bitIndex));
+
+ buffer.put(byteIndex, b);
+ }
+ }
+
+ @Override
+ public void end() throws HyracksDataException, IndexException {
+ for (int i = 0; i < numPages; ++i) {
+ ICachedPage page = bloomFilterPages.get(i);
+ page.releaseWriteLatch();
+ }
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
new file mode 100644
index 0000000..d430e54
--- /dev/null
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter.impls;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+
+public class BloomFilterFactory { // hands its buffer cache, file map provider and key fields to each new BloomFilter
+    private final int[] bloomFilterKeyFields;
+    private final IFileMapProvider fileMapProvider;
+    private final IBufferCache bufferCache;
+
+    public BloomFilterFactory(IBufferCache bufferCache, IFileMapProvider fileMapProvider, int[] bloomFilterKeyFields) {
+        this.bloomFilterKeyFields = bloomFilterKeyFields;
+        this.fileMapProvider = fileMapProvider;
+        this.bufferCache = bufferCache;
+    }
+
+    public int[] getBloomFilterKeyFields() {
+        return bloomFilterKeyFields;
+    }
+
+    public BloomFilter createBloomFiltertInstance(FileReference file) throws HyracksDataException {
+        return new BloomFilter(bufferCache, fileMapProvider, file, bloomFilterKeyFields);
+    }
+}
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java
new file mode 100644
index 0000000..a1e5517
--- /dev/null
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter.impls;
+
+public final class BloomFilterSpecification { // immutable pair: buckets per element and number of hash functions
+    private final int numHashes;
+    private final int numBucketsPerElement;
+
+    public BloomFilterSpecification(int numBucketsPerElement, int numHashes) {
+        this.numHashes = numHashes;
+        this.numBucketsPerElement = numBucketsPerElement;
+    }
+
+    public int getNumHashes() {
+        return numHashes;
+    }
+
+    public int getNumBucketsPerElements() {
+        return numBucketsPerElement;
+    }
+}
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/MurmurHash128Bit.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/MurmurHash128Bit.java
new file mode 100644
index 0000000..0bc0a7f
--- /dev/null
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/MurmurHash128Bit.java
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter.impls;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+/**
+ * MurmurHash3-style 128-bit hash computed over the key fields of an {@link ITupleReference}.
+ * The idea of this class is borrowed from http://murmurhash.googlepages.com/ and the Cassandra source code;
+ * the hash function was changed to operate on an ITupleReference instead of a byte array.
+ */
+public class MurmurHash128Bit {
+
+    // Multipliers used to pre-mix each 64-bit half of a block.
+    private static final long C1 = 0x87c37b91114253d5L;
+    private static final long C2 = 0x4cf5ad432745937fL;
+
+    /**
+     * Rotates {@code v} left by {@code n} bits.
+     */
+    public static long rotl64(long v, int n) {
+        return ((v << n) | (v >>> (64 - n)));
+    }
+
+    /**
+     * Finalization mix: three xor-with-(k >>> 33) steps interleaved with two multiplications.
+     */
+    public static long fmix(long k) {
+        k ^= k >>> 33;
+        k *= 0xff51afd7ed558ccdL;
+        k ^= k >>> 33;
+        k *= 0xc4ceb9fe1a85ec53L;
+        k ^= k >>> 33;
+
+        return k;
+    }
+
+    /**
+     * Hashes the bytes of the given key fields, read in {@code keyFields} order as one concatenated byte
+     * sequence, and stores the two 64-bit halves of the result in {@code hashes[0]} and {@code hashes[1]}.
+     * Each field's bytes are read from that field's own backing array, and zero-length fields contribute
+     * nothing to the hash.
+     *
+     * @param tuple
+     *            tuple supplying the field data
+     * @param keyFields
+     *            indexes of the tuple fields to hash, in hashing order
+     * @param seed
+     *            initial value of both halves of the hash state
+     * @param hashes
+     *            output array; must have room for at least two elements
+     */
+    public static void hash3_x64_128(ITupleReference tuple, int[] keyFields, long seed, long[] hashes) {
+        int length = 0;
+        for (int i = 0; i < keyFields.length; ++i) {
+            length += tuple.getFieldLength(keyFields[i]);
+        }
+        // Bytes below bodyLength form complete 128-bit blocks; the remaining (at most 15) bytes are the tail.
+        final int bodyLength = (length >> 4) << 4;
+        final int tailLength = length & 15;
+
+        long h1 = seed;
+        long h2 = seed;
+        long k1 = 0L;
+        long k2 = 0L;
+
+        // pos is the offset of the current byte within the concatenated key.
+        int pos = 0;
+        for (int f = 0; f < keyFields.length; ++f) {
+            final int field = keyFields[f];
+            final byte[] data = tuple.getFieldData(field);
+            final int start = tuple.getFieldStart(field);
+            final int end = start + tuple.getFieldLength(field);
+            for (int p = start; p < end; ++p, ++pos) {
+                final int lane = pos & 15; // byte index within the current 16-byte block
+                final int shift = (lane & 7) << 3; // little-endian bit offset within k1 or k2
+                if (pos >= bodyLength) {
+                    // Tail bytes are deliberately sign-extended (not masked with 0xff): masking them
+                    // would change the hash values produced for keys with a non-empty tail.
+                    final long tailByte = ((long) data[p]) << shift;
+                    if (lane < 8) {
+                        k1 ^= tailByte;
+                    } else {
+                        k2 ^= tailByte;
+                    }
+                    continue;
+                }
+
+                final long bodyByte = ((long) data[p] & 0xff) << shift;
+                if (lane < 8) {
+                    k1 |= bodyByte;
+                } else {
+                    k2 |= bodyByte;
+                }
+                if (lane == 15) {
+                    // A full block has been gathered: fold it into the state and start the next one.
+                    h1 ^= mixK1(k1);
+                    h1 = rotl64(h1, 27);
+                    h1 += h2;
+                    h1 = h1 * 5 + 0x52dce729;
+
+                    h2 ^= mixK2(k2);
+                    h2 = rotl64(h2, 31);
+                    h2 += h1;
+                    h2 = h2 * 5 + 0x38495ab5;
+
+                    k1 = 0L;
+                    k2 = 0L;
+                }
+            }
+        }
+
+        // Fold in the tail: k1 holds tail bytes 0..7, k2 holds tail bytes 8..14.
+        if (tailLength > 8) {
+            h2 ^= mixK2(k2);
+        }
+        if (tailLength > 0) {
+            h1 ^= mixK1(k1);
+        }
+
+        //----------
+        // finalization
+
+        h1 ^= length;
+        h2 ^= length;
+
+        h1 += h2;
+        h2 += h1;
+
+        h1 = fmix(h1);
+        h2 = fmix(h2);
+
+        h1 += h2;
+        h2 += h1;
+
+        hashes[0] = h1;
+        hashes[1] = h2;
+    }
+
+    /** Pre-mixes the k1 half of a block (multiply, rotate by 31, multiply) before it is xor-ed into h1. */
+    private static long mixK1(long k1) {
+        k1 *= C1;
+        k1 = rotl64(k1, 31);
+        k1 *= C2;
+        return k1;
+    }
+
+    /** Pre-mixes the k2 half of a block (multiply, rotate by 33, multiply) before it is xor-ed into h2. */
+    private static long mixK2(long k2) {
+        k2 *= C2;
+        k2 = rotl64(k2, 33);
+        k2 *= C1;
+        return k2;
+    }
+
+}
\ No newline at end of file
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeSearchOperatorDescriptor.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeSearchOperatorDescriptor.java
index 26ea6f61..c56308b 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeSearchOperatorDescriptor.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeSearchOperatorDescriptor.java
@@ -43,11 +43,12 @@
public BTreeSearchOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc,
IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] comparatorFactories, int[] lowKeyFields, int[] highKeyFields,
- boolean lowKeyInclusive, boolean highKeyInclusive, IIndexDataflowHelperFactory dataflowHelperFactory,
- boolean retainInput, ISearchOperationCallbackFactory searchOpCallbackProvider) {
+ IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields, int[] lowKeyFields,
+ int[] highKeyFields, boolean lowKeyInclusive, boolean highKeyInclusive,
+ IIndexDataflowHelperFactory dataflowHelperFactory, boolean retainInput,
+ ISearchOperationCallbackFactory searchOpCallbackProvider) {
super(spec, 1, 1, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, dataflowHelperFactory, null, retainInput,
+ comparatorFactories, bloomFilterKeyFields, dataflowHelperFactory, null, retainInput,
NoOpLocalResourceFactoryProvider.INSTANCE, searchOpCallbackProvider,
NoOpOperationCallbackFactory.INSTANCE);
this.lowKeyFields = lowKeyFields;
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeUpdateSearchOperatorDescriptor.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeUpdateSearchOperatorDescriptor.java
index 2901fce..f13ecae 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeUpdateSearchOperatorDescriptor.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeUpdateSearchOperatorDescriptor.java
@@ -38,13 +38,13 @@
public BTreeUpdateSearchOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc,
IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] comparatorFactories, int[] lowKeyFields, int[] highKeyFields,
- boolean lowKeyInclusive, boolean highKeyInclusive, IIndexDataflowHelperFactory dataflowHelperFactory,
- boolean retainInput, ISearchOperationCallbackFactory searchOpCallbackProvider,
- ITupleUpdaterFactory tupleUpdaterFactory) {
+ IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields, int[] lowKeyFields,
+ int[] highKeyFields, boolean lowKeyInclusive, boolean highKeyInclusive,
+ IIndexDataflowHelperFactory dataflowHelperFactory, boolean retainInput,
+ ISearchOperationCallbackFactory searchOpCallbackProvider, ITupleUpdaterFactory tupleUpdaterFactory) {
super(spec, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, lowKeyFields, highKeyFields, lowKeyInclusive, highKeyInclusive,
- dataflowHelperFactory, retainInput, searchOpCallbackProvider);
+ comparatorFactories, bloomFilterKeyFields, lowKeyFields, highKeyFields, lowKeyInclusive,
+ highKeyInclusive, dataflowHelperFactory, retainInput, searchOpCallbackProvider);
this.tupleUpdaterFactory = tupleUpdaterFactory;
}
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
index d825b89..86bc32a 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
@@ -910,7 +910,8 @@
}
@Override
- public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws TreeIndexException {
+ public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws TreeIndexException {
try {
return new BTreeBulkLoader(fillFactor, verifyInput);
} catch (HyracksDataException e) {
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
index c1cab0a..0ed1dbe 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
@@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package edu.uci.ics.hyracks.storage.am.btree.impls;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
index 2f2eb43..607e00a 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
@@ -65,7 +65,7 @@
private RangePredicate pred;
private MultiComparator lowKeyCmp;
private MultiComparator highKeyCmp;
- private ITupleReference lowKey;
+ protected ITupleReference lowKey;
private ITupleReference highKey;
public BTreeRangeSearchCursor(IBTreeLeafFrame frame, boolean exclusiveLatchNodes) {
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IIndex.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IIndex.java
index d6d74ee..1557c75 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IIndex.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IIndex.java
@@ -22,69 +22,62 @@
* This interface describes the operations common to all indexes. Indexes
* implementing this interface can easily reuse existing index operators for
* dataflow. Users must perform operations on an via an {@link IIndexAccessor}.
- *
- * During dataflow, the lifecycle of IIndexes are handled through an
- * {@link IIndexLifecycleManager}.
+ * During dataflow, the lifecycle of IIndexes is handled through an {@link IIndexLifecycleManager}.
*/
public interface IIndex {
/**
- * Initializes the persistent state of an index.
- *
+ * Initializes the persistent state of an index.
* An index cannot be created if it is in the activated state.
* Calling create on an index that is deactivated has the effect of clearing the index.
*
- * @throws HyracksDataException
- * if there is an error in the BufferCache while (un)pinning pages, (un)latching pages,
- * creating files, or deleting files
- *
- * if the index is in the activated state
+ * @throws HyracksDataException
+ * if there is an error in the BufferCache while (un)pinning pages, (un)latching pages,
+ * creating files, or deleting files
+ * if the index is in the activated state
*/
public void create() throws HyracksDataException;
/**
- * Initializes the index's operational state. An index in the activated state may perform
+ * Initializes the index's operational state. An index in the activated state may perform
* operations via an {@link IIndexAccessor}.
*
* @throws HyracksDataException
- * if there is a problem in the BufferCache while (un)pinning pages, (un)latching pages,
- * creating files, or deleting files
+ * if there is a problem in the BufferCache while (un)pinning pages, (un)latching pages,
+ * creating files, or deleting files
*/
public void activate() throws HyracksDataException;
/**
- * Resets the operational state of the index. Calling clear has the same logical effect
- * as calling deactivate(), destroy(), create(), then activate(), but not necessarily the
+ * Resets the operational state of the index. Calling clear has the same logical effect
+ * as calling deactivate(), destroy(), create(), then activate(), but not necessarily the
* same physical effect.
*
* @throws HyracksDataException
- * if there is a problem in the BufferCache while (un)pinning pages, (un)latching pages,
- * creating files, or deleting files
- *
- * if the index is not in the activated state
+ * if there is a problem in the BufferCache while (un)pinning pages, (un)latching pages,
+ * creating files, or deleting files
+ * if the index is not in the activated state
*/
public void clear() throws HyracksDataException;
/**
- * Deinitializes the index's operational state. An index in the deactivated state may not
+ * Deinitializes the index's operational state. An index in the deactivated state may not
* perform operations.
*
* @throws HyracksDataException
- * if there is a problem in the BufferCache while (un)pinning pages, (un)latching pages,
- * creating files, or deleting files
+ * if there is a problem in the BufferCache while (un)pinning pages, (un)latching pages,
+ * creating files, or deleting files
*/
public void deactivate() throws HyracksDataException;
/**
- * Removes the persistent state of an index.
- *
+ * Removes the persistent state of an index.
* An index cannot be destroyed if it is in the activated state.
*
- * @throws HyracksDataException
- * if there is an error in the BufferCache while (un)pinning pages, (un)latching pages,
- * creating files, or deleting files
- *
- * if the index is already activated
+ * @throws HyracksDataException
+ * if there is an error in the BufferCache while (un)pinning pages, (un)latching pages,
+ * creating files, or deleting files
+ * if the index is already activated
*/
public void destroy() throws HyracksDataException;
@@ -94,8 +87,10 @@
* on the same {@link IIndex}.
*
* @returns IIndexAccessor an accessor for this {@link IIndex}
- * @param modificationCallback the callback to be used for modification operations
- * @param searchCallback the callback to be used for search operations
+ * @param modificationCallback
+ * the callback to be used for modification operations
+ * @param searchCallback
+ * the callback to be used for search operations
*/
public IIndexAccessor createAccessor(IModificationOperationCallback modificationCallback,
ISearchOperationCallback searchCallback);
@@ -105,7 +100,7 @@
* An assertion error is thrown if validation fails.
*
* @throws HyracksDataException
- * if there is an error performing validation
+ * if there is an error performing validation
*/
public void validate() throws HyracksDataException;
@@ -124,5 +119,6 @@
* @param verifyInput
* @throws IndexException
*/
- public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws IndexException;
+ public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws IndexException;
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/AbstractTreeIndexOperatorDescriptor.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/AbstractTreeIndexOperatorDescriptor.java
index 1a5885e..8e87dfe 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/AbstractTreeIndexOperatorDescriptor.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/AbstractTreeIndexOperatorDescriptor.java
@@ -33,11 +33,12 @@
protected final ITypeTraits[] typeTraits;
protected final IBinaryComparatorFactory[] comparatorFactories;
+ protected final int[] bloomFilterKeyFields;
public AbstractTreeIndexOperatorDescriptor(IOperatorDescriptorRegistry spec, int inputArity, int outputArity,
RecordDescriptor recDesc, IStorageManagerInterface storageManager,
IIndexLifecycleManagerProvider lifecycleManagerProvider, IFileSplitProvider fileSplitProvider,
- ITypeTraits[] typeTraits, IBinaryComparatorFactory[] comparatorFactories,
+ ITypeTraits[] typeTraits, IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields,
IIndexDataflowHelperFactory dataflowHelperFactory, ITupleFilterFactory tupleFilterFactory,
boolean retainInput, ILocalResourceFactoryProvider localResourceFactoryProvider,
ISearchOperationCallbackFactory searchOpCallbackFactory,
@@ -47,6 +48,7 @@
searchOpCallbackFactory, modificationOpCallbackFactory);
this.typeTraits = typeTraits;
this.comparatorFactories = comparatorFactories;
+ this.bloomFilterKeyFields = bloomFilterKeyFields;
}
public IBinaryComparatorFactory[] getTreeIndexComparatorFactories() {
@@ -56,4 +58,14 @@
public ITypeTraits[] getTreeIndexTypeTraits() {
return typeTraits;
}
+
+    /**
+     * Returns the bloom filter key fields given to the constructor. The array is handed out as-is (no
+     * copy is made), and it is null for descriptors that were constructed with a null argument.
+     *
+     * @return the stored bloom filter key field array, possibly null
+     */
+    public int[] getTreeIndexBloomFilterKeyFields() {
+        return this.bloomFilterKeyFields;
+    }
}
\ No newline at end of file
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexBulkLoadOperatorNodePushable.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexBulkLoadOperatorNodePushable.java
index de4e627..1b6271d 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexBulkLoadOperatorNodePushable.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexBulkLoadOperatorNodePushable.java
@@ -33,6 +33,7 @@
private final IHyracksTaskContext ctx;
private final float fillFactor;
private final boolean verifyInput;
+ private final long numElementsHint;
private final IIndexDataflowHelper indexHelper;
private FrameTupleAccessor accessor;
private IIndex index;
@@ -41,12 +42,14 @@
private PermutingFrameTupleReference tuple = new PermutingFrameTupleReference();
public IndexBulkLoadOperatorNodePushable(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- int[] fieldPermutation, float fillFactor, boolean verifyInput, IRecordDescriptorProvider recordDescProvider) {
+ int[] fieldPermutation, float fillFactor, boolean verifyInput, long numElementsHint,
+ IRecordDescriptorProvider recordDescProvider) {
this.opDesc = opDesc;
this.ctx = ctx;
this.indexHelper = opDesc.getIndexDataflowHelperFactory().createIndexDataflowHelper(opDesc, ctx, partition);
this.fillFactor = fillFactor;
this.verifyInput = verifyInput;
+ this.numElementsHint = numElementsHint;
this.recDescProvider = recordDescProvider;
tuple.setFieldPermutation(fieldPermutation);
}
@@ -58,7 +61,7 @@
indexHelper.open();
index = indexHelper.getIndexInstance();
try {
- bulkLoader = index.createBulkLoader(fillFactor, verifyInput);
+ bulkLoader = index.createBulkLoader(fillFactor, verifyInput, numElementsHint);
} catch (Exception e) {
indexHelper.close();
throw new HyracksDataException(e);
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexDropOperatorDescriptor.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexDropOperatorDescriptor.java
index d9b25d7..6f890d7 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexDropOperatorDescriptor.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexDropOperatorDescriptor.java
@@ -40,7 +40,7 @@
// 2) The LSMRTreeDataflowHelper to get acceptable type traits
// This should eventually not be *hacked*, but I don't know the proper fix yet. -zheilbron
super(spec, 0, 0, null, storageManager, lifecycleManagerProvider, fileSplitProvider, new ITypeTraits[] {
- IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS }, new IBinaryComparatorFactory[] { null },
+ IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS }, new IBinaryComparatorFactory[] { null }, null,
dataflowHelperFactory, null, false, NoOpLocalResourceFactoryProvider.INSTANCE,
NoOpOperationCallbackFactory.INSTANCE, NoOpOperationCallbackFactory.INSTANCE);
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexBulkLoadOperatorDescriptor.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexBulkLoadOperatorDescriptor.java
index fea6463..f7f57e6 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexBulkLoadOperatorDescriptor.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexBulkLoadOperatorDescriptor.java
@@ -35,25 +35,29 @@
private final int[] fieldPermutation;
private final float fillFactor;
private final boolean verifyInput;
+ private final long numElementsHint;
public TreeIndexBulkLoadOperatorDescriptor(IOperatorDescriptorRegistry spec,
IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] comparatorFactories, int[] fieldPermutation, float fillFactor,
- boolean verifyInput, IIndexDataflowHelperFactory dataflowHelperFactory,
+ IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields, int[] fieldPermutation,
+ float fillFactor, boolean verifyInput, long numElementsHint,
+ IIndexDataflowHelperFactory dataflowHelperFactory,
IModificationOperationCallbackFactory modificationOpCallbackFactory) {
super(spec, 1, 0, null, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, dataflowHelperFactory, null, false, NoOpLocalResourceFactoryProvider.INSTANCE,
- NoOpOperationCallbackFactory.INSTANCE, modificationOpCallbackFactory);
+ comparatorFactories, bloomFilterKeyFields, dataflowHelperFactory, null, false,
+ NoOpLocalResourceFactoryProvider.INSTANCE, NoOpOperationCallbackFactory.INSTANCE,
+ modificationOpCallbackFactory);
this.fieldPermutation = fieldPermutation;
this.fillFactor = fillFactor;
this.verifyInput = verifyInput;
+ this.numElementsHint = numElementsHint;
}
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
return new IndexBulkLoadOperatorNodePushable(this, ctx, partition, fieldPermutation, fillFactor, verifyInput,
- recordDescProvider);
+ numElementsHint, recordDescProvider);
}
}
\ No newline at end of file
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexCreateOperatorDescriptor.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexCreateOperatorDescriptor.java
index 2207295..8b7e81d 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexCreateOperatorDescriptor.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexCreateOperatorDescriptor.java
@@ -34,13 +34,13 @@
public TreeIndexCreateOperatorDescriptor(IOperatorDescriptorRegistry spec, IStorageManagerInterface storageManager,
IIndexLifecycleManagerProvider lifecycleManagerProvider, IFileSplitProvider fileSplitProvider,
- ITypeTraits[] typeTraits, IBinaryComparatorFactory[] comparatorFactories,
+ ITypeTraits[] typeTraits, IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields,
IIndexDataflowHelperFactory dataflowHelperFactory,
ILocalResourceFactoryProvider localResourceFactoryProvider,
IModificationOperationCallbackFactory modificationOpCallbackFactory) {
super(spec, 0, 0, null, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, dataflowHelperFactory, null, false, localResourceFactoryProvider,
- NoOpOperationCallbackFactory.INSTANCE, modificationOpCallbackFactory);
+ comparatorFactories, bloomFilterKeyFields, dataflowHelperFactory, null, false,
+ localResourceFactoryProvider, NoOpOperationCallbackFactory.INSTANCE, modificationOpCallbackFactory);
}
@Override
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexDiskOrderScanOperatorDescriptor.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexDiskOrderScanOperatorDescriptor.java
index 1adbc5f..a8644e4 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexDiskOrderScanOperatorDescriptor.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexDiskOrderScanOperatorDescriptor.java
@@ -36,7 +36,7 @@
IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
IIndexDataflowHelperFactory dataflowHelperFactory, ISearchOperationCallbackFactory searchOpCallbackProvider) {
- super(spec, 0, 1, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits, null,
+ super(spec, 0, 1, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits, null, null,
dataflowHelperFactory, null, false, NoOpLocalResourceFactoryProvider.INSTANCE,
searchOpCallbackProvider, NoOpOperationCallbackFactory.INSTANCE);
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexInsertUpdateDeleteOperatorDescriptor.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexInsertUpdateDeleteOperatorDescriptor.java
index 0326b2e..84e6090 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexInsertUpdateDeleteOperatorDescriptor.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexInsertUpdateDeleteOperatorDescriptor.java
@@ -41,11 +41,11 @@
public TreeIndexInsertUpdateDeleteOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc,
IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] comparatorFactories, int[] fieldPermutation, IndexOperation op,
- IIndexDataflowHelperFactory dataflowHelperFactory, ITupleFilterFactory tupleFilterFactory,
- IModificationOperationCallbackFactory modificationOpCallbackProvider) {
+ IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields, int[] fieldPermutation,
+ IndexOperation op, IIndexDataflowHelperFactory dataflowHelperFactory,
+ ITupleFilterFactory tupleFilterFactory, IModificationOperationCallbackFactory modificationOpCallbackProvider) {
super(spec, 1, 1, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, dataflowHelperFactory, tupleFilterFactory, false,
+ comparatorFactories, bloomFilterKeyFields, dataflowHelperFactory, tupleFilterFactory, false,
NoOpLocalResourceFactoryProvider.INSTANCE, NoOpOperationCallbackFactory.INSTANCE,
modificationOpCallbackProvider);
this.fieldPermutation = fieldPermutation;
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
index 6e19ecb..b589f96 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
@@ -38,11 +38,12 @@
public TreeIndexStatsOperatorDescriptor(IOperatorDescriptorRegistry spec, IStorageManagerInterface storageManager,
IIndexLifecycleManagerProvider lifecycleManagerProvider, IFileSplitProvider fileSplitProvider,
- ITypeTraits[] typeTraits, IBinaryComparatorFactory[] comparatorFactories,
+ ITypeTraits[] typeTraits, IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields,
IIndexDataflowHelperFactory dataflowHelperFactory, ISearchOperationCallbackFactory searchOpCallbackProvider) {
super(spec, 0, 1, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, dataflowHelperFactory, null, false, NoOpLocalResourceFactoryProvider.INSTANCE,
- searchOpCallbackProvider, NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, bloomFilterKeyFields, dataflowHelperFactory, null, false,
+ NoOpLocalResourceFactoryProvider.INSTANCE, searchOpCallbackProvider,
+ NoOpOperationCallbackFactory.INSTANCE);
}
@Override
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/ophelpers/MultiComparator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/ophelpers/MultiComparator.java
index b7ae688..567973d 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/ophelpers/MultiComparator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/ophelpers/MultiComparator.java
@@ -105,6 +105,19 @@
}
}
+    /**
+     * Creates a field-length-ignoring comparator from {@code numCmps} consecutive factories of
+     * {@code cmpFactories}, beginning at {@code startIndex}.
+     *
+     * @param cmpFactories factories the per-field comparators are created from
+     * @param startIndex index in {@code cmpFactories} of the first factory to use
+     * @param numCmps number of consecutive factories to use
+     * @return a {@code FieldLengthIgnoringSingleComparator} when exactly one comparator was created,
+     *         otherwise a {@code FieldLengthIgnoringMultiComparator}
+     */
+    public static MultiComparator createIgnoreFieldLength(IBinaryComparatorFactory[] cmpFactories, int startIndex,
+            int numCmps) {
+        IBinaryComparator[] cmps = new IBinaryComparator[numCmps];
+        // cmps is zero-based while the factories are read from startIndex onwards; using one counter
+        // for both overruns cmps as soon as startIndex > 0.
+        for (int i = 0; i < numCmps; i++) {
+            cmps[i] = cmpFactories[startIndex + i].createBinaryComparator();
+        }
+        if (cmps.length == 1) {
+            return new FieldLengthIgnoringSingleComparator(cmps);
+        } else {
+            return new FieldLengthIgnoringMultiComparator(cmps);
+        }
+    }
+
public static MultiComparator create(IBinaryComparatorFactory[] cmpFactories, int startIndex, int numCmps) {
IBinaryComparator[] cmps = new IBinaryComparator[numCmps];
for (int i = startIndex; i < startIndex + numCmps; i++) {
diff --git a/hyracks-storage-am-lsm-btree/pom.xml b/hyracks-storage-am-lsm-btree/pom.xml
index 17f3714..afef819 100644
--- a/hyracks-storage-am-lsm-btree/pom.xml
+++ b/hyracks-storage-am-lsm-btree/pom.xml
@@ -33,6 +33,13 @@
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-storage-am-bloomfilter</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-common</artifactId>
<version>0.2.2-SNAPSHOT</version>
<type>jar</type>
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
index 0bed4ca..06f06c6 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
@@ -61,7 +61,8 @@
IInMemoryFreePageManager memFreePageManager = new InMemoryFreePageManager(memNumPages, metaDataFrameFactory);
return LSMBTreeUtils.createLSMTree(memBufferCache, memFreePageManager, ctx.getIOManager(), file, opDesc
.getStorageManager().getBufferCache(ctx), opDesc.getStorageManager().getFileMapProvider(ctx),
- treeOpDesc.getTreeIndexTypeTraits(), treeOpDesc.getTreeIndexComparatorFactories(), mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider, partition);
+ treeOpDesc.getTreeIndexTypeTraits(), treeOpDesc.getTreeIndexComparatorFactories(), treeOpDesc
+ .getTreeIndexBloomFilterKeyFields(), mergePolicy, opTrackerFactory, ioScheduler,
+ ioOpCallbackProvider, partition);
}
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
index f451748..ebf4bc0 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
@@ -38,7 +38,8 @@
@Override
public IndexDataflowHelper createIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
int partition) {
- return new LSMBTreeDataflowHelper(opDesc, ctx, partition, memPageSize, memNumPages, mergePolicyProvider.getMergePolicy(ctx),
- opTrackerFactory, ioSchedulerProvider.getIOScheduler(ctx), ioOpCallbackProvider);
+ return new LSMBTreeDataflowHelper(opDesc, ctx, partition, memPageSize, memNumPages,
+ mergePolicyProvider.getMergePolicy(ctx), opTrackerFactory, ioSchedulerProvider.getIOScheduler(ctx),
+ ioOpCallbackProvider);
}
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
index 4cd4974..677b467 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
@@ -23,8 +23,14 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeBulkLoader;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
@@ -64,7 +70,6 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractLSMIndex;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BlockingIOOperationCallbackWrapper;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMFlushOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
@@ -90,9 +95,10 @@
ITreeIndexFrameFactory interiorFrameFactory, ITreeIndexFrameFactory insertLeafFrameFactory,
ITreeIndexFrameFactory deleteLeafFrameFactory, ILSMIndexFileManager fileManager,
TreeIndexFactory<BTree> diskBTreeFactory, TreeIndexFactory<BTree> bulkLoadBTreeFactory,
- IFileMapProvider diskFileMapProvider, int fieldCount, IBinaryComparatorFactory[] cmpFactories,
- ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
+ BloomFilterFactory bloomFilterFactory, IFileMapProvider diskFileMapProvider, int fieldCount,
+ IBinaryComparatorFactory[] cmpFactories, ILSMMergePolicy mergePolicy,
+ ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
+ ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
super(memFreePageManager, diskBTreeFactory.getBufferCache(), fileManager, diskFileMapProvider, mergePolicy,
opTrackerFactory, ioScheduler, ioOpCallbackProvider);
mutableComponent = new LSMBTreeMutableComponent(new BTree(memBufferCache,
@@ -102,8 +108,8 @@
this.insertLeafFrameFactory = insertLeafFrameFactory;
this.deleteLeafFrameFactory = deleteLeafFrameFactory;
this.cmpFactories = cmpFactories;
- componentFactory = new LSMBTreeImmutableComponentFactory(diskBTreeFactory);
- bulkLoadComponentFactory = new LSMBTreeImmutableComponentFactory(bulkLoadBTreeFactory);
+ componentFactory = new LSMBTreeImmutableComponentFactory(diskBTreeFactory, bloomFilterFactory);
+ bulkLoadComponentFactory = new LSMBTreeImmutableComponentFactory(bulkLoadBTreeFactory, bloomFilterFactory);
}
@Override
@@ -135,14 +141,15 @@
throw new HyracksDataException(e);
}
for (LSMComponentFileReferences lsmComonentFileReference : validFileReferences) {
- LSMBTreeImmutableComponent btree;
+ LSMBTreeImmutableComponent component;
try {
- btree = createDiskComponent(componentFactory, lsmComonentFileReference.getInsertIndexFileReference(),
- false);
+ component = createDiskComponent(componentFactory,
+ lsmComonentFileReference.getInsertIndexFileReference(),
+ lsmComonentFileReference.getBloomFilterFileReference(), false);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
- immutableComponents.add(btree);
+ immutableComponents.add(component);
}
isActivated = true;
}
@@ -167,8 +174,11 @@
List<ILSMComponent> immutableComponents = componentsRef.get();
for (ILSMComponent c : immutableComponents) {
- BTree btree = (BTree) ((LSMBTreeImmutableComponent) c).getBTree();
+ LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) c;
+ BTree btree = component.getBTree();
+ BloomFilter bloomFilter = component.getBloomFilter();
btree.deactivate();
+ bloomFilter.deactivate();
}
mutableComponent.getBTree().deactivate();
mutableComponent.getBTree().destroy();
@@ -189,8 +199,9 @@
List<ILSMComponent> immutableComponents = componentsRef.get();
for (ILSMComponent c : immutableComponents) {
- BTree btree = (BTree) ((LSMBTreeImmutableComponent) c).getBTree();
- btree.destroy();
+ LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) c;
+ component.getBTree().destroy();
+ component.getBloomFilter().destroy();
}
mutableComponent.getBTree().destroy();
fileManager.deleteDirs();
@@ -205,9 +216,11 @@
List<ILSMComponent> immutableComponents = componentsRef.get();
mutableComponent.getBTree().clear();
for (ILSMComponent c : immutableComponents) {
- BTree btree = (BTree) ((LSMBTreeImmutableComponent) c).getBTree();
- btree.deactivate();
- btree.destroy();
+ LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) c;
+ component.getBloomFilter().deactivate();
+ component.getBTree().deactivate();
+ component.getBloomFilter().destroy();
+ component.getBTree().destroy();
}
immutableComponents.clear();
}
@@ -303,10 +316,25 @@
int numBTrees = operationalComponents.size();
assert numBTrees > 0;
+ boolean isPointSearch = false;
+ RangePredicate btreePred = (RangePredicate) pred;
+ if (btreePred.getLowKey() != null && btreePred.getHighKey() != null) {
+ if (btreePred.isLowKeyInclusive() && btreePred.isHighKeyInclusive()) {
+ if (btreePred.getLowKeyComparator().getKeyFieldCount() == btreePred.getHighKeyComparator()
+ .getKeyFieldCount()) {
+ if (btreePred.getLowKeyComparator().getKeyFieldCount() == componentFactory
+ .getBloomFilterKeyFields().length) {
+ if (ctx.bloomFilterCmps.compare(btreePred.getLowKey(), btreePred.getHighKey()) == 0) {
+ isPointSearch = true;
+ }
+ }
+ }
+ }
+ }
boolean includeMutableComponent = operationalComponents.get(0) == mutableComponent;
LSMBTreeCursorInitialState initialState = new LSMBTreeCursorInitialState(numBTrees, insertLeafFrameFactory,
- ctx.cmp, includeMutableComponent, lsmHarness, ctx.memBTreeAccessor, pred, ctx.searchCallback,
- operationalComponents);
+ ctx.cmp, includeMutableComponent, isPointSearch, lsmHarness, ctx.memBTreeAccessor, pred,
+ ctx.searchCallback, operationalComponents);
lsmTreeCursor.open(initialState, pred);
int cursorIx;
@@ -346,28 +374,53 @@
opCtx.setOperation(IndexOperation.FLUSH);
opCtx.getComponentHolder().add(flushingComponent);
ILSMIndexAccessorInternal flushAccessor = new LSMBTreeAccessor(lsmHarness, opCtx);
- ioScheduler.scheduleOperation(new LSMFlushOperation(flushAccessor, flushingComponent, componentFileRefs
- .getInsertIndexFileReference(), callback));
+ ioScheduler.scheduleOperation(new LSMBTreeFlushOperation(flushAccessor, flushingComponent, componentFileRefs
+ .getInsertIndexFileReference(), componentFileRefs.getBloomFilterFileReference(), callback));
}
@Override
public ILSMComponent flush(ILSMIOOperation operation) throws HyracksDataException, IndexException {
- LSMFlushOperation flushOp = (LSMFlushOperation) operation;
+ LSMBTreeFlushOperation flushOp = (LSMBTreeFlushOperation) operation;
LSMBTreeMutableComponent flushingComponent = (LSMBTreeMutableComponent) flushOp.getFlushingComponent();
IIndexAccessor accessor = flushingComponent.getBTree().createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
- IIndexCursor scanCursor = accessor.createSearchCursor();
+
RangePredicate nullPred = new RangePredicate(null, null, true, true, null, null);
+ IIndexCursor countingCursor = ((BTreeAccessor) accessor).createCountingSearchCursor();
+ accessor.search(countingCursor, nullPred);
+ long numElements = 0L;
+ try {
+ while (countingCursor.hasNext()) {
+ countingCursor.next();
+ ITupleReference countTuple = countingCursor.getTuple();
+ numElements = IntegerSerializerDeserializer.getInt(countTuple.getFieldData(0),
+ countTuple.getFieldStart(0));
+ }
+ } finally {
+ countingCursor.close();
+ }
+
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
+
+ LSMBTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getBTreeFlushTarget(),
+ flushOp.getBloomFilterFlushTarget(), true);
+ IIndexBulkLoader bulkLoader = component.getBTree().createBulkLoader(1.0f, false, numElements);
+ IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numElements,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
+
+ IIndexCursor scanCursor = accessor.createSearchCursor();
accessor.search(scanCursor, nullPred);
- LSMBTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getFlushTarget(), true);
- IIndexBulkLoader bulkLoader = component.getBTree().createBulkLoader(1.0f, false);
try {
while (scanCursor.hasNext()) {
scanCursor.next();
+ builder.add(scanCursor.getTuple());
bulkLoader.add(scanCursor.getTuple());
}
} finally {
scanCursor.close();
+ builder.end();
}
bulkLoader.end();
return component;
@@ -392,7 +445,7 @@
.getName(), lastFile.getFile().getName());
ILSMIndexAccessorInternal accessor = new LSMBTreeAccessor(lsmHarness, opCtx);
ioScheduler.scheduleOperation(new LSMBTreeMergeOperation(accessor, mergingComponents, cursor, relMergeFileRefs
- .getInsertIndexFileReference(), callback));
+ .getInsertIndexFileReference(), relMergeFileRefs.getBloomFilterFileReference(), callback));
}
@Override
@@ -401,56 +454,90 @@
LSMBTreeMergeOperation mergeOp = (LSMBTreeMergeOperation) operation;
ITreeIndexCursor cursor = mergeOp.getCursor();
mergedComponents.addAll(mergeOp.getMergingComponents());
- LSMBTreeImmutableComponent mergedBTree = createDiskComponent(componentFactory, mergeOp.getMergeTarget(), true);
- IIndexBulkLoader bulkLoader = mergedBTree.getBTree().createBulkLoader(1.0f, false);
+
+ long numElements = 0L;
+ for (int i = 0; i < mergedComponents.size(); ++i) {
+ numElements += ((LSMBTreeImmutableComponent) mergedComponents.get(i)).getBloomFilter().getNumElements();
+ }
+
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
+ LSMBTreeImmutableComponent mergedComponent = createDiskComponent(componentFactory,
+ mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), true);
+
+ IIndexBulkLoader bulkLoader = mergedComponent.getBTree().createBulkLoader(1.0f, false, numElements);
+ IIndexBulkLoader builder = mergedComponent.getBloomFilter().createBuilder(numElements,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
try {
while (cursor.hasNext()) {
cursor.next();
ITupleReference frameTuple = cursor.getTuple();
+ builder.add(frameTuple);
bulkLoader.add(frameTuple);
}
} finally {
cursor.close();
+ builder.end();
}
bulkLoader.end();
- return mergedBTree;
+ return mergedComponent;
}
private LSMBTreeImmutableComponent createDiskComponent(LSMBTreeImmutableComponentFactory factory,
- FileReference fileRef, boolean createComponent) throws HyracksDataException, IndexException {
+ FileReference btreeFileRef, FileReference bloomFilterFileRef, boolean createComponent)
+ throws HyracksDataException, IndexException {
// Create new BTree instance.
LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) factory
- .createLSMComponentInstance(new LSMComponentFileReferences(fileRef, null));
+ .createLSMComponentInstance(new LSMComponentFileReferences(btreeFileRef, null, bloomFilterFileRef));
if (createComponent) {
component.getBTree().create();
+ component.getBloomFilter().create();
}
// BTree will be closed during cleanup of merge().
component.getBTree().activate();
+ component.getBloomFilter().activate();
return component;
}
@Override
- public IIndexBulkLoader createBulkLoader(float fillLevel, boolean verifyInput) throws TreeIndexException {
- return new LSMBTreeBulkLoader(fillLevel, verifyInput);
+ public IIndexBulkLoader createBulkLoader(float fillLevel, boolean verifyInput, long numElementsHint)
+ throws TreeIndexException {
+ try {
+ return new LSMBTreeBulkLoader(fillLevel, verifyInput, numElementsHint);
+ } catch (HyracksDataException e) {
+ throw new TreeIndexException(e);
+ }
}
private ILSMComponent createBulkLoadTarget() throws HyracksDataException, IndexException {
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
- return createDiskComponent(bulkLoadComponentFactory, componentFileRefs.getInsertIndexFileReference(), true);
+ return createDiskComponent(bulkLoadComponentFactory, componentFileRefs.getInsertIndexFileReference(),
+ componentFileRefs.getBloomFilterFileReference(), true);
}
@Override
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
- BTree btree = ((LSMBTreeImmutableComponent) lsmComponent).getBTree();
- forceFlushDirtyPages(btree);
- markAsValidInternal(btree);
+ // The order of forcing the dirty page to be flushed is critical. The bloom filter must be always done first.
+ LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) lsmComponent;
+ // Flush the bloom filter first.
+ int fileId = component.getBloomFilter().getFileId();
+ IBufferCache bufferCache = component.getBTree().getBufferCache();
+ int startPage = 0;
+ int maxPage = component.getBloomFilter().getNumPages();
+ forceFlushDirtyPages(bufferCache, fileId, startPage, maxPage);
+ forceFlushDirtyPages(component.getBTree());
+ markAsValidInternal(component.getBTree());
}
public class LSMBTreeBulkLoader implements IIndexBulkLoader {
private final ILSMComponent component;
private final BTreeBulkLoader bulkLoader;
+ private final IIndexBulkLoader builder;
+ private boolean endHasBeenCalled = false;
- public LSMBTreeBulkLoader(float fillFactor, boolean verifyInput) throws TreeIndexException {
+ public LSMBTreeBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws TreeIndexException, HyracksDataException {
try {
component = createBulkLoadTarget();
} catch (HyracksDataException e) {
@@ -459,13 +546,20 @@
throw new TreeIndexException(e);
}
bulkLoader = (BTreeBulkLoader) ((LSMBTreeImmutableComponent) component).getBTree().createBulkLoader(
- fillFactor, verifyInput);
+ fillFactor, verifyInput, numElementsHint);
+
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElementsHint);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
+ builder = ((LSMBTreeImmutableComponent) component).getBloomFilter().createBuilder(numElementsHint,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
}
@Override
public void add(ITupleReference tuple) throws IndexException, HyracksDataException {
try {
bulkLoader.add(tuple);
+ builder.add(tuple);
} catch (IndexException e) {
handleException();
throw e;
@@ -478,14 +572,21 @@
}
}
- protected void handleException() throws HyracksDataException {
+ protected void handleException() throws HyracksDataException, IndexException {
+ if (!endHasBeenCalled) {
+ builder.end();
+ }
((LSMBTreeImmutableComponent) component).getBTree().deactivate();
((LSMBTreeImmutableComponent) component).getBTree().destroy();
+ ((LSMBTreeImmutableComponent) component).getBloomFilter().deactivate();
+ ((LSMBTreeImmutableComponent) component).getBloomFilter().destroy();
}
@Override
public void end() throws HyracksDataException, IndexException {
bulkLoader.end();
+ builder.end();
+ endHasBeenCalled = true;
lsmHarness.addBulkLoadedComponent(component);
}
@@ -494,7 +595,7 @@
public LSMBTreeOpContext createOpContext(IModificationOperationCallback modificationCallback,
ISearchOperationCallback searchCallback) {
return new LSMBTreeOpContext(mutableComponent.getBTree(), insertLeafFrameFactory, deleteLeafFrameFactory,
- modificationCallback, searchCallback);
+ modificationCallback, searchCallback, componentFactory.getBloomFilterKeyFields().length);
}
@Override
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeCursorInitialState.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeCursorInitialState.java
index 84f1c64..d59e0d7 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeCursorInitialState.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeCursorInitialState.java
@@ -33,6 +33,7 @@
private final ITreeIndexFrameFactory leafFrameFactory;
private MultiComparator cmp;
private final boolean includeMemComponent;
+ private final boolean pointSearch;
private final ILSMHarness lsmHarness;
private final IIndexAccessor memBtreeAccessor;
@@ -42,7 +43,7 @@
private final List<ILSMComponent> operationalComponents;
public LSMBTreeCursorInitialState(int numBTrees, ITreeIndexFrameFactory leafFrameFactory, MultiComparator cmp,
- boolean includeMemComponent, ILSMHarness lsmHarness, IIndexAccessor memBtreeAccessor,
+ boolean includeMemComponent, boolean pointSearch, ILSMHarness lsmHarness, IIndexAccessor memBtreeAccessor,
ISearchPredicate predicate, ISearchOperationCallback searchCallback,
List<ILSMComponent> operationalComponents) {
this.numBTrees = numBTrees;
@@ -54,6 +55,7 @@
this.memBtreeAccessor = memBtreeAccessor;
this.predicate = predicate;
this.operationalComponents = operationalComponents;
+ this.pointSearch = pointSearch;
}
public int getNumBTrees() {
@@ -77,6 +79,10 @@
return includeMemComponent;
}
+    /**
+     * @return the point-search flag this initial state was constructed with
+     */
+    public boolean isPointSearch() {
+        return this.pointSearch;
+    }
+
public ILSMHarness getLSMHarness() {
return lsmHarness;
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java
new file mode 100644
index 0000000..38766c3
--- /dev/null
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.btree.impls;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.api.io.IODeviceHandle;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+
+/**
+ * File manager for LSM BTree components. Every component is referenced as a pair of files that share a
+ * timestamp-range base name and differ only in the trailing suffix: a BTree file ({@code BTREE_STRING})
+ * and a bloom filter file ({@code BLOOM_FILTER_STRING}).
+ */
+public class LSMBTreeFileManager extends AbstractLSMIndexFileManager {
+    private static final String BTREE_STRING = "b";
+
+    // Accepts non-hidden files whose name ends with the BTree suffix.
+    private static final FilenameFilter btreeFilter = new FilenameFilter() {
+        @Override
+        public boolean accept(File dir, String name) {
+            return !name.startsWith(".") && name.endsWith(BTREE_STRING);
+        }
+    };
+
+    private final TreeIndexFactory<? extends ITreeIndex> btreeFactory;
+
+    public LSMBTreeFileManager(IIOManager ioManager, IFileMapProvider fileMapProvider, FileReference file,
+            TreeIndexFactory<? extends ITreeIndex> btreeFactory, int startIODeviceIndex) {
+        super(ioManager, fileMapProvider, file, null, startIODeviceIndex);
+        this.btreeFactory = btreeFactory;
+    }
+
+    /**
+     * Builds the BTree and bloom filter file references for a flush, naming both after the current time.
+     *
+     * @return references whose insert-index file is the BTree file and whose bloom filter file shares its
+     *         base name
+     */
+    @Override
+    public LSMComponentFileReferences getRelFlushFileReference() {
+        // Begin timestamp and end timestamp are identical since it is a flush.
+        String ts = formatter.format(new Date());
+        String baseName = baseDir + ts + SPLIT_STRING + ts;
+        // The BTree file is requested before the bloom filter file; keep that order.
+        FileReference btreeFile = createFlushFile(baseName + SPLIT_STRING + BTREE_STRING);
+        FileReference bloomFilterFile = createFlushFile(baseName + SPLIT_STRING + BLOOM_FILTER_STRING);
+        return new LSMComponentFileReferences(btreeFile, null, bloomFilterFile);
+    }
+
+    /**
+     * Builds the BTree and bloom filter file references for a merge result.
+     *
+     * @param firstFileName file name whose first {@code SPLIT_STRING}-separated token becomes the start
+     *            timestamp of the merged component
+     * @param lastFileName file name whose second {@code SPLIT_STRING}-separated token becomes the end
+     *            timestamp of the merged component
+     */
+    @Override
+    public LSMComponentFileReferences getRelMergeFileReference(String firstFileName, String lastFileName)
+            throws HyracksDataException {
+        String[] firstTimestampRange = firstFileName.split(SPLIT_STRING);
+        String[] lastTimestampRange = lastFileName.split(SPLIT_STRING);
+
+        // Get the range of timestamps by taking the earliest and the latest timestamps.
+        String baseName = baseDir + firstTimestampRange[0] + SPLIT_STRING + lastTimestampRange[1];
+        // The BTree file is requested before the bloom filter file; keep that order.
+        FileReference btreeFile = createMergeFile(baseName + SPLIT_STRING + BTREE_STRING);
+        FileReference bloomFilterFile = createMergeFile(baseName + SPLIT_STRING + BLOOM_FILTER_STRING);
+        return new LSMComponentFileReferences(btreeFile, null, bloomFilterFile);
+    }
+
+    /**
+     * Scans every IO device for BTree and bloom filter files, deletes BTree files that have no bloom
+     * filter file with the same base name, deletes pairs whose timestamp interval is contained in the
+     * interval of the previously accepted pair, and returns the remaining pairs.
+     *
+     * @return the surviving (BTree, bloom filter) file pairs, ordered by {@code recencyCmp}
+     * @throws HyracksDataException if the number of BTree and bloom filter files differs, or if two
+     *             components have overlapping but not contained timestamp intervals
+     */
+    @Override
+    public List<LSMComponentFileReferences> cleanupAndGetValidFiles() throws HyracksDataException, IndexException {
+        List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
+        ArrayList<ComparableFileName> allBTreeFiles = new ArrayList<ComparableFileName>();
+        ArrayList<ComparableFileName> allBloomFilterFiles = new ArrayList<ComparableFileName>();
+
+        // Gather files from all IODeviceHandles.
+        for (IODeviceHandle dev : ioManager.getIODevices()) {
+            cleanupAndGetValidFilesInternal(dev, bloomFilterFilter, null, allBloomFilterFiles);
+            // Base names of all bloom filter files gathered so far (this device and earlier ones).
+            HashSet<String> bloomFilterBaseNames = new HashSet<String>();
+            for (ComparableFileName bloomFilterFile : allBloomFilterFiles) {
+                bloomFilterBaseNames.add(baseNameOf(bloomFilterFile));
+            }
+            // List of valid BTree files that may or may not have a bloom filter buddy.
+            ArrayList<ComparableFileName> deviceBTreeFiles = new ArrayList<ComparableFileName>();
+            cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, deviceBTreeFiles);
+            // Keep BTrees that have a buddy bloom filter; delete the ones that do not.
+            for (ComparableFileName btreeFile : deviceBTreeFiles) {
+                if (bloomFilterBaseNames.contains(baseNameOf(btreeFile))) {
+                    allBTreeFiles.add(btreeFile);
+                } else {
+                    new File(btreeFile.fullPath).delete();
+                }
+            }
+        }
+
+        // Sanity check.
+        // NOTE(review): bloom filter files without a BTree buddy are not removed above, so they appear
+        // to trip this check -- confirm whether cleanupAndGetValidFilesInternal is expected to handle them.
+        if (allBTreeFiles.size() != allBloomFilterFiles.size()) {
+            throw new HyracksDataException(
+                    "Unequal number of valid BTree and bloom filter files found. Aborting cleanup.");
+        }
+
+        // Trivial cases. Both lists have the same size here, so checking one of them suffices.
+        if (allBTreeFiles.isEmpty()) {
+            return validFiles;
+        }
+        if (allBTreeFiles.size() == 1) {
+            validFiles.add(new LSMComponentFileReferences(allBTreeFiles.get(0).fileRef, null,
+                    allBloomFilterFiles.get(0).fileRef));
+            return validFiles;
+        }
+
+        // Sorts files names from earliest to latest timestamp.
+        Collections.sort(allBTreeFiles);
+        Collections.sort(allBloomFilterFiles);
+
+        List<ComparableFileName> validBTreeFiles = new ArrayList<ComparableFileName>();
+        List<ComparableFileName> validBloomFilterFiles = new ArrayList<ComparableFileName>();
+        ComparableFileName lastBTree = allBTreeFiles.get(0);
+        ComparableFileName lastBloomFilter = allBloomFilterFiles.get(0);
+        validBTreeFiles.add(lastBTree);
+        validBloomFilterFiles.add(lastBloomFilter);
+
+        // Walk both sorted lists in lockstep; entry i of each list is treated as one component.
+        for (int i = 1; i < allBTreeFiles.size(); i++) {
+            ComparableFileName currentBTree = allBTreeFiles.get(i);
+            ComparableFileName currentBloomFilter = allBloomFilterFiles.get(i);
+            if (startsAfter(currentBTree, lastBTree) && startsAfter(currentBloomFilter, lastBloomFilter)) {
+                // Current start timestamp is greater than last stop timestamp.
+                validBTreeFiles.add(currentBTree);
+                validBloomFilterFiles.add(currentBloomFilter);
+                lastBTree = currentBTree;
+                lastBloomFilter = currentBloomFilter;
+            } else if (isContainedIn(currentBTree, lastBTree)
+                    && isContainedIn(currentBloomFilter, lastBloomFilter)) {
+                // Invalid files are completely contained in last interval.
+                new File(currentBTree.fullPath).delete();
+                new File(currentBloomFilter.fullPath).delete();
+            } else {
+                // This scenario should not be possible.
+                throw new HyracksDataException(
+                        "Found LSM files with overlapping but not contained timestamp intervals.");
+            }
+        }
+
+        // Sort valid files in reverse lexicographical order, such that newer
+        // files come first.
+        Collections.sort(validBTreeFiles, recencyCmp);
+        Collections.sort(validBloomFilterFiles, recencyCmp);
+
+        // Both lists were filled in lockstep above, so they have the same length.
+        for (int i = 0; i < validBTreeFiles.size(); i++) {
+            validFiles.add(new LSMComponentFileReferences(validBTreeFiles.get(i).fileRef, null,
+                    validBloomFilterFiles.get(i).fileRef));
+        }
+
+        return validFiles;
+    }
+
+    // Returns the file name with its last SPLIT_STRING-separated suffix removed.
+    private String baseNameOf(ComparableFileName cmpFileName) {
+        int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+        return cmpFileName.fileName.substring(0, index);
+    }
+
+    // True if current's start timestamp is strictly greater than last's end timestamp.
+    private boolean startsAfter(ComparableFileName current, ComparableFileName last) {
+        return current.interval[0].compareTo(last.interval[1]) > 0;
+    }
+
+    // True if current's timestamp interval lies within last's interval (bounds included).
+    private boolean isContainedIn(ComparableFileName current, ComparableFileName last) {
+        return current.interval[0].compareTo(last.interval[0]) >= 0
+                && current.interval[1].compareTo(last.interval[1]) <= 0;
+    }
+}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java
new file mode 100644
index 0000000..dfda07b
--- /dev/null
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java
@@ -0,0 +1,71 @@
+package edu.uci.ics.hyracks.storage.am.lsm.btree.impls;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IODeviceHandle;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperation;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessorInternal;
+
+/**
+ * I/O operation describing the flush of one LSM B-tree component.
+ * <p>
+ * It bundles the accessor that carries out the flush, the component being flushed, the target
+ * file references for the B-tree and for its bloom filter, and the callback of the operation.
+ */
+public class LSMBTreeFlushOperation implements ILSMIOOperation {
+
+    private final ILSMIndexAccessorInternal accessor;
+    private final ILSMComponent flushingComponent;
+    private final FileReference btreeFlushTarget;
+    private final FileReference bloomFilterFlushTarget;
+    private final ILSMIOOperationCallback callback;
+
+    public LSMBTreeFlushOperation(ILSMIndexAccessorInternal accessor, ILSMComponent flushingComponent,
+            FileReference btreeFlushTarget, FileReference bloomFilterFlushTarget, ILSMIOOperationCallback callback) {
+        this.callback = callback;
+        this.bloomFilterFlushTarget = bloomFilterFlushTarget;
+        this.btreeFlushTarget = btreeFlushTarget;
+        this.flushingComponent = flushingComponent;
+        this.accessor = accessor;
+    }
+
+    /** Returns the accessor that {@link #perform()} hands this operation to. */
+    public ILSMIndexAccessorInternal getAccessor() {
+        return accessor;
+    }
+
+    /** Returns the component given to the constructor as the one being flushed. */
+    public ILSMComponent getFlushingComponent() {
+        return flushingComponent;
+    }
+
+    /** Returns the file reference the B-tree is flushed to. */
+    public FileReference getBTreeFlushTarget() {
+        return btreeFlushTarget;
+    }
+
+    /** Returns the file reference the bloom filter is flushed to. */
+    public FileReference getBloomFilterFlushTarget() {
+        return bloomFilterFlushTarget;
+    }
+
+    @Override
+    public ILSMIOOperationCallback getCallback() {
+        return callback;
+    }
+
+    /** A flush reports no read devices: the result is always an empty set. */
+    @Override
+    public Set<IODeviceHandle> getReadDevices() {
+        Set<IODeviceHandle> none = Collections.emptySet();
+        return none;
+    }
+
+    /** Collects the device handles of both flush targets into a fresh set. */
+    @Override
+    public Set<IODeviceHandle> getWriteDevices() {
+        Set<IODeviceHandle> targets = new HashSet<IODeviceHandle>();
+        for (FileReference target : new FileReference[] { btreeFlushTarget, bloomFilterFlushTarget }) {
+            targets.add(target.getDeviceHandle());
+        }
+        return targets;
+    }
+
+    /** Delegates the actual work to the accessor's {@code flush} method. */
+    @Override
+    public void perform() throws HyracksDataException, IndexException {
+        accessor.flush(this);
+    }
+}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java
index 2251a49..daa86d9 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java
@@ -1,24 +1,32 @@
package edu.uci.ics.hyracks.storage.am.lsm.btree.impls;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
+/**
+ * Immutable LSM B-tree component: holds a {@link BTree} together with the {@link BloomFilter}
+ * that is supplied alongside it at construction time.
+ */
public class LSMBTreeImmutableComponent extends AbstractImmutableLSMComponent {
-
private final BTree btree;
+ private final BloomFilter bloomFilter;
- public LSMBTreeImmutableComponent(BTree btree) {
+ /**
+ * @param btree the B-tree held by this component
+ * @param bloomFilter the bloom filter held by this component
+ */
+ public LSMBTreeImmutableComponent(BTree btree, BloomFilter bloomFilter) {
this.btree = btree;
+ this.bloomFilter = bloomFilter;
}
+ /**
+ * Deactivates and destroys the B-tree, then deactivates and destroys the bloom filter.
+ * NOTE(review): if one of the B-tree calls throws, the bloom filter calls are not reached.
+ */
@Override
public void destroy() throws HyracksDataException {
btree.deactivate();
btree.destroy();
+ bloomFilter.deactivate();
+ bloomFilter.destroy();
}
public BTree getBTree() {
return btree;
}
+
+ /** Returns the bloom filter that was passed to the constructor. */
+ public BloomFilter getBloomFilter() {
+ return bloomFilter;
+ }
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java
index 696fc2c..e9da5a5 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.btree.impls;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -25,18 +27,26 @@
+/**
+ * Component factory that builds {@link LSMBTreeImmutableComponent} instances from a B-tree
+ * factory and a bloom filter factory.
+ */
public class LSMBTreeImmutableComponentFactory implements ILSMComponentFactory {
private final TreeIndexFactory<BTree> btreeFactory;
+ private final BloomFilterFactory bloomFilterFactory;
- public LSMBTreeImmutableComponentFactory(TreeIndexFactory<BTree> btreeFactory) {
+ public LSMBTreeImmutableComponentFactory(TreeIndexFactory<BTree> btreeFactory, BloomFilterFactory bloomFilterFactory) {
this.btreeFactory = btreeFactory;
+ this.bloomFilterFactory = bloomFilterFactory;
}
+ /**
+ * Creates a component whose B-tree is instantiated for the insert-index file reference of
+ * {@code cfr} and whose bloom filter is instantiated for its bloom filter file reference.
+ */
@Override
- public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException {
- return new LSMBTreeImmutableComponent(btreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()));
+ public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
+ HyracksDataException {
+ return new LSMBTreeImmutableComponent(btreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
+ bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()));
}
@Override
public IBufferCache getBufferCache() {
return btreeFactory.getBufferCache();
}
+
+ /** Returns the bloom filter key fields as reported by the bloom filter factory. */
+ public int[] getBloomFilterKeyFields() {
+ return bloomFilterFactory.getBloomFilterKeyFields();
+ }
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
index a3a7097..180fb9a 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
@@ -15,7 +15,6 @@
package edu.uci.ics.hyracks.storage.am.lsm.btree.impls;
-import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -35,15 +34,18 @@
private final ILSMIndexAccessorInternal accessor;
private final List<ILSMComponent> mergingComponents;
private final ITreeIndexCursor cursor;
- private final FileReference mergeTarget;
+ private final FileReference btreeMergeTarget;
+ private final FileReference bloomFilterMergeTarget;
private final ILSMIOOperationCallback callback;
+ /**
+ * Stores the given arguments. The B-tree and the bloom filter each get their own merge
+ * target file reference.
+ */
public LSMBTreeMergeOperation(ILSMIndexAccessorInternal accessor, List<ILSMComponent> mergingComponents,
- ITreeIndexCursor cursor, FileReference mergeTarget, ILSMIOOperationCallback callback) {
+ ITreeIndexCursor cursor, FileReference btreeMergeTarget, FileReference bloomFilterMergeTarget,
+ ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.mergingComponents = mergingComponents;
this.cursor = cursor;
- this.mergeTarget = mergeTarget;
+ this.btreeMergeTarget = btreeMergeTarget;
+ this.bloomFilterMergeTarget = bloomFilterMergeTarget;
this.callback = callback;
}
@@ -53,13 +55,17 @@
for (ILSMComponent o : mergingComponents) {
LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) o;
devs.add(component.getBTree().getFileReference().getDeviceHandle());
+ devs.add(component.getBloomFilter().getFileReference().getDeviceHandle());
}
return devs;
}
+ /** Collects the device handles of the B-tree and bloom filter merge targets into a new HashSet. */
@Override
public Set<IODeviceHandle> getWriteDevices() {
- return Collections.singleton(mergeTarget.getDeviceHandle());
+ Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
+ devs.add(btreeMergeTarget.getDeviceHandle());
+ devs.add(bloomFilterMergeTarget.getDeviceHandle());
+ return devs;
}
@Override
@@ -72,8 +78,12 @@
return callback;
}
- public FileReference getMergeTarget() {
- return mergeTarget;
+ /** Returns the file reference given to the constructor as the B-tree merge target. */
+ public FileReference getBTreeMergeTarget() {
+ return btreeMergeTarget;
+ }
+
+ /** Returns the file reference given to the constructor as the bloom filter merge target. */
+ public FileReference getBloomFilterMergeTarget() {
+ return bloomFilterMergeTarget;
}
public ITreeIndexCursor getCursor() {
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java
index a9c39f6..8edd9b9 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java
@@ -42,19 +42,24 @@
public BTreeOpContext memBTreeOpCtx;
public IndexOperation op;
public final MultiComparator cmp;
+ public final MultiComparator bloomFilterCmps;
public final IModificationOperationCallback modificationCallback;
public final ISearchOperationCallback searchCallback;
private final List<ILSMComponent> componentHolder;
public LSMBTreeOpContext(BTree memBTree, ITreeIndexFrameFactory insertLeafFrameFactory,
ITreeIndexFrameFactory deleteLeafFrameFactory, IModificationOperationCallback modificationCallback,
- ISearchOperationCallback searchCallback) {
+ ISearchOperationCallback searchCallback, int numBloomFilterKeyFields) {
IBinaryComparatorFactory cmpFactories[] = memBTree.getComparatorFactories();
if (cmpFactories[0] != null) {
this.cmp = MultiComparator.createIgnoreFieldLength(memBTree.getComparatorFactories());
} else {
this.cmp = null;
}
+
+ bloomFilterCmps = MultiComparator.createIgnoreFieldLength(memBTree.getComparatorFactories(), 0,
+ numBloomFilterKeyFields);
+
this.memBTree = memBTree;
this.insertLeafFrameFactory = insertLeafFrameFactory;
this.deleteLeafFrameFactory = deleteLeafFrameFactory;
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
index a96ac81..e1ad93a 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
@@ -30,6 +30,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
public class LSMBTreeRangeSearchCursor extends LSMIndexSearchCursor {
@@ -39,10 +40,10 @@
private ISearchOperationCallback searchCallback;
private RangePredicate predicate;
private IIndexAccessor memBTreeAccessor;
- private ArrayTupleBuilder tupleBuilder;
-
+ private ArrayTupleBuilder tupleBuilder;
+
+ /**
+ * Passes the operation context to the superclass and allocates the copy tuple and the
+ * reusable range predicate (created with null keys and comparators, both bounds inclusive).
+ */
public LSMBTreeRangeSearchCursor(ILSMIndexOperationContext opCtx) {
- super(opCtx);
+ super(opCtx);
this.copyTuple = new ArrayTupleReference();
this.reusablePred = new RangePredicate(null, null, true, true, null, null);
}
@@ -130,12 +131,6 @@
public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
LSMBTreeCursorInitialState lsmInitialState = (LSMBTreeCursorInitialState) initialState;
cmp = lsmInitialState.getOriginalKeyComparator();
- int numBTrees = lsmInitialState.getNumBTrees();
- rangeCursors = new BTreeRangeSearchCursor[numBTrees];
- for (int i = 0; i < numBTrees; i++) {
- IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
- rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame, false);
- }
includeMemComponent = lsmInitialState.getIncludeMemComponent();
operationalComponents = lsmInitialState.getOperationalComponents();
lsmHarness = lsmInitialState.getLSMHarness();
@@ -145,6 +140,29 @@
reusablePred.setLowKeyComparator(cmp);
reusablePred.setHighKey(predicate.getHighKey(), predicate.isHighKeyInclusive());
reusablePred.setHighKeyComparator(predicate.getHighKeyComparator());
+
+ int numBTrees = lsmInitialState.getNumBTrees();
+ rangeCursors = new BTreeRangeSearchCursor[numBTrees];
+ if (lsmInitialState.isPointSearch()) {
+ int i = 0;
+ if (includeMemComponent) {
+ // No need for a bloom filter for the in-memory BTree.
+ IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
+ rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame, false);
+ ++i;
+ }
+ for (; i < numBTrees; i++) {
+ IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
+ rangeCursors[i] = new BloomFilterAwareBTreePointSearchCursor(leafFrame, false,
+ ((LSMBTreeImmutableComponent) operationalComponents.get(i)).getBloomFilter());
+ }
+ } else {
+ for (int i = 0; i < numBTrees; i++) {
+ IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
+ rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame, false);
+ }
+ }
+
setPriorityQueueComparator();
}
}
\ No newline at end of file
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
index 3706230..ac20b6d 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
@@ -19,6 +19,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
@@ -29,6 +30,7 @@
import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManagerFactory;
import edu.uci.ics.hyracks.storage.am.lsm.btree.impls.LSMBTree;
+import edu.uci.ics.hyracks.storage.am.lsm.btree.impls.LSMBTreeFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.btree.tuples.LSMBTreeCopyTupleWriterFactory;
import edu.uci.ics.hyracks.storage.am.lsm.btree.tuples.LSMBTreeTupleWriterFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IInMemoryBufferCache;
@@ -38,7 +40,6 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BTreeFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
@@ -48,17 +49,18 @@
+ /**
+ * Convenience overload: forwards all arguments to the variant that also takes a start I/O
+ * device index, passing 0 for that index.
+ */
public static LSMBTree createLSMTree(IInMemoryBufferCache memBufferCache,
IInMemoryFreePageManager memFreePageManager, IIOManager ioManager, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] cmpFactories, ILSMMergePolicy mergePolicy,
+ IBinaryComparatorFactory[] cmpFactories, int[] bloomFilterKeyFields, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
return createLSMTree(memBufferCache, memFreePageManager, ioManager, file, diskBufferCache, diskFileMapProvider,
- typeTraits, cmpFactories, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider, 0);
+ typeTraits, cmpFactories, bloomFilterKeyFields, mergePolicy, opTrackerFactory, ioScheduler,
+ ioOpCallbackProvider, 0);
}
public static LSMBTree createLSMTree(IInMemoryBufferCache memBufferCache,
IInMemoryFreePageManager memFreePageManager, IIOManager ioManager, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] cmpFactories, ILSMMergePolicy mergePolicy,
+ IBinaryComparatorFactory[] cmpFactories, int[] bloomFilterKeyFields, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
ILSMIOOperationCallbackProvider ioOpCallbackProvider, int startIODeviceIndex) {
LSMBTreeTupleWriterFactory insertTupleWriterFactory = new LSMBTreeTupleWriterFactory(typeTraits,
@@ -80,13 +82,17 @@
typeTraits.length);
TreeIndexFactory<BTree> bulkLoadBTreeFactory = new BTreeFactory(diskBufferCache, diskFileMapProvider,
freePageManagerFactory, interiorFrameFactory, insertLeafFrameFactory, cmpFactories, typeTraits.length);
- ILSMIndexFileManager fileNameManager = new LSMIndexFileManager(ioManager, diskFileMapProvider, file,
+
+ BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider,
+ bloomFilterKeyFields);
+
+ ILSMIndexFileManager fileNameManager = new LSMBTreeFileManager(ioManager, diskFileMapProvider, file,
diskBTreeFactory, startIODeviceIndex);
LSMBTree lsmTree = new LSMBTree(memBufferCache, memFreePageManager, interiorFrameFactory,
insertLeafFrameFactory, deleteLeafFrameFactory, fileNameManager, diskBTreeFactory,
- bulkLoadBTreeFactory, diskFileMapProvider, typeTraits.length, cmpFactories, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ bulkLoadBTreeFactory, bloomFilterFactory, diskFileMapProvider, typeTraits.length, cmpFactories,
+ mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
return lsmTree;
}
}
diff --git a/hyracks-storage-am-lsm-common/pom.xml b/hyracks-storage-am-lsm-common/pom.xml
index 6402d63..94ed2f4 100644
--- a/hyracks-storage-am-lsm-common/pom.xml
+++ b/hyracks-storage-am-lsm-common/pom.xml
@@ -33,6 +33,13 @@
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-storage-am-bloomfilter</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
<version>0.2.2-SNAPSHOT</version>
<type>jar</type>
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMComponentFactory.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMComponentFactory.java
index d00b805..1f3a2b7 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMComponentFactory.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMComponentFactory.java
@@ -1,11 +1,13 @@
package edu.uci.ics.hyracks.storage.am.lsm.common.api;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+/** Factory for {@link ILSMComponent} instances. */
public interface ILSMComponentFactory {
- public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException;
+ /**
+ * Creates a component from the given component file references.
+ *
+ * @throws IndexException declared for implementations
+ * @throws HyracksDataException declared for implementations
+ */
+ public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
+ HyracksDataException;
public IBufferCache getBufferCache();
}
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/LSMTreeIndexInsertUpdateDeleteOperatorDescriptor.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/LSMTreeIndexInsertUpdateDeleteOperatorDescriptor.java
index d763356..6297576 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/LSMTreeIndexInsertUpdateDeleteOperatorDescriptor.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/LSMTreeIndexInsertUpdateDeleteOperatorDescriptor.java
@@ -43,11 +43,11 @@
public LSMTreeIndexInsertUpdateDeleteOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc,
IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] comparatorFactories, int[] fieldPermutation, IndexOperation op,
- IIndexDataflowHelperFactory dataflowHelperFactory, ITupleFilterFactory tupleFilterFactory,
- IModificationOperationCallbackFactory modificationOpCallbackProvider) {
+ IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields, int[] fieldPermutation,
+ IndexOperation op, IIndexDataflowHelperFactory dataflowHelperFactory,
+ ITupleFilterFactory tupleFilterFactory, IModificationOperationCallbackFactory modificationOpCallbackProvider) {
super(spec, 1, 1, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, dataflowHelperFactory, tupleFilterFactory, false,
+ comparatorFactories, bloomFilterKeyFields, dataflowHelperFactory, tupleFilterFactory, false,
NoOpLocalResourceFactoryProvider.INSTANCE, NoOpOperationCallbackFactory.INSTANCE,
modificationOpCallbackProvider);
this.fieldPermutation = fieldPermutation;
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
index 3cc7196..0c6b9ab 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
@@ -40,6 +40,8 @@
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
public abstract class AbstractLSMIndex implements ILSMIndexInternal {
+ protected final static double MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE = 0.1;
+
protected final ILSMHarness lsmHarness;
protected final ILSMIOOperationScheduler ioScheduler;
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
index 3d2ebcd..a84f8c9 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
@@ -41,6 +41,7 @@
public abstract class AbstractLSMIndexFileManager implements ILSMIndexFileManager {
protected static final String SPLIT_STRING = "_";
+ protected static final String BLOOM_FILTER_STRING = "f";
// Use all IODevices registered in ioManager in a round-robin fashion to choose
// where to flush and merge
@@ -48,7 +49,6 @@
protected final IFileMapProvider fileMapProvider;
// baseDir should reflect dataset name and partition name.
- protected FileReference file;
protected String baseDir;
protected final Format formatter = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss-SSS");
protected final Comparator<String> cmp = new FileNameComparator();
@@ -61,7 +61,6 @@
public AbstractLSMIndexFileManager(IIOManager ioManager, IFileMapProvider fileMapProvider, FileReference file,
TreeIndexFactory<? extends ITreeIndex> treeFactory, int startIODeviceIndex) {
- this.file = file;
this.baseDir = file.getFile().getPath();
if (!baseDir.endsWith(System.getProperty("file.separator"))) {
baseDir += System.getProperty("file.separator");
@@ -100,9 +99,21 @@
}
}
- abstract protected void cleanupAndGetValidFilesInternal(IODeviceHandle dev, FilenameFilter filter,
+    /**
+     * Lists the files under {@code baseDir} on the given device that pass {@code filter} and adds
+     * every acceptable one to {@code allFiles}. A file is acceptable when {@code treeFactory} is
+     * null, or when the tree index created for it passes {@code isValidTreeIndex}; any other
+     * file is deleted.
+     *
+     * @param dev device whose path is combined with {@code baseDir} to locate the directory
+     * @param filter file name filter applied to the directory listing
+     * @param treeFactory factory used to validate each file, or null to skip validation
+     * @param allFiles output list that receives the accepted files
+     * @throws HyracksDataException if the directory cannot be listed
+     */
+    protected void cleanupAndGetValidFilesInternal(IODeviceHandle dev, FilenameFilter filter,
TreeIndexFactory<? extends ITreeIndex> treeFactory, ArrayList<ComparableFileName> allFiles)
- throws HyracksDataException, IndexException;
+            throws HyracksDataException, IndexException {
+        File dir = new File(dev.getPath(), baseDir);
+        String[] files = dir.list(filter);
+        if (files == null) {
+            // File.list() returns null when the path is not a directory or an I/O error occurs;
+            // report that explicitly instead of failing with a NullPointerException below.
+            throw new HyracksDataException("Failed to list the files in directory " + dir.getPath());
+        }
+        for (String fileName : files) {
+            File file = new File(dir.getPath() + File.separator + fileName);
+            FileReference fileRef = new FileReference(file);
+            if (treeFactory == null || isValidTreeIndex(treeFactory.createIndexInstance(fileRef))) {
+                allFiles.add(new ComparableFileName(fileRef));
+            } else {
+                file.delete();
+            }
+        }
+    }
@Override
public void createDirs() {
@@ -129,6 +140,12 @@
f.delete();
}
+    // Accepts file names that do not start with a dot and that end with BLOOM_FILTER_STRING.
+    protected static FilenameFilter bloomFilterFilter = new FilenameFilter() {
+        public boolean accept(File dir, String name) {
+            if (name.startsWith(".")) {
+                return false;
+            }
+            return name.endsWith(BLOOM_FILTER_STRING);
+        }
+    };
+
protected FileReference createFlushFile(String relFlushFileName) {
// Assigns new files to I/O devices in round-robin fashion.
IODeviceHandle dev = ioManager.getIODevices().get(ioDeviceIndex);
@@ -145,7 +162,7 @@
Date date = new Date();
String ts = formatter.format(date);
// Begin timestamp and end timestamp are identical since it is a flush
- return new LSMComponentFileReferences(createFlushFile(baseDir + ts + SPLIT_STRING + ts), null);
+ return new LSMComponentFileReferences(createFlushFile(baseDir + ts + SPLIT_STRING + ts), null, null);
}
@Override
@@ -155,7 +172,7 @@
String[] lastTimestampRange = lastFileName.split(SPLIT_STRING);
// Get the range of timestamps by taking the earliest and the latest timestamps
return new LSMComponentFileReferences(createMergeFile(baseDir + firstTimestampRange[0] + SPLIT_STRING
- + lastTimestampRange[1]), null);
+ + lastTimestampRange[1]), null, null);
}
@Override
@@ -177,7 +194,7 @@
}
if (allFiles.size() == 1) {
- validFiles.add(new LSMComponentFileReferences(allFiles.get(0).fileRef, null));
+ validFiles.add(new LSMComponentFileReferences(allFiles.get(0).fileRef, null, null));
return validFiles;
}
@@ -209,7 +226,7 @@
// Sort valid files in reverse lexicographical order, such that newer files come first.
Collections.sort(validComparableFiles, recencyCmp);
for (ComparableFileName cmpFileName : validComparableFiles) {
- validFiles.add(new LSMComponentFileReferences(cmpFileName.fileRef, null));
+ validFiles.add(new LSMComponentFileReferences(cmpFileName.fileRef, null, null));
}
return validFiles;
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/BloomFilterAwareBTreePointSearchCursor.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/BloomFilterAwareBTreePointSearchCursor.java
new file mode 100644
index 0000000..af08bdb
--- /dev/null
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/BloomFilterAwareBTreePointSearchCursor.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
+
+/**
+ * B-tree search cursor that consults a bloom filter before delegating to the regular range
+ * search cursor logic.
+ */
+public class BloomFilterAwareBTreePointSearchCursor extends BTreeRangeSearchCursor {
+    private final BloomFilter bloomFilter;
+    // Scratch array handed to the bloom filter on every membership test.
+    private long[] hashes = new long[2];
+
+    public BloomFilterAwareBTreePointSearchCursor(IBTreeLeafFrame frame, boolean exclusiveLatchNodes,
+            BloomFilter bloomFilter) {
+        super(frame, exclusiveLatchNodes);
+        this.bloomFilter = bloomFilter;
+    }
+
+    /**
+     * Returns false when the bloom filter does not contain the low key of the search; otherwise
+     * returns whatever the superclass reports.
+     */
+    @Override
+    public boolean hasNext() throws HyracksDataException {
+        // Short-circuit: super.hasNext() is only evaluated when the filter accepts the key.
+        return bloomFilter.contains(lowKey, hashes) && super.hasNext();
+    }
+}
\ No newline at end of file
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMComponentFileReferences.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMComponentFileReferences.java
index ac6ddcf..019dca4 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMComponentFileReferences.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMComponentFileReferences.java
@@ -24,9 +24,14 @@
// This FileReference for the delete index (if any). For example, this will be the the FileReference of the buddy BTree in one component of the LSM-RTree.
private final FileReference deleteIndexFileReference;
- public LSMComponentFileReferences(FileReference insertIndexFileReference, FileReference deleteIndexFileReference) {
+ // The FileReference of the bloom filter (if any).
+ private final FileReference bloomFilterFileReference;
+
+ /**
+ * Bundles the file references that belong to one LSM component.
+ *
+ * @param insertIndexFileReference file reference of the insert index
+ * @param deleteIndexFileReference file reference of the delete index, if any
+ * @param bloomFilterFileReference file reference of the bloom filter, if any
+ */
+ public LSMComponentFileReferences(FileReference insertIndexFileReference, FileReference deleteIndexFileReference,
+ FileReference bloomFilterFileReference) {
this.insertIndexFileReference = insertIndexFileReference;
this.deleteIndexFileReference = deleteIndexFileReference;
+ this.bloomFilterFileReference = bloomFilterFileReference;
}
public FileReference getInsertIndexFileReference() {
@@ -36,4 +41,8 @@
public FileReference getDeleteIndexFileReference() {
return deleteIndexFileReference;
}
+
+ /** Returns the bloom filter file reference exactly as it was passed to the constructor. */
+ public FileReference getBloomFilterFileReference() {
+ return bloomFilterFileReference;
+ }
}
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMFlushOperation.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMFlushOperation.java
deleted file mode 100644
index 6ce1f08..0000000
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMFlushOperation.java
+++ /dev/null
@@ -1,61 +0,0 @@
-package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
-
-import java.util.Collections;
-import java.util.Set;
-
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.io.FileReference;
-import edu.uci.ics.hyracks.api.io.IODeviceHandle;
-import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperation;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessorInternal;
-
-public class LSMFlushOperation implements ILSMIOOperation {
-
- private final ILSMIndexAccessorInternal accessor;
- private final ILSMComponent flushingComponent;
- private final FileReference flushTarget;
- private final ILSMIOOperationCallback callback;
-
- public LSMFlushOperation(ILSMIndexAccessorInternal accessor, ILSMComponent flushingComponent,
- FileReference flushTarget, ILSMIOOperationCallback callback) {
- this.accessor = accessor;
- this.flushingComponent = flushingComponent;
- this.flushTarget = flushTarget;
- this.callback = callback;
- }
-
- @Override
- public Set<IODeviceHandle> getReadDevices() {
- return Collections.emptySet();
- }
-
- @Override
- public Set<IODeviceHandle> getWriteDevices() {
- return Collections.singleton(flushTarget.getDeviceHandle());
- }
-
- @Override
- public void perform() throws HyracksDataException, IndexException {
- accessor.flush(this);
- }
-
- @Override
- public ILSMIOOperationCallback getCallback() {
- return callback;
- }
-
- public FileReference getFlushTarget() {
- return flushTarget;
- }
-
- public ILSMIndexAccessorInternal getAccessor() {
- return accessor;
- }
-
- public ILSMComponent getFlushingComponent() {
- return flushingComponent;
- }
-}
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMIndexFileManager.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMIndexFileManager.java
deleted file mode 100644
index a7a16ac..0000000
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMIndexFileManager.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
-
-import java.io.File;
-import java.io.FilenameFilter;
-import java.util.ArrayList;
-
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.io.FileReference;
-import edu.uci.ics.hyracks.api.io.IIOManager;
-import edu.uci.ics.hyracks.api.io.IODeviceHandle;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
-import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
-import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
-
-public class LSMIndexFileManager extends AbstractLSMIndexFileManager {
-
- public LSMIndexFileManager(IIOManager ioManager, IFileMapProvider fileMapProvider, FileReference file,
- TreeIndexFactory<? extends ITreeIndex> treeFactory, int startIODeviceIndex) {
- super(ioManager, fileMapProvider, file, treeFactory, startIODeviceIndex);
- }
-
- protected void cleanupAndGetValidFilesInternal(IODeviceHandle dev, FilenameFilter filter,
- TreeIndexFactory<? extends ITreeIndex> treeFactory, ArrayList<ComparableFileName> allFiles)
- throws HyracksDataException, IndexException {
- File dir = new File(dev.getPath(), baseDir);
- String[] files = dir.list(filter);
- for (String fileName : files) {
- File file = new File(dir.getPath() + File.separator + fileName);
- FileReference fileRef = new FileReference(file);
- if (isValidTreeIndex(treeFactory.createIndexInstance(fileRef))) {
- allFiles.add(new ComparableFileName(fileRef));
- } else {
- file.delete();
- }
- }
- }
-}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexBulkLoadOperatorDescriptor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexBulkLoadOperatorDescriptor.java
index 6b07608..da3cad5 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexBulkLoadOperatorDescriptor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexBulkLoadOperatorDescriptor.java
@@ -37,13 +37,14 @@
private final int[] fieldPermutation;
private final boolean verifyInput;
+ private final long numElementsHint;
public LSMInvertedIndexBulkLoadOperatorDescriptor(IOperatorDescriptorRegistry spec, int[] fieldPermutation,
- boolean verifyInput, IStorageManagerInterface storageManager, IFileSplitProvider fileSplitProvider,
- IIndexLifecycleManagerProvider lifecycleManagerProvider, ITypeTraits[] tokenTypeTraits,
- IBinaryComparatorFactory[] tokenComparatorFactories, ITypeTraits[] invListsTypeTraits,
- IBinaryComparatorFactory[] invListComparatorFactories, IBinaryTokenizerFactory tokenizerFactory,
- IIndexDataflowHelperFactory invertedIndexDataflowHelperFactory,
+ boolean verifyInput, long numElementsHint, IStorageManagerInterface storageManager,
+ IFileSplitProvider fileSplitProvider, IIndexLifecycleManagerProvider lifecycleManagerProvider,
+ ITypeTraits[] tokenTypeTraits, IBinaryComparatorFactory[] tokenComparatorFactories,
+ ITypeTraits[] invListsTypeTraits, IBinaryComparatorFactory[] invListComparatorFactories,
+ IBinaryTokenizerFactory tokenizerFactory, IIndexDataflowHelperFactory invertedIndexDataflowHelperFactory,
IModificationOperationCallbackFactory modificationOpCallbackFactory) {
super(spec, 1, 0, null, storageManager, fileSplitProvider, lifecycleManagerProvider, tokenTypeTraits,
tokenComparatorFactories, invListsTypeTraits, invListComparatorFactories, tokenizerFactory,
@@ -51,12 +52,13 @@
NoOpOperationCallbackFactory.INSTANCE, modificationOpCallbackFactory);
this.fieldPermutation = fieldPermutation;
this.verifyInput = verifyInput;
+ this.numElementsHint = numElementsHint;
}
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
return new IndexBulkLoadOperatorNodePushable(this, ctx, partition, fieldPermutation, 1.0f, verifyInput,
- recordDescProvider);
+ numElementsHint, recordDescProvider);
}
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index da6bc28..c69a8df 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -23,6 +23,10 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
@@ -94,7 +98,8 @@
public LSMInvertedIndex(IInMemoryBufferCache memBufferCache, IInMemoryFreePageManager memFreePageManager,
OnDiskInvertedIndexFactory diskInvIndexFactory, BTreeFactory deletedKeysBTreeFactory,
- ILSMIndexFileManager fileManager, IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
+ BloomFilterFactory bloomFilterFactory, ILSMIndexFileManager fileManager,
+ IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
@@ -114,7 +119,8 @@
((InMemoryBufferCache) memBufferCache).getFileMapProvider(), invListTypeTraits, invListCmpFactories,
BTreeLeafFrameType.REGULAR_NSM, new FileReference(new File("membtree")));
mutableComponent = new LSMInvertedIndexMutableComponent(memInvIndex, deleteKeysBTree, memFreePageManager);
- componentFactory = new LSMInvertedIndexComponentFactory(diskInvIndexFactory, deletedKeysBTreeFactory);
+ componentFactory = new LSMInvertedIndexComponentFactory(diskInvIndexFactory, deletedKeysBTreeFactory,
+ bloomFilterFactory);
}
@Override
@@ -147,7 +153,8 @@
try {
component = createDiskInvIndexComponent(componentFactory,
lsmComonentFileReference.getInsertIndexFileReference(),
- lsmComonentFileReference.getDeleteIndexFileReference(), false);
+ lsmComonentFileReference.getDeleteIndexFileReference(),
+ lsmComonentFileReference.getBloomFilterFileReference(), false);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -170,8 +177,10 @@
mutableComponent.getDeletedKeysBTree().clear();
for (ILSMComponent c : immutableComponents) {
LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
+ component.getBloomFilter().deactivate();
component.getInvIndex().deactivate();
component.getDeletedKeysBTree().deactivate();
+ component.getBloomFilter().destroy();
component.getInvIndex().destroy();
component.getDeletedKeysBTree().destroy();
}
@@ -202,6 +211,7 @@
List<ILSMComponent> immutableComponents = componentsRef.get();
for (ILSMComponent c : immutableComponents) {
LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
+ component.getBloomFilter().deactivate();
component.getInvIndex().deactivate();
component.getDeletedKeysBTree().deactivate();
}
@@ -230,6 +240,7 @@
LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
component.getInvIndex().destroy();
component.getDeletedKeysBTree().destroy();
+ component.getBloomFilter().destroy();
}
fileManager.deleteDirs();
}
@@ -357,13 +368,14 @@
// Distinguish between regular searches and range searches (mostly used in merges).
if (pred instanceof InvertedIndexSearchPredicate) {
initState = new LSMInvertedIndexSearchCursorInitialState(keyCmp, keysOnlyTuple, indexAccessors,
- deletedKeysBTreeAccessors, ictx, includeMutableComponent, lsmHarness, operationalComponents);
+ deletedKeysBTreeAccessors, mutableComponent.getDeletedKeysBTree().getLeafFrameFactory(), ictx,
+ includeMutableComponent, lsmHarness, operationalComponents);
} else {
InMemoryInvertedIndex memInvIndex = (InMemoryInvertedIndex) mutableComponent.getInvIndex();
MultiComparator tokensAndKeysCmp = MultiComparator.create(memInvIndex.getBTree().getComparatorFactories());
initState = new LSMInvertedIndexRangeSearchCursorInitialState(tokensAndKeysCmp, keyCmp, keysOnlyTuple,
- includeMutableComponent, lsmHarness, indexAccessors, deletedKeysBTreeAccessors, pred,
- operationalComponents);
+ mutableComponent.getDeletedKeysBTree().getLeafFrameFactory(), includeMutableComponent, lsmHarness,
+ indexAccessors, deletedKeysBTreeAccessors, pred, operationalComponents);
}
return initState;
}
@@ -392,7 +404,8 @@
opCtx.getComponentHolder().add(flushingComponent);
ioScheduler.scheduleOperation(new LSMInvertedIndexFlushOperation(new LSMInvertedIndexAccessor(this, lsmHarness,
fileManager, opCtx), mutableComponent, componentFileRefs.getInsertIndexFileReference(),
- componentFileRefs.getDeleteIndexFileReference(), callback));
+ componentFileRefs.getDeleteIndexFileReference(), componentFileRefs.getBloomFilterFileReference(),
+ callback));
}
@Override
@@ -401,7 +414,8 @@
// Create an inverted index instance to be bulk loaded.
LSMInvertedIndexImmutableComponent component = createDiskInvIndexComponent(componentFactory,
- flushOp.getDictBTreeFlushTarget(), flushOp.getDeletedKeysBTreeFlushTarget(), true);
+ flushOp.getDictBTreeFlushTarget(), flushOp.getDeletedKeysBTreeFlushTarget(),
+ flushOp.getBloomFilterFlushTarget(), true);
IInvertedIndex diskInvertedIndex = component.getInvIndex();
// Create a scan cursor on the BTree underlying the in-memory inverted index.
@@ -414,7 +428,7 @@
memBTreeAccessor.search(scanCursor, nullPred);
// Bulk load the disk inverted index from the in-memory inverted index.
- IIndexBulkLoader invIndexBulkLoader = diskInvertedIndex.createBulkLoader(1.0f, false);
+ IIndexBulkLoader invIndexBulkLoader = diskInvertedIndex.createBulkLoader(1.0f, false, 0L);
try {
while (scanCursor.hasNext()) {
scanCursor.next();
@@ -425,28 +439,53 @@
}
invIndexBulkLoader.end();
- // Create an BTree instance for the deleted keys.
- BTree diskDeletedKeysBTree = component.getDeletedKeysBTree();
-
- // Create a scan cursor on the deleted keys BTree underlying the in-memory inverted index.
IIndexAccessor deletedKeysBTreeAccessor = flushingComponent.getDeletedKeysBTree().createAccessor(
NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- IIndexCursor deletedKeysScanCursor = deletedKeysBTreeAccessor.createSearchCursor();
- deletedKeysBTreeAccessor.search(deletedKeysScanCursor, nullPred);
-
- // Bulk load the deleted-keys BTree.
- IIndexBulkLoader deletedKeysBTreeBulkLoader = diskDeletedKeysBTree.createBulkLoader(1.0f, false);
+ IIndexCursor btreeCountingCursor = ((BTreeAccessor) deletedKeysBTreeAccessor).createCountingSearchCursor();
+ deletedKeysBTreeAccessor.search(btreeCountingCursor, nullPred);
+ long numBTreeTuples = 0L;
try {
- while (deletedKeysScanCursor.hasNext()) {
- deletedKeysScanCursor.next();
- deletedKeysBTreeBulkLoader.add(deletedKeysScanCursor.getTuple());
+ while (btreeCountingCursor.hasNext()) {
+ btreeCountingCursor.next();
+ ITupleReference countTuple = btreeCountingCursor.getTuple();
+ numBTreeTuples = IntegerSerializerDeserializer.getInt(countTuple.getFieldData(0),
+ countTuple.getFieldStart(0));
}
} finally {
- deletedKeysScanCursor.close();
+ btreeCountingCursor.close();
}
- deletedKeysBTreeBulkLoader.end();
- return new LSMInvertedIndexImmutableComponent(diskInvertedIndex, diskDeletedKeysBTree);
+ if (numBTreeTuples > 0) {
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numBTreeTuples);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
+
+ // Get the on-disk BTree instance for the deleted keys.
+ BTree diskDeletedKeysBTree = component.getDeletedKeysBTree();
+
+ // Create a scan cursor on the deleted keys BTree underlying the in-memory inverted index.
+ IIndexCursor deletedKeysScanCursor = deletedKeysBTreeAccessor.createSearchCursor();
+ deletedKeysBTreeAccessor.search(deletedKeysScanCursor, nullPred);
+
+ // Bulk load the deleted-keys BTree.
+ IIndexBulkLoader deletedKeysBTreeBulkLoader = diskDeletedKeysBTree.createBulkLoader(1.0f, false, 0L);
+ IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numBTreeTuples,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
+
+ try {
+ while (deletedKeysScanCursor.hasNext()) {
+ deletedKeysScanCursor.next();
+ deletedKeysBTreeBulkLoader.add(deletedKeysScanCursor.getTuple());
+ builder.add(deletedKeysScanCursor.getTuple());
+ }
+ } finally {
+ deletedKeysScanCursor.close();
+ builder.end();
+ }
+ deletedKeysBTreeBulkLoader.end();
+ }
+
+ return component;
}
@Override
@@ -476,7 +515,7 @@
ILSMIndexAccessorInternal accessor = new LSMInvertedIndexAccessor(this, lsmHarness, fileManager, ictx);
ioScheduler.scheduleOperation(new LSMInvertedIndexMergeOperation(accessor, mergingComponents, cursor,
relMergeFileRefs.getInsertIndexFileReference(), relMergeFileRefs.getDeleteIndexFileReference(),
- callback));
+ relMergeFileRefs.getBloomFilterFileReference(), callback));
}
@Override
@@ -486,11 +525,12 @@
// Create an inverted index instance.
LSMInvertedIndexImmutableComponent component = createDiskInvIndexComponent(componentFactory,
- mergeOp.getDictBTreeMergeTarget(), mergeOp.getDeletedKeysBTreeMergeTarget(), true);
+ mergeOp.getDictBTreeMergeTarget(), mergeOp.getDeletedKeysBTreeMergeTarget(),
+ mergeOp.getBloomFilterMergeTarget(), true);
IInvertedIndex mergedDiskInvertedIndex = component.getInvIndex();
IIndexCursor cursor = mergeOp.getCursor();
- IIndexBulkLoader invIndexBulkLoader = mergedDiskInvertedIndex.createBulkLoader(1.0f, true);
+ IIndexBulkLoader invIndexBulkLoader = mergedDiskInvertedIndex.createBulkLoader(1.0f, true, 0L);
try {
while (cursor.hasNext()) {
cursor.next();
@@ -502,31 +542,30 @@
}
invIndexBulkLoader.end();
- // Create an empty deleted keys BTree (do nothing with the returned index).
- BTree deletedKeysBTree = component.getDeletedKeysBTree();
-
// Add the merged components for cleanup.
mergedComponents.addAll(mergeOp.getMergingComponents());
- return new LSMInvertedIndexImmutableComponent(mergedDiskInvertedIndex, deletedKeysBTree);
+ return component;
}
private ILSMComponent createBulkLoadTarget() throws HyracksDataException, IndexException {
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
return createDiskInvIndexComponent(componentFactory, componentFileRefs.getInsertIndexFileReference(),
- componentFileRefs.getDeleteIndexFileReference(), true);
+ componentFileRefs.getDeleteIndexFileReference(), componentFileRefs.getBloomFilterFileReference(), true);
}
@Override
- public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws IndexException {
- return new LSMInvertedIndexBulkLoader(fillFactor, verifyInput);
+ public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws IndexException {
+ return new LSMInvertedIndexBulkLoader(fillFactor, verifyInput, numElementsHint);
}
public class LSMInvertedIndexBulkLoader implements IIndexBulkLoader {
private final ILSMComponent component;
private final IIndexBulkLoader invIndexBulkLoader;
- public LSMInvertedIndexBulkLoader(float fillFactor, boolean verifyInput) throws IndexException {
+ public LSMInvertedIndexBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws IndexException {
// Note that by using a flush target file name, we state that the
// new bulk loaded tree is "newer" than any other merged tree.
try {
@@ -537,7 +576,7 @@
throw new TreeIndexException(e);
}
invIndexBulkLoader = ((LSMInvertedIndexImmutableComponent) component).getInvIndex().createBulkLoader(
- fillFactor, verifyInput);
+ fillFactor, verifyInput, numElementsHint);
}
@Override
@@ -561,6 +600,8 @@
((LSMInvertedIndexImmutableComponent) component).getInvIndex().destroy();
((LSMInvertedIndexImmutableComponent) component).getDeletedKeysBTree().deactivate();
((LSMInvertedIndexImmutableComponent) component).getDeletedKeysBTree().destroy();
+ ((LSMInvertedIndexImmutableComponent) component).getBloomFilter().deactivate();
+ ((LSMInvertedIndexImmutableComponent) component).getBloomFilter().destroy();
}
@Override
@@ -577,17 +618,20 @@
}
protected LSMInvertedIndexImmutableComponent createDiskInvIndexComponent(ILSMComponentFactory factory,
- FileReference dictBTreeFileRef, FileReference btreeFileRef, boolean create) throws HyracksDataException,
- IndexException {
+ FileReference dictBTreeFileRef, FileReference btreeFileRef, FileReference bloomFilterFileRef, boolean create)
+ throws HyracksDataException, IndexException {
LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) factory
- .createLSMComponentInstance(new LSMComponentFileReferences(dictBTreeFileRef, btreeFileRef));
+ .createLSMComponentInstance(new LSMComponentFileReferences(dictBTreeFileRef, btreeFileRef,
+ bloomFilterFileRef));
if (create) {
component.getInvIndex().create();
component.getDeletedKeysBTree().create();
+ component.getBloomFilter().create();
}
// Will be closed during cleanup of merge().
component.getInvIndex().activate();
component.getDeletedKeysBTree().activate();
+ component.getBloomFilter().activate();
return component;
}
@@ -657,8 +701,15 @@
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
LSMInvertedIndexImmutableComponent invIndexComponent = (LSMInvertedIndexImmutableComponent) lsmComponent;
OnDiskInvertedIndex invIndex = (OnDiskInvertedIndex) invIndexComponent.getInvIndex();
+ // Flush the bloom filter first.
+ int fileId = invIndexComponent.getBloomFilter().getFileId();
+ IBufferCache bufferCache = invIndex.getBufferCache();
+ int startPage = 0;
+ int maxPage = invIndexComponent.getBloomFilter().getNumPages();
+ forceFlushDirtyPages(bufferCache, fileId, startPage, maxPage);
+
ITreeIndex treeIndex = invIndex.getBTree();
- // Flush inverted index first.
+ // Flush inverted index second.
forceFlushDirtyPages(treeIndex);
forceFlushInvListsFileDirtyPages(invIndex);
// Flush deleted keys BTree.
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java
index f856f90..1f4db63 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -27,17 +29,21 @@
public class LSMInvertedIndexComponentFactory implements ILSMComponentFactory {
private final OnDiskInvertedIndexFactory diskInvIndexFactory;
private final TreeIndexFactory<BTree> btreeFactory;
+ private final BloomFilterFactory bloomFilterFactory;
public LSMInvertedIndexComponentFactory(OnDiskInvertedIndexFactory diskInvIndexFactory,
- TreeIndexFactory<BTree> btreeFactory) {
+ TreeIndexFactory<BTree> btreeFactory, BloomFilterFactory bloomFilterFactory) {
this.diskInvIndexFactory = diskInvIndexFactory;
this.btreeFactory = btreeFactory;
+ this.bloomFilterFactory = bloomFilterFactory;
}
@Override
- public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException {
+ public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
+ HyracksDataException {
return new LSMInvertedIndexImmutableComponent(diskInvIndexFactory.createIndexInstance(cfr
- .getInsertIndexFileReference()), btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()));
+ .getInsertIndexFileReference()), btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()),
+ bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()));
}
@Override
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
index 8ffb0bd..15a1633 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
@@ -29,14 +29,14 @@
import edu.uci.ics.hyracks.api.io.IIOManager;
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BTreeFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexFileNameMapper;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
// TODO: Refactor for better code sharing with other file managers.
-public class LSMInvertedIndexFileManager extends LSMIndexFileManager implements IInvertedIndexFileNameMapper {
+public class LSMInvertedIndexFileManager extends AbstractLSMIndexFileManager implements IInvertedIndexFileNameMapper {
private static final String DICT_BTREE_SUFFIX = "b";
private static final String INVLISTS_SUFFIX = "i";
private static final String DELETED_KEYS_BTREE_SUFFIX = "d";
@@ -69,7 +69,8 @@
String baseName = baseDir + ts + SPLIT_STRING + ts;
// Begin timestamp and end timestamp are identical since it is a flush
return new LSMComponentFileReferences(createFlushFile(baseName + SPLIT_STRING + DICT_BTREE_SUFFIX),
- createFlushFile(baseName + SPLIT_STRING + DELETED_KEYS_BTREE_SUFFIX));
+ createFlushFile(baseName + SPLIT_STRING + DELETED_KEYS_BTREE_SUFFIX), createFlushFile(baseName
+ + SPLIT_STRING + BLOOM_FILTER_STRING));
}
@Override
@@ -81,7 +82,8 @@
String baseName = baseDir + firstTimestampRange[0] + SPLIT_STRING + lastTimestampRange[1];
// Get the range of timestamps by taking the earliest and the latest timestamps
return new LSMComponentFileReferences(createMergeFile(baseName + SPLIT_STRING + DICT_BTREE_SUFFIX),
- createMergeFile(baseName + SPLIT_STRING + DELETED_KEYS_BTREE_SUFFIX));
+ createMergeFile(baseName + SPLIT_STRING + DELETED_KEYS_BTREE_SUFFIX), createMergeFile(baseName
+ + SPLIT_STRING + BLOOM_FILTER_STRING));
}
@Override
@@ -89,15 +91,40 @@
List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
ArrayList<ComparableFileName> allDictBTreeFiles = new ArrayList<ComparableFileName>();
ArrayList<ComparableFileName> allDeletedKeysBTreeFiles = new ArrayList<ComparableFileName>();
+ ArrayList<ComparableFileName> allBloomFilterFiles = new ArrayList<ComparableFileName>();
// Gather files from all IODeviceHandles.
for (IODeviceHandle dev : ioManager.getIODevices()) {
- cleanupAndGetValidFilesInternal(dev, deletedKeysBTreeFilter, btreeFactory, allDeletedKeysBTreeFiles);
- HashSet<String> deletedKeysBTreeFilesSet = new HashSet<String>();
- for (ComparableFileName cmpFileName : allDeletedKeysBTreeFiles) {
+ cleanupAndGetValidFilesInternal(dev, bloomFilterFilter, null, allBloomFilterFiles);
+ HashSet<String> bloomFilterFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : allBloomFilterFiles) {
int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- deletedKeysBTreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ bloomFilterFilesSet.add(cmpFileName.fileName.substring(0, index));
}
+ // List of valid BTree files that may or may not have a bloom filter buddy. Will check for buddies below.
+ ArrayList<ComparableFileName> tmpAllDeletedBTreeFiles = new ArrayList<ComparableFileName>();
+ cleanupAndGetValidFilesInternal(dev, deletedKeysBTreeFilter, btreeFactory, tmpAllDeletedBTreeFiles);
+
+ // Look for buddy bloom filters for all valid BTrees.
+ // If no buddy is found, delete the file, otherwise add the BTree to allDeletedKeysBTreeFiles.
+ HashSet<String> deletedKeysBTreeFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : tmpAllDeletedBTreeFiles) {
+ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+ String file = cmpFileName.fileName.substring(0, index);
+ if (bloomFilterFilesSet.contains(file)) {
+ allDeletedKeysBTreeFiles.add(cmpFileName);
+ deletedKeysBTreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ } else {
+ // Couldn't find the corresponding bloom filter file; thus, delete
+ // the deleted-keys BTree file.
+ // There is no need to delete the inverted-lists file corresponding to the non-existent
+ // dictionary BTree, because we flush the dictionary BTree first. So if a dictionary BTree
+ // file does not exists, then neither can its inverted-list file.
+ File invalidDeletedKeysBTreeFile = new File(cmpFileName.fullPath);
+ invalidDeletedKeysBTreeFile.delete();
+ }
+ }
+
// We use the dictionary BTree of the inverted index for validation.
// List of valid dictionary BTree files that may or may not have a deleted-keys BTree buddy. Will check for buddies below.
ArrayList<ComparableFileName> tmpAllBTreeFiles = new ArrayList<ComparableFileName>();
@@ -121,24 +148,27 @@
}
}
// Sanity check.
- if (allDictBTreeFiles.size() != allDeletedKeysBTreeFiles.size()) {
- throw new HyracksDataException("Unequal number of valid RTree and BTree files found. Aborting cleanup.");
+ if (allDictBTreeFiles.size() != allDeletedKeysBTreeFiles.size()
+ || allDictBTreeFiles.size() != allBloomFilterFiles.size()) {
+ throw new HyracksDataException(
+ "Unequal number of valid Dictionary BTree, Deleted BTree, and Bloom Filter files found. Aborting cleanup.");
}
// Trivial cases.
- if (allDictBTreeFiles.isEmpty() || allDeletedKeysBTreeFiles.isEmpty()) {
+ if (allDictBTreeFiles.isEmpty() || allDeletedKeysBTreeFiles.isEmpty() || allBloomFilterFiles.isEmpty()) {
return validFiles;
}
- if (allDictBTreeFiles.size() == 1 && allDeletedKeysBTreeFiles.size() == 1) {
+ if (allDictBTreeFiles.size() == 1 && allDeletedKeysBTreeFiles.size() == 1 && allBloomFilterFiles.size() == 1) {
validFiles.add(new LSMComponentFileReferences(allDictBTreeFiles.get(0).fileRef, allDeletedKeysBTreeFiles
- .get(0).fileRef));
+ .get(0).fileRef, allBloomFilterFiles.get(0).fileRef));
return validFiles;
}
// Sorts files names from earliest to latest timestamp.
Collections.sort(allDeletedKeysBTreeFiles);
Collections.sort(allDictBTreeFiles);
+ Collections.sort(allBloomFilterFiles);
List<ComparableFileName> validComparableDictBTreeFiles = new ArrayList<ComparableFileName>();
ComparableFileName lastDictBTree = allDictBTreeFiles.get(0);
@@ -148,25 +178,37 @@
ComparableFileName lastDeletedKeysBTree = allDeletedKeysBTreeFiles.get(0);
validComparableDeletedKeysBTreeFiles.add(lastDeletedKeysBTree);
+ List<ComparableFileName> validComparableBloomFilterFiles = new ArrayList<ComparableFileName>();
+ ComparableFileName lastBloomFilter = allBloomFilterFiles.get(0);
+ validComparableBloomFilterFiles.add(lastBloomFilter);
+
for (int i = 1; i < allDictBTreeFiles.size(); i++) {
ComparableFileName currentRTree = allDictBTreeFiles.get(i);
ComparableFileName currentBTree = allDictBTreeFiles.get(i);
+ ComparableFileName currentBloomFilter = allBloomFilterFiles.get(i);
// Current start timestamp is greater than last stop timestamp.
if (currentRTree.interval[0].compareTo(lastDeletedKeysBTree.interval[1]) > 0
- && currentBTree.interval[0].compareTo(lastDeletedKeysBTree.interval[1]) > 0) {
+ && currentBTree.interval[0].compareTo(lastDeletedKeysBTree.interval[1]) > 0
+ && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[1]) > 0) {
validComparableDictBTreeFiles.add(currentRTree);
validComparableDeletedKeysBTreeFiles.add(currentBTree);
+ validComparableBloomFilterFiles.add(currentBloomFilter);
lastDictBTree = currentRTree;
lastDeletedKeysBTree = currentBTree;
+ lastBloomFilter = currentBloomFilter;
} else if (currentRTree.interval[0].compareTo(lastDictBTree.interval[0]) >= 0
&& currentRTree.interval[1].compareTo(lastDictBTree.interval[1]) <= 0
&& currentBTree.interval[0].compareTo(lastDeletedKeysBTree.interval[0]) >= 0
- && currentBTree.interval[1].compareTo(lastDeletedKeysBTree.interval[1]) <= 0) {
+ && currentBTree.interval[1].compareTo(lastDeletedKeysBTree.interval[1]) <= 0
+ && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[0]) >= 0
+ && currentBloomFilter.interval[1].compareTo(lastBloomFilter.interval[1]) <= 0) {
// Invalid files are completely contained in last interval.
File invalidRTreeFile = new File(currentRTree.fullPath);
invalidRTreeFile.delete();
File invalidBTreeFile = new File(currentBTree.fullPath);
invalidBTreeFile.delete();
+ File invalidBloomFilterFile = new File(currentBloomFilter.fullPath);
+ invalidBloomFilterFile.delete();
} else {
// This scenario should not be possible.
throw new HyracksDataException("Found LSM files with overlapping but not contained timetamp intervals.");
@@ -177,13 +219,17 @@
// files come first.
Collections.sort(validComparableDictBTreeFiles, recencyCmp);
Collections.sort(validComparableDeletedKeysBTreeFiles, recencyCmp);
+ Collections.sort(validComparableBloomFilterFiles, recencyCmp);
Iterator<ComparableFileName> dictBTreeFileIter = validComparableDictBTreeFiles.iterator();
Iterator<ComparableFileName> deletedKeysBTreeIter = validComparableDeletedKeysBTreeFiles.iterator();
+ Iterator<ComparableFileName> bloomFilterFileIter = validComparableBloomFilterFiles.iterator();
while (dictBTreeFileIter.hasNext() && deletedKeysBTreeIter.hasNext()) {
ComparableFileName cmpDictBTreeFile = dictBTreeFileIter.next();
ComparableFileName cmpDeletedKeysBTreeFile = deletedKeysBTreeIter.next();
- validFiles.add(new LSMComponentFileReferences(cmpDictBTreeFile.fileRef, cmpDeletedKeysBTreeFile.fileRef));
+ ComparableFileName cmpBloomFilterFileName = bloomFilterFileIter.next();
+ validFiles.add(new LSMComponentFileReferences(cmpDictBTreeFile.fileRef, cmpDeletedKeysBTreeFile.fileRef,
+ cmpBloomFilterFileName.fileRef));
}
return validFiles;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
index fb55ce0..eedf0da 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
@@ -16,6 +16,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
import java.util.Collections;
+import java.util.HashSet;
import java.util.Set;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -31,15 +32,18 @@
private final LSMInvertedIndexMutableComponent flushingComponent;
private final FileReference dictBTreeFlushTarget;
private final FileReference deletedKeysBTreeFlushTarget;
+ private final FileReference bloomFilterFlushTarget;
private final ILSMIOOperationCallback callback;
public LSMInvertedIndexFlushOperation(ILSMIndexAccessorInternal accessor,
LSMInvertedIndexMutableComponent flushingComponent, FileReference dictBTreeFlushTarget,
- FileReference deletedKeysBTreeFlushTarget, ILSMIOOperationCallback callback) {
+ FileReference deletedKeysBTreeFlushTarget, FileReference bloomFilterFlushTarget,
+ ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.flushingComponent = flushingComponent;
this.dictBTreeFlushTarget = dictBTreeFlushTarget;
this.deletedKeysBTreeFlushTarget = deletedKeysBTreeFlushTarget;
+ this.bloomFilterFlushTarget = bloomFilterFlushTarget;
this.callback = callback;
}
@@ -50,7 +54,12 @@
@Override
public Set<IODeviceHandle> getWriteDevices() {
- return Collections.singleton(dictBTreeFlushTarget.getDeviceHandle());
+ Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
+ devs.add(dictBTreeFlushTarget.getDeviceHandle());
+ devs.add(deletedKeysBTreeFlushTarget.getDeviceHandle());
+ devs.add(bloomFilterFlushTarget.getDeviceHandle());
+ return devs;
+
}
@Override
@@ -71,6 +80,10 @@
return deletedKeysBTreeFlushTarget;
}
+ public FileReference getBloomFilterFlushTarget() {
+ return bloomFilterFlushTarget;
+ }
+
public LSMInvertedIndexMutableComponent getFlushingComponent() {
return flushingComponent;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java
index 5099a7b..4c9b5e8 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java
@@ -1,6 +1,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
@@ -9,10 +10,12 @@
private final IInvertedIndex invIndex;
private final BTree deletedKeysBTree;
+ private final BloomFilter bloomFilter;
- public LSMInvertedIndexImmutableComponent(IInvertedIndex invIndex, BTree deletedKeysBTree) {
+ public LSMInvertedIndexImmutableComponent(IInvertedIndex invIndex, BTree deletedKeysBTree, BloomFilter bloomFilter) {
this.invIndex = invIndex;
this.deletedKeysBTree = deletedKeysBTree;
+ this.bloomFilter = bloomFilter;
}
@Override
@@ -21,6 +24,8 @@
invIndex.destroy();
deletedKeysBTree.deactivate();
deletedKeysBTree.destroy();
+ bloomFilter.deactivate();
+ bloomFilter.destroy();
}
public IInvertedIndex getInvIndex() {
@@ -31,4 +36,7 @@
return deletedKeysBTree;
}
+ public BloomFilter getBloomFilter() {
+ return bloomFilter;
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
index 63b604e..dea628c 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
@@ -36,16 +36,18 @@
private final IIndexCursor cursor;
private final FileReference dictBTreeMergeTarget;
private final FileReference deletedKeysBTreeMergeTarget;
+ private final FileReference bloomFilterMergeTarget;
private final ILSMIOOperationCallback callback;
public LSMInvertedIndexMergeOperation(ILSMIndexAccessorInternal accessor, List<ILSMComponent> mergingComponents,
IIndexCursor cursor, FileReference dictBTreeMergeTarget, FileReference deletedKeysBTreeMergeTarget,
- ILSMIOOperationCallback callback) {
+ FileReference bloomFilterMergeTarget, ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.mergingComponents = mergingComponents;
this.cursor = cursor;
this.dictBTreeMergeTarget = dictBTreeMergeTarget;
this.deletedKeysBTreeMergeTarget = deletedKeysBTreeMergeTarget;
+ this.bloomFilterMergeTarget = bloomFilterMergeTarget;
this.callback = callback;
}
@@ -57,15 +59,17 @@
OnDiskInvertedIndex invIndex = (OnDiskInvertedIndex) component.getInvIndex();
devs.add(invIndex.getBTree().getFileReference().getDeviceHandle());
devs.add(component.getDeletedKeysBTree().getFileReference().getDeviceHandle());
+ devs.add(component.getBloomFilter().getFileReference().getDeviceHandle());
}
return devs;
}
@Override
public Set<IODeviceHandle> getWriteDevices() {
- Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>(2);
+ Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
devs.add(dictBTreeMergeTarget.getDeviceHandle());
devs.add(deletedKeysBTreeMergeTarget.getDeviceHandle());
+ devs.add(bloomFilterMergeTarget.getDeviceHandle());
return devs;
}
@@ -87,6 +91,10 @@
return deletedKeysBTreeMergeTarget;
}
+ public FileReference getBloomFilterMergeTarget() {
+ return bloomFilterMergeTarget;
+ }
+
public IIndexCursor getCursor() {
return cursor;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 43626f5..259af5b 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -18,6 +18,7 @@
import java.util.ArrayList;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
@@ -27,13 +28,14 @@
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
public class LSMInvertedIndexRangeSearchCursor extends LSMIndexSearchCursor {
// Assuming the cursor for all deleted-keys indexes are of the same type.
- protected IIndexCursor deletedKeysBTreeCursor;
+ private IIndexCursor[] deletedKeysBTreeCursors;
protected ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
protected PermutingTupleReference keysOnlyTuple;
protected RangePredicate keySearchPred;
@@ -59,19 +61,32 @@
rangeCursors[i] = invIndexAccessor.createRangeSearchCursor();
invIndexAccessor.rangeSearch(rangeCursors[i], lsmInitState.getSearchPredicate());
}
+ lsmHarness = lsmInitState.getLSMHarness();
+ operationalComponents = lsmInitState.getOperationalComponents();
+ includeMemComponent = lsmInitState.getIncludeMemComponent();
// For searching the deleted-keys BTrees.
this.keysOnlyTuple = lsmInitState.getKeysOnlyTuple();
deletedKeysBTreeAccessors = lsmInitState.getDeletedKeysBTreeAccessors();
+
if (!deletedKeysBTreeAccessors.isEmpty()) {
- deletedKeysBTreeCursor = deletedKeysBTreeAccessors.get(0).createSearchCursor();
+ deletedKeysBTreeCursors = new IIndexCursor[deletedKeysBTreeAccessors.size()];
+ int i = 0;
+ if (includeMemComponent) {
+ // No need for a bloom filter for the in-memory BTree.
+ deletedKeysBTreeCursors[i] = deletedKeysBTreeAccessors.get(i).createSearchCursor();
+ ++i;
+ }
+ for (; i < deletedKeysBTreeCursors.length; i++) {
+ deletedKeysBTreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitState
+ .getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMInvertedIndexImmutableComponent) operationalComponents.get(i)).getBloomFilter());
+ }
+
}
MultiComparator keyCmp = lsmInitState.getKeyComparator();
keySearchPred = new RangePredicate(keysOnlyTuple, keysOnlyTuple, true, true, keyCmp, keyCmp);
- lsmHarness = lsmInitState.getLSMHarness();
- includeMemComponent = lsmInitState.getIncludeMemComponent();
- operationalComponents = lsmInitState.getOperationalComponents();
setPriorityQueueComparator();
initPriorityQueue();
}
@@ -84,16 +99,16 @@
keysOnlyTuple.reset(checkElement.getTuple());
int end = checkElement.getCursorIndex();
for (int i = 0; i < end; i++) {
- deletedKeysBTreeCursor.reset();
+ deletedKeysBTreeCursors[i].reset();
try {
- deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
- if (deletedKeysBTreeCursor.hasNext()) {
+ deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursors[i], keySearchPred);
+ if (deletedKeysBTreeCursors[i].hasNext()) {
return true;
}
} catch (IndexException e) {
throw new HyracksDataException(e);
} finally {
- deletedKeysBTreeCursor.close();
+ deletedKeysBTreeCursors[i].close();
}
}
return false;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
index 5a81a2e..0cec92e 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
@@ -22,6 +22,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -38,17 +39,20 @@
private final ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
private final ISearchPredicate predicate;
private final PermutingTupleReference keysOnlyTuple;
+ private final ITreeIndexFrameFactory deletedKeysBtreeLeafFrameFactory;
private final boolean includeMemComponent;
private final List<ILSMComponent> operationalComponents;
public LSMInvertedIndexRangeSearchCursorInitialState(MultiComparator tokensAndKeysCmp, MultiComparator keyCmp,
- PermutingTupleReference keysOnlyTuple, boolean includeMemComponent, ILSMHarness lsmHarness,
- ArrayList<IIndexAccessor> indexAccessors, ArrayList<IIndexAccessor> deletedKeysBTreeAccessors,
- ISearchPredicate predicate, List<ILSMComponent> operationalComponents) {
+ PermutingTupleReference keysOnlyTuple, ITreeIndexFrameFactory deletedKeysBtreeLeafFrameFactory,
+ boolean includeMemComponent, ILSMHarness lsmHarness, ArrayList<IIndexAccessor> indexAccessors,
+ ArrayList<IIndexAccessor> deletedKeysBTreeAccessors, ISearchPredicate predicate,
+ List<ILSMComponent> operationalComponents) {
this.tokensAndKeysCmp = tokensAndKeysCmp;
this.keyCmp = keyCmp;
this.keysOnlyTuple = keysOnlyTuple;
+ this.deletedKeysBtreeLeafFrameFactory = deletedKeysBtreeLeafFrameFactory;
this.lsmHarness = lsmHarness;
this.indexAccessors = indexAccessors;
this.deletedKeysBTreeAccessors = deletedKeysBTreeAccessors;
@@ -114,6 +118,10 @@
return keyCmp;
}
+ public ITreeIndexFrameFactory getgetDeletedKeysBTreeLeafFrameFactory() {
+ return deletedKeysBtreeLeafFrameFactory;
+ }
+
public boolean getIncludeMemComponent() {
return includeMemComponent;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
index 1ea66f7..36ad51b 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
@@ -18,6 +18,7 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
@@ -29,6 +30,7 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
/**
@@ -48,7 +50,7 @@
private ISearchOperationCallback searchCallback;
// Assuming the cursor for all deleted-keys indexes are of the same type.
- private IIndexCursor deletedKeysBTreeCursor;
+ private IIndexCursor[] deletedKeysBTreeCursors;
private List<IIndexAccessor> deletedKeysBTreeAccessors;
private RangePredicate keySearchPred;
private ILSMIndexOperationContext opCtx;
@@ -69,7 +71,19 @@
// For searching the deleted-keys BTrees.
deletedKeysBTreeAccessors = lsmInitState.getDeletedKeysBTreeAccessors();
- deletedKeysBTreeCursor = deletedKeysBTreeAccessors.get(0).createSearchCursor();
+ deletedKeysBTreeCursors = new IIndexCursor[deletedKeysBTreeAccessors.size()];
+ int i = 0;
+ if (includeMemComponent) {
+ // No need for a bloom filter for the in-memory BTree.
+ deletedKeysBTreeCursors[i] = deletedKeysBTreeAccessors.get(i).createSearchCursor();
+ ++i;
+ }
+ for (; i < deletedKeysBTreeCursors.length; i++) {
+ deletedKeysBTreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitState
+ .getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMInvertedIndexImmutableComponent) operationalComponents.get(i)).getBloomFilter());
+ }
+
MultiComparator keyCmp = lsmInitState.getKeyComparator();
keySearchPred = new RangePredicate(null, null, true, true, keyCmp, keyCmp);
}
@@ -78,16 +92,16 @@
keySearchPred.setLowKey(key, true);
keySearchPred.setHighKey(key, true);
for (int i = 0; i < accessorIndex; i++) {
- deletedKeysBTreeCursor.reset();
+ deletedKeysBTreeCursors[i].reset();
try {
- deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
- if (deletedKeysBTreeCursor.hasNext()) {
+ deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursors[i], keySearchPred);
+ if (deletedKeysBTreeCursors[i].hasNext()) {
return true;
}
} catch (IndexException e) {
throw new HyracksDataException(e);
} finally {
- deletedKeysBTreeCursor.close();
+ deletedKeysBTreeCursors[i].close();
}
}
return false;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursorInitialState.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursorInitialState.java
index 15fc769..eb6f338 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursorInitialState.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursorInitialState.java
@@ -21,6 +21,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -39,17 +40,20 @@
private MultiComparator originalCmp;
private final MultiComparator keyCmp;
private final PermutingTupleReference keysOnlyTuple;
+ private final ITreeIndexFrameFactory deletedKeysBtreeLeafFrameFactory;
private final List<ILSMComponent> operationalComponents;
public LSMInvertedIndexSearchCursorInitialState(final MultiComparator keyCmp,
PermutingTupleReference keysOnlyTuple, List<IIndexAccessor> indexAccessors,
- List<IIndexAccessor> deletedKeysBTreeAccessors, IIndexOperationContext ctx, boolean includeMemComponent,
- ILSMHarness lsmHarness, List<ILSMComponent> operationalComponents) {
+ List<IIndexAccessor> deletedKeysBTreeAccessors, ITreeIndexFrameFactory deletedKeysBtreeLeafFrameFactory,
+ IIndexOperationContext ctx, boolean includeMemComponent, ILSMHarness lsmHarness,
+ List<ILSMComponent> operationalComponents) {
this.keyCmp = keyCmp;
this.keysOnlyTuple = keysOnlyTuple;
this.indexAccessors = indexAccessors;
this.deletedKeysBTreeAccessors = deletedKeysBTreeAccessors;
+ this.deletedKeysBtreeLeafFrameFactory = deletedKeysBtreeLeafFrameFactory;
this.includeMemComponent = includeMemComponent;
this.operationalComponents = operationalComponents;
this.lsmHarness = lsmHarness;
@@ -113,6 +117,10 @@
public List<IIndexAccessor> getDeletedKeysBTreeAccessors() {
return deletedKeysBTreeAccessors;
}
+
+ public ITreeIndexFrameFactory getgetDeletedKeysBTreeLeafFrameFactory() {
+ return deletedKeysBtreeLeafFrameFactory;
+ }
public PermutingTupleReference getKeysOnlyTuple() {
return keysOnlyTuple;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
index 3cc5deb..1b293eb 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
@@ -17,6 +17,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.common.api.IInMemoryFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IInMemoryBufferCache;
@@ -36,16 +37,16 @@
public PartitionedLSMInvertedIndex(IInMemoryBufferCache memBufferCache,
IInMemoryFreePageManager memFreePageManager, OnDiskInvertedIndexFactory diskInvIndexFactory,
- BTreeFactory deletedKeysBTreeFactory, ILSMIndexFileManager fileManager,
- IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
+ BTreeFactory deletedKeysBTreeFactory, BloomFilterFactory bloomFilterFactory,
+ ILSMIndexFileManager fileManager, IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider)
throws IndexException {
- super(memBufferCache, memFreePageManager, diskInvIndexFactory, deletedKeysBTreeFactory, fileManager,
- diskFileMapProvider, invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
- tokenizerFactory, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ super(memBufferCache, memFreePageManager, diskInvIndexFactory, deletedKeysBTreeFactory, bloomFilterFactory,
+ fileManager, diskFileMapProvider, invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+ tokenCmpFactories, tokenizerFactory, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
}
protected InMemoryInvertedIndex createInMemoryInvertedIndex(IInMemoryBufferCache memBufferCache)
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
index 668250c..d5a074e 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
@@ -191,7 +191,8 @@
}
@Override
- public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws IndexException {
+ public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws IndexException {
throw new UnsupportedOperationException("Bulk load not supported by in-memory inverted index.");
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
index f1552eb..afeaf90 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
@@ -76,7 +76,7 @@
protected final int invListEndPageIdField;
protected final int invListStartOffField;
protected final int invListNumElementsField;
-
+
// Type traits to be appended to the token type trait which finally form the BTree field type traits.
protected static final ITypeTraits[] btreeValueTypeTraits = new ITypeTraits[4];
static {
@@ -283,7 +283,7 @@
btreeTuple.getFieldStart(invListNumElementsField));
listCursor.reset(startPageId, endPageId, startOff, numElements);
}
-
+
public final class OnDiskInvertedIndexBulkLoader implements IIndexBulkLoader {
private final ArrayTupleBuilder btreeTupleBuilder;
private final ArrayTupleReference btreeTupleReference;
@@ -302,8 +302,8 @@
private final boolean verifyInput;
private final MultiComparator allCmp;
- public OnDiskInvertedIndexBulkLoader(float btreeFillFactor, boolean verifyInput, int startPageId, int fileId)
- throws IndexException, HyracksDataException {
+ public OnDiskInvertedIndexBulkLoader(float btreeFillFactor, boolean verifyInput, long numElementsHint,
+ int startPageId, int fileId) throws IndexException, HyracksDataException {
this.verifyInput = verifyInput;
this.tokenCmp = MultiComparator.create(btree.getComparatorFactories());
this.invListCmp = MultiComparator.create(invListCmpFactories);
@@ -316,7 +316,7 @@
this.btreeTupleReference = new ArrayTupleReference();
this.lastTupleBuilder = new ArrayTupleBuilder(numTokenFields + numInvListKeys);
this.lastTuple = new ArrayTupleReference();
- this.btreeBulkloader = btree.createBulkLoader(btreeFillFactor, verifyInput);
+ this.btreeBulkloader = btree.createBulkLoader(btreeFillFactor, verifyInput, numElementsHint);
currentPageId = startPageId;
currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
currentPage.acquireWriteLatch();
@@ -477,7 +477,7 @@
this.index = index;
this.searcher = searcher;
}
-
+
@Override
public IIndexCursor createSearchCursor() {
return new OnDiskInvertedIndexSearchCursor(searcher, index.getInvListTypeTraits().length);
@@ -563,9 +563,10 @@
}
@Override
- public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws IndexException {
+ public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws IndexException {
try {
- return new OnDiskInvertedIndexBulkLoader(fillFactor, verifyInput, rootPageId, fileId);
+ return new OnDiskInvertedIndexBulkLoader(fillFactor, verifyInput, numElementsHint, rootPageId, fileId);
} catch (HyracksDataException e) {
throw new InvertedIndexException(e);
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
index 8a4f989..79c8ccf 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
@@ -21,6 +21,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
@@ -141,6 +142,13 @@
BTreeFactory deletedKeysBTreeFactory = createDeletedKeysBTreeFactory(diskFileMapProvider, invListTypeTraits,
invListCmpFactories, diskBufferCache);
+ int[] bloomFilterKeyFields = new int[invListCmpFactories.length];
+ for (int i = 0; i < invListCmpFactories.length; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider,
+ bloomFilterKeyFields);
+
FileReference onDiskDirFileRef = new FileReference(new File(onDiskDir));
LSMInvertedIndexFileManager fileManager = new LSMInvertedIndexFileManager(ioManager, diskFileMapProvider,
onDiskDirFileRef, deletedKeysBTreeFactory, startIODeviceIndex);
@@ -152,9 +160,9 @@
tokenCmpFactories, fileManager);
LSMInvertedIndex invIndex = new LSMInvertedIndex(memBufferCache, memFreePageManager, invIndexFactory,
- deletedKeysBTreeFactory, fileManager, diskFileMapProvider, invListTypeTraits, invListCmpFactories,
- tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy, opTrackerFactory, ioScheduler,
- ioOpCallbackProvider);
+ deletedKeysBTreeFactory, bloomFilterFactory, fileManager, diskFileMapProvider, invListTypeTraits,
+ invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy,
+ opTrackerFactory, ioScheduler, ioOpCallbackProvider);
return invIndex;
}
@@ -184,6 +192,13 @@
BTreeFactory deletedKeysBTreeFactory = createDeletedKeysBTreeFactory(diskFileMapProvider, invListTypeTraits,
invListCmpFactories, diskBufferCache);
+ int[] bloomFilterKeyFields = new int[invListCmpFactories.length];
+ for (int i = 0; i < invListCmpFactories.length; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider,
+ bloomFilterKeyFields);
+
FileReference onDiskDirFileRef = new FileReference(new File(onDiskDir));
LSMInvertedIndexFileManager fileManager = new LSMInvertedIndexFileManager(ioManager, diskFileMapProvider,
onDiskDirFileRef, deletedKeysBTreeFactory, startIODeviceIndex);
@@ -195,9 +210,9 @@
tokenTypeTraits, tokenCmpFactories, fileManager);
PartitionedLSMInvertedIndex invIndex = new PartitionedLSMInvertedIndex(memBufferCache, memFreePageManager,
- invIndexFactory, deletedKeysBTreeFactory, fileManager, diskFileMapProvider, invListTypeTraits,
- invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ invIndexFactory, deletedKeysBTreeFactory, bloomFilterFactory, fileManager, diskFileMapProvider,
+ invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
+ mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
return invIndex;
}
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
index 5a1482f..23137ab 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
@@ -67,10 +67,6 @@
protected final LSMRTreeMutableComponent mutableComponent;
protected final IInMemoryBufferCache memBufferCache;
- // This is used to estimate number of tuples in the memory RTree and BTree
- // for efficient memory allocation in the sort operation prior to flushing
- protected int memRTreeTuples;
- protected int memBTreeTuples;
protected TreeTupleSorter rTreeTupleSorter;
// On-disk components.
@@ -115,8 +111,6 @@
this.linearizer = linearizer;
this.comparatorFields = comparatorFields;
this.linearizerArray = linearizerArray;
- memRTreeTuples = 0;
- memBTreeTuples = 0;
rTreeTupleSorter = null;
}
@@ -226,20 +220,24 @@
}
protected LSMRTreeImmutableComponent createDiskComponent(ILSMComponentFactory factory, FileReference insertFileRef,
- FileReference deleteFileRef, boolean createComponent) throws HyracksDataException, IndexException {
+ FileReference deleteFileRef, FileReference bloomFilterFileRef, boolean createComponent)
+ throws HyracksDataException, IndexException {
// Create new tree instance.
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) factory
- .createLSMComponentInstance(new LSMComponentFileReferences(insertFileRef, deleteFileRef));
+ .createLSMComponentInstance(new LSMComponentFileReferences(insertFileRef, deleteFileRef,
+ bloomFilterFileRef));
if (createComponent) {
component.getRTree().create();
if (component.getBTree() != null) {
component.getBTree().create();
+ component.getBloomFilter().create();
}
}
// Tree will be closed during cleanup of merge().
component.getRTree().activate();
if (component.getBTree() != null) {
component.getBTree().activate();
+ component.getBloomFilter().activate();
}
return component;
}
@@ -302,7 +300,6 @@
if (foundTupleInMemoryBTree) {
try {
ctx.memBTreeAccessor.delete(tuple);
- memBTreeTuples--;
} catch (BTreeNonExistentKeyException e) {
// Tuple has been deleted in the meantime. Do nothing.
// This normally shouldn't happen if we are dealing with
@@ -312,13 +309,11 @@
}
} else {
ctx.memRTreeAccessor.insert(tuple);
- memRTreeTuples++;
}
} else {
try {
ctx.memBTreeAccessor.insert(tuple);
- memBTreeTuples++;
} catch (BTreeDuplicateKeyException e) {
// Do nothing, because one delete tuple is enough to indicate
// that all the corresponding insert tuples are deleted
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
index e43e525..3bffb43 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
@@ -22,7 +22,13 @@
import edu.uci.ics.hyracks.api.dataflow.value.ILinearizeComparatorFactory;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IInMemoryFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
@@ -56,6 +62,7 @@
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTreeSearchCursor;
import edu.uci.ics.hyracks.storage.am.rtree.impls.SearchPredicate;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
public class LSMRTree extends AbstractLSMRTree {
@@ -64,16 +71,17 @@
ITreeIndexFrameFactory rtreeInteriorFrameFactory, ITreeIndexFrameFactory rtreeLeafFrameFactory,
ITreeIndexFrameFactory btreeInteriorFrameFactory, ITreeIndexFrameFactory btreeLeafFrameFactory,
ILSMIndexFileManager fileNameManager, TreeIndexFactory<RTree> diskRTreeFactory,
- TreeIndexFactory<BTree> diskBTreeFactory, IFileMapProvider diskFileMapProvider, int fieldCount,
- IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
- ILinearizeComparatorFactory linearizer, int[] comparatorFields, IBinaryComparatorFactory[] linearizerArray,
- ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
+ TreeIndexFactory<BTree> diskBTreeFactory, BloomFilterFactory bloomFilterFactory,
+ IFileMapProvider diskFileMapProvider, int fieldCount, IBinaryComparatorFactory[] rtreeCmpFactories,
+ IBinaryComparatorFactory[] btreeCmpFactories, ILinearizeComparatorFactory linearizer,
+ int[] comparatorFields, IBinaryComparatorFactory[] linearizerArray, ILSMMergePolicy mergePolicy,
+ ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
+ ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
super(memBufferCache, memFreePageManager, rtreeInteriorFrameFactory, rtreeLeafFrameFactory,
btreeInteriorFrameFactory, btreeLeafFrameFactory, fileNameManager, diskRTreeFactory,
- new LSMRTreeComponentFactory(diskRTreeFactory, diskBTreeFactory), diskFileMapProvider, fieldCount,
- rtreeCmpFactories, btreeCmpFactories, linearizer, comparatorFields, linearizerArray, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ new LSMRTreeComponentFactory(diskRTreeFactory, diskBTreeFactory, bloomFilterFactory),
+ diskFileMapProvider, fieldCount, rtreeCmpFactories, btreeCmpFactories, linearizer, comparatorFields,
+ linearizerArray, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
}
/**
@@ -100,7 +108,8 @@
try {
component = createDiskComponent(componentFactory,
lsmComonentFileReference.getInsertIndexFileReference(),
- lsmComonentFileReference.getDeleteIndexFileReference(), false);
+ lsmComonentFileReference.getDeleteIndexFileReference(),
+ lsmComonentFileReference.getBloomFilterFileReference(), false);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -117,8 +126,10 @@
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
RTree rtree = component.getRTree();
BTree btree = component.getBTree();
+ BloomFilter bloomFilter = component.getBloomFilter();
rtree.deactivate();
btree.deactivate();
+ bloomFilter.deactivate();
}
isActivated = false;
}
@@ -135,6 +146,7 @@
for (ILSMComponent c : immutableComponents) {
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
component.getBTree().destroy();
+ component.getBloomFilter().destroy();
component.getRTree().destroy();
}
fileManager.deleteDirs();
@@ -147,8 +159,10 @@
for (ILSMComponent c : immutableComponents) {
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
component.getBTree().deactivate();
+ component.getBloomFilter().deactivate();
component.getRTree().deactivate();
component.getBTree().destroy();
+ component.getBloomFilter().destroy();
component.getRTree().destroy();
}
immutableComponents.clear();
@@ -201,13 +215,14 @@
rctx.getComponentHolder().addAll(ctx.getComponentHolder());
LSMRTreeAccessor accessor = new LSMRTreeAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeFlushOperation(accessor, flushingComponent, componentFileRefs
- .getInsertIndexFileReference(), componentFileRefs.getDeleteIndexFileReference(), callback));
+ .getInsertIndexFileReference(), componentFileRefs.getDeleteIndexFileReference(), componentFileRefs
+ .getBloomFilterFileReference(), callback));
}
@Override
public ILSMComponent flush(ILSMIOOperation operation) throws HyracksDataException, IndexException {
LSMRTreeFlushOperation flushOp = (LSMRTreeFlushOperation) operation;
- LSMRTreeMutableComponent flushingComponent = flushOp.getFlushingComponent();
+ LSMRTreeMutableComponent flushingComponent = (LSMRTreeMutableComponent) flushOp.getFlushingComponent();
// Renaming order is critical because we use assume ordering when we
// read the file names when we open the tree.
// The RTree should be renamed before the BTree.
@@ -219,7 +234,7 @@
SearchPredicate rtreeNullPredicate = new SearchPredicate(null, null);
memRTreeAccessor.search(rtreeScanCursor, rtreeNullPredicate);
LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getRTreeFlushTarget(),
- flushOp.getBTreeFlushTarget(), true);
+ flushOp.getBTreeFlushTarget(), flushOp.getBloomFilterFlushTarget(), true);
RTree diskRTree = component.getRTree();
IIndexBulkLoader rTreeBulkloader;
ITreeIndexCursor cursor;
@@ -227,9 +242,9 @@
IBinaryComparatorFactory[] linearizerArray = { linearizer };
if (rTreeTupleSorter == null) {
- rTreeTupleSorter = new TreeTupleSorter(memRTreeTuples, flushingComponent.getRTree().getFileId(),
- linearizerArray, rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(),
- flushingComponent.getRTree().getBufferCache(), comparatorFields);
+ rTreeTupleSorter = new TreeTupleSorter(flushingComponent.getRTree().getFileId(), linearizerArray,
+ rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(), flushingComponent
+ .getRTree().getBufferCache(), comparatorFields);
} else {
rTreeTupleSorter.reset();
}
@@ -237,11 +252,9 @@
// RTree.
boolean isEmpty = true;
- if (rtreeScanCursor.hasNext()) {
- isEmpty = false;
- }
try {
while (rtreeScanCursor.hasNext()) {
+ isEmpty = false;
rtreeScanCursor.next();
rTreeTupleSorter.insertTupleEntry(rtreeScanCursor.getPageId(), rtreeScanCursor.getTupleOffset());
}
@@ -250,44 +263,68 @@
}
if (!isEmpty) {
rTreeTupleSorter.sort();
- }
- rTreeBulkloader = diskRTree.createBulkLoader(1.0f, false);
- cursor = rTreeTupleSorter;
- try {
- while (cursor.hasNext()) {
- cursor.next();
- ITupleReference frameTuple = cursor.getTuple();
- rTreeBulkloader.add(frameTuple);
+ rTreeBulkloader = diskRTree.createBulkLoader(1.0f, false, 0L);
+ cursor = rTreeTupleSorter;
+
+ try {
+ while (cursor.hasNext()) {
+ cursor.next();
+ ITupleReference frameTuple = cursor.getTuple();
+ rTreeBulkloader.add(frameTuple);
+ }
+ } finally {
+ cursor.close();
}
- } finally {
- cursor.close();
+ rTreeBulkloader.end();
}
- rTreeBulkloader.end();
- // scan the memory BTree
ITreeIndexAccessor memBTreeAccessor = flushingComponent.getBTree().createAccessor(
NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- IIndexCursor btreeScanCursor = memBTreeAccessor.createSearchCursor();
RangePredicate btreeNullPredicate = new RangePredicate(null, null, true, true, null, null);
- memBTreeAccessor.search(btreeScanCursor, btreeNullPredicate);
- BTree diskBTree = component.getBTree();
-
- // BulkLoad the tuples from the in-memory tree into the new disk BTree.
- IIndexBulkLoader bTreeBulkloader = diskBTree.createBulkLoader(1.0f, false);
+ IIndexCursor btreeCountingCursor = ((BTreeAccessor) memBTreeAccessor).createCountingSearchCursor();
+ memBTreeAccessor.search(btreeCountingCursor, btreeNullPredicate);
+ long numBTreeTuples = 0L;
try {
- while (btreeScanCursor.hasNext()) {
- btreeScanCursor.next();
- ITupleReference frameTuple = btreeScanCursor.getTuple();
- bTreeBulkloader.add(frameTuple);
+ while (btreeCountingCursor.hasNext()) {
+ btreeCountingCursor.next();
+ ITupleReference countTuple = btreeCountingCursor.getTuple();
+ numBTreeTuples = IntegerSerializerDeserializer.getInt(countTuple.getFieldData(0),
+ countTuple.getFieldStart(0));
}
} finally {
- btreeScanCursor.close();
+ btreeCountingCursor.close();
}
- bTreeBulkloader.end();
- memRTreeTuples = 0;
- memBTreeTuples = 0;
- return new LSMRTreeImmutableComponent(diskRTree, diskBTree);
+
+ if (numBTreeTuples > 0) {
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numBTreeTuples);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
+
+ IIndexCursor btreeScanCursor = memBTreeAccessor.createSearchCursor();
+ memBTreeAccessor.search(btreeScanCursor, btreeNullPredicate);
+ BTree diskBTree = component.getBTree();
+
+ // BulkLoad the tuples from the in-memory tree into the new disk BTree.
+ IIndexBulkLoader bTreeBulkloader = diskBTree.createBulkLoader(1.0f, false, numBTreeTuples);
+ IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numBTreeTuples,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
+ // scan the memory BTree
+ try {
+ while (btreeScanCursor.hasNext()) {
+ btreeScanCursor.next();
+ ITupleReference frameTuple = btreeScanCursor.getTuple();
+ bTreeBulkloader.add(frameTuple);
+ builder.add(frameTuple);
+ }
+ } finally {
+ btreeScanCursor.close();
+ builder.end();
+ }
+ bTreeBulkloader.end();
+ }
+
+ return component;
}
@Override
@@ -308,7 +345,7 @@
ILSMIndexAccessorInternal accessor = new LSMRTreeAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeMergeOperation((ILSMIndexAccessorInternal) accessor,
mergingComponents, cursor, relMergeFileRefs.getInsertIndexFileReference(), relMergeFileRefs
- .getDeleteIndexFileReference(), callback));
+ .getDeleteIndexFileReference(), relMergeFileRefs.getBloomFilterFileReference(), callback));
}
@Override
@@ -318,34 +355,21 @@
ITreeIndexCursor cursor = mergeOp.getCursor();
mergedComponents.addAll(mergeOp.getMergingComponents());
- // Nothing to merge.
- if (mergedComponents.size() <= 1) {
- cursor.close();
- return null;
- }
+ LSMRTreeImmutableComponent mergedComponent = createDiskComponent(componentFactory,
+ mergeOp.getRTreeMergeTarget(), mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), true);
+ IIndexBulkLoader bulkLoader = mergedComponent.getRTree().createBulkLoader(1.0f, false, 0L);
- // Bulk load the tuples from all on-disk RTrees into the new RTree.
- LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, mergeOp.getRTreeMergeTarget(),
- mergeOp.getBTreeMergeTarget(), true);
- RTree mergedRTree = component.getRTree();
- BTree mergedBTree = component.getBTree();
-
- IIndexBulkLoader bulkloader = mergedRTree.createBulkLoader(1.0f, false);
try {
while (cursor.hasNext()) {
cursor.next();
ITupleReference frameTuple = cursor.getTuple();
- bulkloader.add(frameTuple);
+ bulkLoader.add(frameTuple);
}
} finally {
cursor.close();
}
- bulkloader.end();
-
- // Load an empty BTree tree.
- mergedBTree.createBulkLoader(1.0f, false).end();
-
- return new LSMRTreeImmutableComponent(mergedRTree, mergedBTree);
+ bulkLoader.end();
+ return mergedComponent;
}
@Override
@@ -373,19 +397,21 @@
private ILSMComponent createBulkLoadTarget() throws HyracksDataException, IndexException {
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
return createDiskComponent(componentFactory, componentFileRefs.getInsertIndexFileReference(),
- componentFileRefs.getDeleteIndexFileReference(), true);
+ componentFileRefs.getDeleteIndexFileReference(), componentFileRefs.getBloomFilterFileReference(), true);
}
@Override
- public IIndexBulkLoader createBulkLoader(float fillLevel, boolean verifyInput) throws TreeIndexException {
- return new LSMRTreeBulkLoader(fillLevel, verifyInput);
+ public IIndexBulkLoader createBulkLoader(float fillLevel, boolean verifyInput, long numElementsHint)
+ throws TreeIndexException {
+ return new LSMRTreeBulkLoader(fillLevel, verifyInput, numElementsHint);
}
public class LSMRTreeBulkLoader implements IIndexBulkLoader {
private final ILSMComponent component;
private final IIndexBulkLoader bulkLoader;
- public LSMRTreeBulkLoader(float fillFactor, boolean verifyInput) throws TreeIndexException {
+ public LSMRTreeBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws TreeIndexException {
// Note that by using a flush target file name, we state that the
// new bulk loaded tree is "newer" than any other merged tree.
try {
@@ -395,7 +421,8 @@
} catch (IndexException e) {
throw new TreeIndexException(e);
}
- bulkLoader = ((LSMRTreeImmutableComponent) component).getRTree().createBulkLoader(fillFactor, verifyInput);
+ bulkLoader = ((LSMRTreeImmutableComponent) component).getRTree().createBulkLoader(fillFactor, verifyInput,
+ numElementsHint);
}
@Override
@@ -425,12 +452,20 @@
((LSMRTreeImmutableComponent) component).getRTree().destroy();
((LSMRTreeImmutableComponent) component).getBTree().deactivate();
((LSMRTreeImmutableComponent) component).getBTree().destroy();
+ ((LSMRTreeImmutableComponent) component).getBloomFilter().deactivate();
+ ((LSMRTreeImmutableComponent) component).getBloomFilter().destroy();
}
}
@Override
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) lsmComponent;
+ // Flush the bloom filter first.
+ int fileId = component.getBloomFilter().getFileId();
+ IBufferCache bufferCache = component.getBTree().getBufferCache();
+ int startPage = 0;
+ int maxPage = component.getBloomFilter().getNumPages();
+ forceFlushDirtyPages(bufferCache, fileId, startPage, maxPage);
forceFlushDirtyPages(component.getRTree());
markAsValidInternal(component.getRTree());
forceFlushDirtyPages(component.getBTree());
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
index 41ffd3d..2a463c8 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
@@ -11,10 +11,12 @@
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeInteriorFrame;
import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTreeSearchCursor;
@@ -22,18 +24,11 @@
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
-public abstract class LSMRTreeAbstractCursor {
+public abstract class LSMRTreeAbstractCursor implements ITreeIndexCursor {
protected RTreeSearchCursor[] rtreeCursors;
-
- public abstract void next() throws HyracksDataException;
-
- public abstract boolean hasNext() throws HyracksDataException;
-
- public abstract void reset() throws HyracksDataException;
-
protected boolean open = false;
- protected BTreeRangeSearchCursor[] btreeCursors;
+ protected ITreeIndexCursor[] btreeCursors;
protected ITreeIndexAccessor[] diskRTreeAccessors;
protected ITreeIndexAccessor[] diskBTreeAccessors;
private MultiComparator btreeCmp;
@@ -58,6 +53,7 @@
return rtreeCursors[cursorIndex];
}
+ @Override
public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
LSMRTreeCursorInitialState lsmInitialState = (LSMRTreeCursorInitialState) initialState;
btreeCmp = lsmInitialState.getBTreeCmp();
@@ -69,27 +65,42 @@
diskBTreeAccessors = lsmInitialState.getBTreeAccessors();
rtreeCursors = new RTreeSearchCursor[numberOfTrees];
- btreeCursors = new BTreeRangeSearchCursor[numberOfTrees];
+ btreeCursors = new ITreeIndexCursor[numberOfTrees];
- for (int i = 0; i < numberOfTrees; i++) {
+ int i = 0;
+ if (includeMemRTree) {
rtreeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
.getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
.getRTreeLeafFrameFactory().createFrame());
+ // No need for a bloom filter for the in-memory BTree.
btreeCursors[i] = new BTreeRangeSearchCursor((IBTreeLeafFrame) lsmInitialState.getBTreeLeafFrameFactory()
.createFrame(), false);
+ ++i;
}
+ for (; i < numberOfTrees; i++) {
+ rtreeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
+ .getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
+ .getRTreeLeafFrameFactory().createFrame());
+
+ btreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitialState
+ .getBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMRTreeImmutableComponent) operationalComponents.get(i)).getBloomFilter());
+ }
+
rtreeSearchPredicate = (SearchPredicate) searchPred;
btreeRangePredicate = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
open = true;
}
+ @Override
public ICachedPage getPage() {
// do nothing
return null;
}
+ @Override
public void close() throws HyracksDataException {
if (!open) {
return;
@@ -111,18 +122,22 @@
open = false;
}
+ @Override
public void setBufferCache(IBufferCache bufferCache) {
// do nothing
}
+ @Override
public void setFileId(int fileId) {
// do nothing
}
+ @Override
public ITupleReference getTuple() {
return frameTuple;
}
+ @Override
public boolean exclusiveLatchNodes() {
return false;
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java
index 1681fbd..56e3d28 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -27,16 +29,21 @@
public class LSMRTreeComponentFactory implements ILSMComponentFactory {
private final TreeIndexFactory<RTree> rtreeFactory;
private final TreeIndexFactory<BTree> btreeFactory;
+ private final BloomFilterFactory bloomFilterFactory;
- public LSMRTreeComponentFactory(TreeIndexFactory<RTree> rtreeFactory, TreeIndexFactory<BTree> btreeFactory) {
+ public LSMRTreeComponentFactory(TreeIndexFactory<RTree> rtreeFactory, TreeIndexFactory<BTree> btreeFactory,
+ BloomFilterFactory bloomFilterFactory) {
this.rtreeFactory = rtreeFactory;
this.btreeFactory = btreeFactory;
+ this.bloomFilterFactory = bloomFilterFactory;
}
@Override
- public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException {
+ public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
+ HyracksDataException {
return new LSMRTreeImmutableComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
- btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()));
+ btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()),
+ bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()));
}
@Override
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
index 7ceecad..e698990 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
@@ -30,12 +30,12 @@
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
-public class LSMRTreeFileManager extends LSMIndexFileManager {
+public class LSMRTreeFileManager extends AbstractLSMIndexFileManager {
private static final String RTREE_STRING = "r";
private static final String BTREE_STRING = "b";
@@ -69,7 +69,8 @@
String baseName = baseDir + ts + SPLIT_STRING + ts;
// Begin timestamp and end timestamp are identical since it is a flush
return new LSMComponentFileReferences(createFlushFile(baseName + SPLIT_STRING + RTREE_STRING),
- createFlushFile(baseName + SPLIT_STRING + BTREE_STRING));
+ createFlushFile(baseName + SPLIT_STRING + BTREE_STRING), createFlushFile(baseName + SPLIT_STRING
+ + BLOOM_FILTER_STRING));
}
@Override
@@ -81,7 +82,8 @@
String baseName = baseDir + firstTimestampRange[0] + SPLIT_STRING + lastTimestampRange[1];
// Get the range of timestamps by taking the earliest and the latest timestamps
return new LSMComponentFileReferences(createMergeFile(baseName + SPLIT_STRING + RTREE_STRING),
- createMergeFile(baseName + SPLIT_STRING + BTREE_STRING));
+ createMergeFile(baseName + SPLIT_STRING + BTREE_STRING), createMergeFile(baseName + SPLIT_STRING
+ + BLOOM_FILTER_STRING));
}
@Override
@@ -89,15 +91,37 @@
List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
ArrayList<ComparableFileName> allRTreeFiles = new ArrayList<ComparableFileName>();
ArrayList<ComparableFileName> allBTreeFiles = new ArrayList<ComparableFileName>();
+ ArrayList<ComparableFileName> allBloomFilterFiles = new ArrayList<ComparableFileName>();
// Gather files from all IODeviceHandles.
for (IODeviceHandle dev : ioManager.getIODevices()) {
- cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, allBTreeFiles);
- HashSet<String> btreeFilesSet = new HashSet<String>();
- for (ComparableFileName cmpFileName : allBTreeFiles) {
+ cleanupAndGetValidFilesInternal(dev, bloomFilterFilter, null, allBloomFilterFiles);
+ HashSet<String> bloomFilterFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : allBloomFilterFiles) {
int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ bloomFilterFilesSet.add(cmpFileName.fileName.substring(0, index));
}
+
+ // List of valid BTree files that may or may not have a bloom filter buddy. Will check for buddies below.
+ ArrayList<ComparableFileName> tmpAllBTreeFiles = new ArrayList<ComparableFileName>();
+ cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, tmpAllBTreeFiles);
+ // Look for buddy bloom filters for all valid BTrees.
+ // If no buddy is found, delete the file, otherwise add the BTree to allBTreeFiles.
+ HashSet<String> btreeFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : tmpAllBTreeFiles) {
+ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+ String file = cmpFileName.fileName.substring(0, index);
+ if (bloomFilterFilesSet.contains(file)) {
+ allBTreeFiles.add(cmpFileName);
+ btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ } else {
+ // Couldn't find the corresponding bloom filter file; thus, delete
+ // the BTree file.
+ File invalidBTreeFile = new File(cmpFileName.fullPath);
+ invalidBTreeFile.delete();
+ }
+ }
+
// List of valid RTree files that may or may not have a BTree buddy. Will check for buddies below.
ArrayList<ComparableFileName> tmpAllRTreeFiles = new ArrayList<ComparableFileName>();
cleanupAndGetValidFilesInternal(dev, rtreeFilter, rtreeFactory, tmpAllRTreeFiles);
@@ -117,23 +141,26 @@
}
}
// Sanity check.
- if (allRTreeFiles.size() != allBTreeFiles.size()) {
- throw new HyracksDataException("Unequal number of valid RTree and BTree files found. Aborting cleanup.");
+ if (allRTreeFiles.size() != allBTreeFiles.size() || allBTreeFiles.size() != allBloomFilterFiles.size()) {
+ throw new HyracksDataException(
+ "Unequal number of valid RTree, BTree, and Bloom Filter files found. Aborting cleanup.");
}
// Trivial cases.
- if (allRTreeFiles.isEmpty() || allBTreeFiles.isEmpty()) {
+ if (allRTreeFiles.isEmpty() || allBTreeFiles.isEmpty() || allBloomFilterFiles.isEmpty()) {
return validFiles;
}
- if (allRTreeFiles.size() == 1 && allBTreeFiles.size() == 1) {
- validFiles.add(new LSMComponentFileReferences(allRTreeFiles.get(0).fileRef, allBTreeFiles.get(0).fileRef));
+ if (allRTreeFiles.size() == 1 && allBTreeFiles.size() == 1 && allBloomFilterFiles.size() == 1) {
+ validFiles.add(new LSMComponentFileReferences(allRTreeFiles.get(0).fileRef, allBTreeFiles.get(0).fileRef,
+ allBloomFilterFiles.get(0).fileRef));
return validFiles;
}
// Sorts files names from earliest to latest timestamp.
Collections.sort(allRTreeFiles);
Collections.sort(allBTreeFiles);
+ Collections.sort(allBloomFilterFiles);
List<ComparableFileName> validComparableRTreeFiles = new ArrayList<ComparableFileName>();
ComparableFileName lastRTree = allRTreeFiles.get(0);
@@ -143,25 +170,37 @@
ComparableFileName lastBTree = allBTreeFiles.get(0);
validComparableBTreeFiles.add(lastBTree);
+ List<ComparableFileName> validComparableBloomFilterFiles = new ArrayList<ComparableFileName>();
+ ComparableFileName lastBloomFilter = allBloomFilterFiles.get(0);
+ validComparableBloomFilterFiles.add(lastBloomFilter);
+
for (int i = 1; i < allRTreeFiles.size(); i++) {
ComparableFileName currentRTree = allRTreeFiles.get(i);
ComparableFileName currentBTree = allBTreeFiles.get(i);
+ ComparableFileName currentBloomFilter = allBloomFilterFiles.get(i);
// Current start timestamp is greater than last stop timestamp.
if (currentRTree.interval[0].compareTo(lastRTree.interval[1]) > 0
- && currentBTree.interval[0].compareTo(lastBTree.interval[1]) > 0) {
+ && currentBTree.interval[0].compareTo(lastBTree.interval[1]) > 0
+ && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[1]) > 0) {
validComparableRTreeFiles.add(currentRTree);
validComparableBTreeFiles.add(currentBTree);
+ validComparableBloomFilterFiles.add(currentBloomFilter);
lastRTree = currentRTree;
lastBTree = currentBTree;
+ lastBloomFilter = currentBloomFilter;
} else if (currentRTree.interval[0].compareTo(lastRTree.interval[0]) >= 0
&& currentRTree.interval[1].compareTo(lastRTree.interval[1]) <= 0
&& currentBTree.interval[0].compareTo(lastBTree.interval[0]) >= 0
- && currentBTree.interval[1].compareTo(lastBTree.interval[1]) <= 0) {
+ && currentBTree.interval[1].compareTo(lastBTree.interval[1]) <= 0
+ && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[0]) >= 0
+ && currentBloomFilter.interval[1].compareTo(lastBloomFilter.interval[1]) <= 0) {
// Invalid files are completely contained in last interval.
File invalidRTreeFile = new File(currentRTree.fullPath);
invalidRTreeFile.delete();
File invalidBTreeFile = new File(currentBTree.fullPath);
invalidBTreeFile.delete();
+ File invalidBloomFilterFile = new File(currentBloomFilter.fullPath);
+ invalidBloomFilterFile.delete();
} else {
// This scenario should not be possible.
throw new HyracksDataException("Found LSM files with overlapping but not contained timetamp intervals.");
@@ -172,13 +211,17 @@
// files come first.
Collections.sort(validComparableRTreeFiles, recencyCmp);
Collections.sort(validComparableBTreeFiles, recencyCmp);
+ Collections.sort(validComparableBloomFilterFiles, recencyCmp);
Iterator<ComparableFileName> rtreeFileIter = validComparableRTreeFiles.iterator();
Iterator<ComparableFileName> btreeFileIter = validComparableBTreeFiles.iterator();
+ Iterator<ComparableFileName> bloomFilterFileIter = validComparableBloomFilterFiles.iterator();
while (rtreeFileIter.hasNext() && btreeFileIter.hasNext()) {
ComparableFileName cmpRTreeFileName = rtreeFileIter.next();
ComparableFileName cmpBTreeFileName = btreeFileIter.next();
- validFiles.add(new LSMComponentFileReferences(cmpRTreeFileName.fileRef, cmpBTreeFileName.fileRef));
+ ComparableFileName cmpBloomFilterFileName = bloomFilterFileIter.next();
+ validFiles.add(new LSMComponentFileReferences(cmpRTreeFileName.fileRef, cmpBTreeFileName.fileRef,
+ cmpBloomFilterFileName.fileRef));
}
return validFiles;
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
index 8698a1d..7b7f2bc 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
@@ -8,6 +8,7 @@
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessorInternal;
@@ -15,17 +16,20 @@
public class LSMRTreeFlushOperation implements ILSMIOOperation {
private final ILSMIndexAccessorInternal accessor;
- private final LSMRTreeMutableComponent flushingComponent;
+ private final ILSMComponent flushingComponent;
private final FileReference rtreeFlushTarget;
private final FileReference btreeFlushTarget;
+ private final FileReference bloomFilterFlushTarget;
private final ILSMIOOperationCallback callback;
- public LSMRTreeFlushOperation(ILSMIndexAccessorInternal accessor, LSMRTreeMutableComponent flushingComponent,
- FileReference rtreeFlushTarget, FileReference btreeFlushTarget, ILSMIOOperationCallback callback) {
+ public LSMRTreeFlushOperation(ILSMIndexAccessorInternal accessor, ILSMComponent flushingComponent,
+ FileReference rtreeFlushTarget, FileReference btreeFlushTarget, FileReference bloomFilterFlushTarget,
+ ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.flushingComponent = flushingComponent;
this.rtreeFlushTarget = rtreeFlushTarget;
this.btreeFlushTarget = btreeFlushTarget;
+ this.bloomFilterFlushTarget = bloomFilterFlushTarget;
this.callback = callback;
}
@@ -38,7 +42,10 @@
public Set<IODeviceHandle> getWriteDevices() {
Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
devs.add(rtreeFlushTarget.getDeviceHandle());
- devs.add(btreeFlushTarget.getDeviceHandle());
+ if (btreeFlushTarget != null) {
+ devs.add(btreeFlushTarget.getDeviceHandle());
+ devs.add(bloomFilterFlushTarget.getDeviceHandle());
+ }
return devs;
}
@@ -60,7 +67,11 @@
return btreeFlushTarget;
}
- public LSMRTreeMutableComponent getFlushingComponent() {
+ public FileReference getBloomFilterFlushTarget() {
+ return bloomFilterFlushTarget;
+ }
+
+ public ILSMComponent getFlushingComponent() {
return flushingComponent;
}
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java
index afba3a0..8d20c14 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java
@@ -1,6 +1,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
@@ -8,10 +9,12 @@
public class LSMRTreeImmutableComponent extends AbstractImmutableLSMComponent {
private final RTree rtree;
private final BTree btree;
+ private final BloomFilter bloomFilter;
- public LSMRTreeImmutableComponent(RTree rtree, BTree btree) {
+ public LSMRTreeImmutableComponent(RTree rtree, BTree btree, BloomFilter bloomFilter) {
this.rtree = rtree;
this.btree = btree;
+ this.bloomFilter = bloomFilter;
}
@Override
@@ -21,6 +24,8 @@
if (btree != null) {
btree.deactivate();
btree.destroy();
+ bloomFilter.deactivate();
+ bloomFilter.destroy();
}
}
@@ -32,4 +37,7 @@
return btree;
}
+ public BloomFilter getBloomFilter() {
+ return bloomFilter;
+ }
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
index 970b253..0e05a93 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
@@ -1,6 +1,5 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
-import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -21,16 +20,18 @@
private final ITreeIndexCursor cursor;
private final FileReference rtreeMergeTarget;
private final FileReference btreeMergeTarget;
+ private final FileReference bloomFilterMergeTarget;
private final ILSMIOOperationCallback callback;
public LSMRTreeMergeOperation(ILSMIndexAccessorInternal accessor, List<ILSMComponent> mergingComponents,
ITreeIndexCursor cursor, FileReference rtreeMergeTarget, FileReference btreeMergeTarget,
- ILSMIOOperationCallback callback) {
+ FileReference bloomFilterMergeTarget, ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.mergingComponents = mergingComponents;
this.cursor = cursor;
this.rtreeMergeTarget = rtreeMergeTarget;
this.btreeMergeTarget = btreeMergeTarget;
+ this.bloomFilterMergeTarget = bloomFilterMergeTarget;
this.callback = callback;
}
@@ -40,14 +41,23 @@
for (ILSMComponent o : mergingComponents) {
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) o;
devs.add(component.getRTree().getFileReference().getDeviceHandle());
- devs.add(component.getBTree().getFileReference().getDeviceHandle());
+ if (component.getBTree() != null) {
+ devs.add(component.getBTree().getFileReference().getDeviceHandle());
+ devs.add(component.getBloomFilter().getFileReference().getDeviceHandle());
+ }
}
return devs;
}
@Override
public Set<IODeviceHandle> getWriteDevices() {
- return Collections.singleton(rtreeMergeTarget.getDeviceHandle());
+ Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
+ devs.add(rtreeMergeTarget.getDeviceHandle());
+ if (btreeMergeTarget != null) {
+ devs.add(btreeMergeTarget.getDeviceHandle());
+ devs.add(bloomFilterMergeTarget.getDeviceHandle());
+ }
+ return devs;
}
@Override
@@ -68,6 +78,10 @@
return btreeMergeTarget;
}
+ public FileReference getBloomFilterMergeTarget() {
+ return bloomFilterMergeTarget;
+ }
+
public ITreeIndexCursor getCursor() {
return cursor;
}
@@ -75,5 +89,4 @@
public List<ILSMComponent> getMergingComponents() {
return mergingComponents;
}
-
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
index f5e6f3d..d898f6c 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
@@ -19,11 +19,10 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-public class LSMRTreeSearchCursor extends LSMRTreeAbstractCursor implements ITreeIndexCursor {
+public class LSMRTreeSearchCursor extends LSMRTreeAbstractCursor {
private int currentCursror;
@@ -64,7 +63,7 @@
}
@Override
- public boolean hasNext() throws HyracksDataException {
+ public boolean hasNext() throws HyracksDataException, IndexException {
if (foundNext) {
return true;
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
index 46cf050..f691839 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
@@ -20,11 +20,10 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-public class LSMRTreeSortedCursor extends LSMRTreeAbstractCursor implements ITreeIndexCursor {
+public class LSMRTreeSortedCursor extends LSMRTreeAbstractCursor {
private ILinearizeComparator linearizeCmp;
private boolean[] depletedRtreeCursors;
@@ -63,7 +62,7 @@
}
@Override
- public boolean hasNext() throws HyracksDataException {
+ public boolean hasNext() throws HyracksDataException, IndexException {
while (!foundNext) {
frameTuple = null;
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
index 824b8ef..478d076 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
@@ -52,7 +52,6 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMFlushOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
@@ -103,7 +102,7 @@
LSMRTreeImmutableComponent component;
try {
component = createDiskComponent(componentFactory,
- lsmComonentFileReference.getInsertIndexFileReference(), null, false);
+ lsmComonentFileReference.getInsertIndexFileReference(), null, null, false);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -205,13 +204,13 @@
opCtx.setOperation(IndexOperation.FLUSH);
opCtx.getComponentHolder().add(flushingComponent);
ILSMIndexAccessorInternal accessor = new LSMRTreeWithAntiMatterTuplesAccessor(lsmHarness, opCtx);
- ioScheduler.scheduleOperation(new LSMFlushOperation(accessor, flushingComponent, relFlushFileRefs
- .getInsertIndexFileReference(), callback));
+ ioScheduler.scheduleOperation(new LSMRTreeFlushOperation(accessor, flushingComponent, relFlushFileRefs
+ .getInsertIndexFileReference(), null, null, callback));
}
@Override
public ILSMComponent flush(ILSMIOOperation operation) throws HyracksDataException, IndexException {
- LSMFlushOperation flushOp = (LSMFlushOperation) operation;
+ LSMRTreeFlushOperation flushOp = (LSMRTreeFlushOperation) operation;
// Renaming order is critical because we use assume ordering when we
// read the file names when we open the tree.
// The RTree should be renamed before the BTree.
@@ -221,8 +220,8 @@
RTreeSearchCursor rtreeScanCursor = (RTreeSearchCursor) memRTreeAccessor.createSearchCursor();
SearchPredicate rtreeNullPredicate = new SearchPredicate(null, null);
memRTreeAccessor.search(rtreeScanCursor, rtreeNullPredicate);
- LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getFlushTarget(), null,
- true);
+ LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getRTreeFlushTarget(),
+ null, null, true);
RTree diskRTree = component.getRTree();
// scan the memory BTree
@@ -235,13 +234,13 @@
// Since the LSM-RTree is used as a secondary assumption, the
// primary key will be the last comparator in the BTree comparators
if (rTreeTupleSorter == null) {
- rTreeTupleSorter = new TreeTupleSorter(memRTreeTuples, flushingComponent.getRTree().getFileId(),
- linearizerArray, rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(),
- flushingComponent.getRTree().getBufferCache(), comparatorFields);
+ rTreeTupleSorter = new TreeTupleSorter(flushingComponent.getRTree().getFileId(), linearizerArray,
+ rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(), flushingComponent
+ .getRTree().getBufferCache(), comparatorFields);
- bTreeTupleSorter = new TreeTupleSorter(memBTreeTuples, flushingComponent.getBTree().getFileId(),
- linearizerArray, btreeLeafFrameFactory.createFrame(), btreeLeafFrameFactory.createFrame(),
- flushingComponent.getBTree().getBufferCache(), comparatorFields);
+ bTreeTupleSorter = new TreeTupleSorter(flushingComponent.getBTree().getFileId(), linearizerArray,
+ btreeLeafFrameFactory.createFrame(), btreeLeafFrameFactory.createFrame(), flushingComponent
+ .getBTree().getBufferCache(), comparatorFields);
} else {
rTreeTupleSorter.reset();
bTreeTupleSorter.reset();
@@ -250,11 +249,9 @@
// RTree.
boolean isEmpty = true;
- if (rtreeScanCursor.hasNext()) {
- isEmpty = false;
- }
try {
while (rtreeScanCursor.hasNext()) {
+ isEmpty = false;
rtreeScanCursor.next();
rTreeTupleSorter.insertTupleEntry(rtreeScanCursor.getPageId(), rtreeScanCursor.getTupleOffset());
}
@@ -266,11 +263,9 @@
}
isEmpty = true;
- if (btreeScanCursor.hasNext()) {
- isEmpty = false;
- }
try {
while (btreeScanCursor.hasNext()) {
+ isEmpty = false;
btreeScanCursor.next();
bTreeTupleSorter.insertTupleEntry(btreeScanCursor.getPageId(), btreeScanCursor.getTupleOffset());
}
@@ -281,9 +276,9 @@
bTreeTupleSorter.sort();
}
- IIndexBulkLoader rTreeBulkloader = diskRTree.createBulkLoader(1.0f, false);
- LSMRTreeFlushCursor cursor = new LSMRTreeFlushCursor(rTreeTupleSorter, bTreeTupleSorter, comparatorFields,
- linearizerArray);
+ IIndexBulkLoader rTreeBulkloader = diskRTree.createBulkLoader(1.0f, false, 0L);
+ LSMRTreeWithAntiMatterTuplesFlushCursor cursor = new LSMRTreeWithAntiMatterTuplesFlushCursor(rTreeTupleSorter,
+ bTreeTupleSorter, comparatorFields, linearizerArray);
cursor.open(null, null);
try {
@@ -298,9 +293,6 @@
}
rTreeBulkloader.end();
-
- memRTreeTuples = 0;
- memBTreeTuples = 0;
return component;
}
@@ -317,7 +309,7 @@
LSMComponentFileReferences relMergeFileRefs = getMergeTargetFileName(mergingComponents);
ILSMIndexAccessorInternal accessor = new LSMRTreeWithAntiMatterTuplesAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeMergeOperation(accessor, mergingComponents, cursor, relMergeFileRefs
- .getInsertIndexFileReference(), null, callback));
+ .getInsertIndexFileReference(), null, null, callback));
}
@Override
@@ -335,9 +327,9 @@
// Bulk load the tuples from all on-disk RTrees into the new RTree.
LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, mergeOp.getRTreeMergeTarget(),
- null, true);
+ null, null, true);
RTree mergedRTree = component.getRTree();
- IIndexBulkLoader bulkloader = mergedRTree.createBulkLoader(1.0f, false);
+ IIndexBulkLoader bulkloader = mergedRTree.createBulkLoader(1.0f, false, 0L);
try {
while (cursor.hasNext()) {
cursor.next();
@@ -374,20 +366,23 @@
}
@Override
- public IIndexBulkLoader createBulkLoader(float fillLevel, boolean verifyInput) throws TreeIndexException {
- return new LSMRTreeWithAntiMatterTuplesBulkLoader(fillLevel, verifyInput);
+ public IIndexBulkLoader createBulkLoader(float fillLevel, boolean verifyInput, long numElementsHint)
+ throws TreeIndexException {
+ return new LSMRTreeWithAntiMatterTuplesBulkLoader(fillLevel, verifyInput, numElementsHint);
}
private ILSMComponent createBulkLoadTarget() throws HyracksDataException, IndexException {
LSMComponentFileReferences relFlushFileRefs = fileManager.getRelFlushFileReference();
- return createDiskComponent(bulkLoaComponentFactory, relFlushFileRefs.getInsertIndexFileReference(), null, true);
+ return createDiskComponent(bulkLoaComponentFactory, relFlushFileRefs.getInsertIndexFileReference(), null, null,
+ true);
}
public class LSMRTreeWithAntiMatterTuplesBulkLoader implements IIndexBulkLoader {
private final ILSMComponent component;
private final IIndexBulkLoader bulkLoader;
- public LSMRTreeWithAntiMatterTuplesBulkLoader(float fillFactor, boolean verifyInput) throws TreeIndexException {
+ public LSMRTreeWithAntiMatterTuplesBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws TreeIndexException {
// Note that by using a flush target file name, we state that the
// new bulk loaded tree is "newer" than any other merged tree.
try {
@@ -397,7 +392,8 @@
} catch (IndexException e) {
throw new TreeIndexException(e);
}
- bulkLoader = ((LSMRTreeImmutableComponent) component).getRTree().createBulkLoader(fillFactor, verifyInput);
+ bulkLoader = ((LSMRTreeImmutableComponent) component).getRTree().createBulkLoader(fillFactor, verifyInput,
+ numElementsHint);
}
@Override
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java
index 0ca353b..0149800 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java
@@ -32,7 +32,8 @@
@Override
public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException {
- return new LSMRTreeImmutableComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()), null);
+ return new LSMRTreeImmutableComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
+ null, null);
}
@Override
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFileManager.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFileManager.java
new file mode 100644
index 0000000..10b982f
--- /dev/null
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFileManager.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.api.io.IODeviceHandle;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+/**
+ * LSM index file manager whose components are each described by a single file reference.
+ * Every {@link LSMComponentFileReferences} built here is given one file reference as its
+ * first constructor argument and {@code null} for the second and third arguments.
+ *
+ * NOTE(review): presumably the populated slot is the RTree file and the two null slots are
+ * the BTree and bloom filter files used by the regular LSM RTree -- confirm against
+ * LSMComponentFileReferences.
+ */
+public class LSMRTreeWithAntiMatterTuplesFileManager extends AbstractLSMIndexFileManager {
+ /* Factory forwarded to cleanupAndGetValidFilesInternal() when on-disk files are scanned. */
+ private final TreeIndexFactory<? extends ITreeIndex> rtreeFactory;
+ /**
+ * Forwards {@code ioManager}, {@code fileMapProvider}, {@code file} and
+ * {@code startIODeviceIndex} to the superclass and remembers {@code rtreeFactory}.
+ * The fourth superclass argument is always {@code null}.
+ * NOTE(review): the meaning of that fourth argument is not visible here -- check
+ * AbstractLSMIndexFileManager.
+ *
+ * @param rtreeFactory factory later handed to cleanupAndGetValidFilesInternal()
+ */
+ public LSMRTreeWithAntiMatterTuplesFileManager(IIOManager ioManager, IFileMapProvider fileMapProvider,
+ FileReference file, TreeIndexFactory<? extends ITreeIndex> rtreeFactory, int startIODeviceIndex) {
+ super(ioManager, fileMapProvider, file, null, startIODeviceIndex);
+ this.rtreeFactory = rtreeFactory;
+ }
+ /**
+ * Builds the file reference for a flush target. The name passed to createFlushFile() is
+ * {@code baseDir}, the current time rendered by the inherited {@code formatter},
+ * {@code SPLIT_STRING}, and the same timestamp once more.
+ *
+ * @return references whose first slot is the flush file; the other two slots are null
+ */
+ @Override
+ public LSMComponentFileReferences getRelFlushFileReference() {
+ Date date = new Date();
+ String ts = formatter.format(date);
+ // Begin timestamp and end timestamp are identical since it is a flush
+ return new LSMComponentFileReferences(createFlushFile(baseDir + ts + SPLIT_STRING + ts), null, null);
+ }
+ /**
+ * Builds the file reference for a merge target whose name spans the two given names.
+ * Both names are split on {@code SPLIT_STRING}; the merged name uses part [0] of
+ * {@code firstFileName} and part [1] of {@code lastFileName}.
+ * NOTE(review): assumes both names contain SPLIT_STRING (index [1] is read without a
+ * length check) -- verify against callers.
+ *
+ * @param firstFileName name supplying the start of the merged range
+ * @param lastFileName name supplying the end of the merged range
+ * @return references whose first slot is the merge file; the other two slots are null
+ */
+ @Override
+ public LSMComponentFileReferences getRelMergeFileReference(String firstFileName, String lastFileName)
+ throws HyracksDataException {
+ String[] firstTimestampRange = firstFileName.split(SPLIT_STRING);
+ String[] lastTimestampRange = lastFileName.split(SPLIT_STRING);
+ // Get the range of timestamps by taking the earliest and the latest timestamps
+ return new LSMComponentFileReferences(createMergeFile(baseDir + firstTimestampRange[0] + SPLIT_STRING
+ + lastTimestampRange[1]), null, null);
+ }
+ /* Accepts every file name that does not start with "." . */
+ private static FilenameFilter fileNameFilter = new FilenameFilter() {
+ public boolean accept(File dir, String name) {
+ return !name.startsWith(".");
+ }
+ };
+ /**
+ * Collects the component files found on all IO devices, deletes files whose interval is
+ * contained in the interval of the previously kept file, and returns the remaining files
+ * ordered by {@code recencyCmp}.
+ *
+ * @return one entry per kept file (first slot only); empty when no files were found
+ * @throws HyracksDataException if two files have intervals that overlap without one
+ * being contained in the other
+ */
+ @Override
+ public List<LSMComponentFileReferences> cleanupAndGetValidFiles() throws HyracksDataException, IndexException {
+ List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
+ ArrayList<ComparableFileName> allFiles = new ArrayList<ComparableFileName>();
+
+ // Gather files from all IODeviceHandles and delete invalid files
+ // There are two types of invalid files:
+ // (1) The isValid flag is not set
+ // (2) The file's interval is contained by some other file
+ // Here, we only filter out (1).
+ for (IODeviceHandle dev : ioManager.getIODevices()) {
+ cleanupAndGetValidFilesInternal(dev, fileNameFilter, rtreeFactory, allFiles);
+ }
+
+ if (allFiles.isEmpty()) {
+ return validFiles;
+ }
+
+ if (allFiles.size() == 1) {
+ validFiles.add(new LSMComponentFileReferences(allFiles.get(0).fileRef, null, null));
+ return validFiles;
+ }
+
+ // Sort file names from earliest to latest timestamp.
+ Collections.sort(allFiles);
+
+ List<ComparableFileName> validComparableFiles = new ArrayList<ComparableFileName>();
+ ComparableFileName last = allFiles.get(0);
+ validComparableFiles.add(last);
+ for (int i = 1; i < allFiles.size(); i++) {
+ ComparableFileName current = allFiles.get(i);
+ // The current start timestamp is greater than last stop timestamp so current is valid.
+ if (current.interval[0].compareTo(last.interval[1]) > 0) {
+ validComparableFiles.add(current);
+ last = current;
+ } else if (current.interval[0].compareTo(last.interval[0]) >= 0
+ && current.interval[1].compareTo(last.interval[1]) <= 0) {
+ // The current file is completely contained in the interval of the
+ // last file. Thus the last file must contain at least as much information
+ // as the current file, so delete the current file.
+ current.fileRef.delete();
+ } else {
+ // This scenario should not be possible since timestamps are monotonically increasing.
+ throw new HyracksDataException("Found LSM files with overlapping timestamp intervals, "
+ + "but the intervals were not contained by another file.");
+ }
+ }
+
+ // Sort valid files in reverse lexicographical order, such that newer files come first.
+ Collections.sort(validComparableFiles, recencyCmp);
+ for (ComparableFileName cmpFileName : validComparableFiles) {
+ validFiles.add(new LSMComponentFileReferences(cmpFileName.fileRef, null, null));
+ }
+
+ return validFiles;
+ }
+}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFlushCursor.java
similarity index 96%
rename from hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushCursor.java
rename to hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFlushCursor.java
index 65f3f4b..22e6929 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFlushCursor.java
@@ -25,7 +25,7 @@
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
-public class LSMRTreeFlushCursor implements ITreeIndexCursor {
+public class LSMRTreeWithAntiMatterTuplesFlushCursor implements ITreeIndexCursor {
private final TreeTupleSorter rTreeTupleSorter;
private final TreeTupleSorter bTreeTupleSorter;
private final int[] comparatorFields;
@@ -36,7 +36,7 @@
private ITupleReference btreeTuple;
private boolean foundNext = false;
- public LSMRTreeFlushCursor(TreeTupleSorter rTreeTupleSorter, TreeTupleSorter bTreeTupleSorter,
+ public LSMRTreeWithAntiMatterTuplesFlushCursor(TreeTupleSorter rTreeTupleSorter, TreeTupleSorter bTreeTupleSorter,
int[] comparatorFields, IBinaryComparatorFactory[] comparatorFactories) {
this.rTreeTupleSorter = rTreeTupleSorter;
this.bTreeTupleSorter = bTreeTupleSorter;
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/TreeTupleSorter.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/TreeTupleSorter.java
index d63cff9..294c2b8 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/TreeTupleSorter.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/TreeTupleSorter.java
@@ -29,6 +29,7 @@
import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
public class TreeTupleSorter implements ITreeIndexCursor {
+ private final static int INITIAL_SIZE = 1000000;
private int numTuples;
private int currentTupleIndex;
private int[] tPointers;
@@ -38,18 +39,18 @@
private ITreeIndexTupleReference frameTuple1;
private ITreeIndexTupleReference frameTuple2;
private final int fileId;
- private final static int ARRAY_GROWTH = 1000; // Must be at least of size 2
+ private final static int ARRAY_GROWTH = 1000000; // Must be at least of size 2
private final int[] comparatorFields;
private final MultiComparator cmp;
- public TreeTupleSorter(int initialSize, int fileId, IBinaryComparatorFactory[] comparatorFactories,
- ITreeIndexFrame leafFrame1, ITreeIndexFrame leafFrame2, IBufferCache bufferCache, int[] comparatorFields) {
+ public TreeTupleSorter(int fileId, IBinaryComparatorFactory[] comparatorFactories, ITreeIndexFrame leafFrame1,
+ ITreeIndexFrame leafFrame2, IBufferCache bufferCache, int[] comparatorFields) {
this.fileId = fileId;
this.leafFrame1 = leafFrame1;
this.leafFrame2 = leafFrame2;
this.bufferCache = bufferCache;
this.comparatorFields = comparatorFields;
- tPointers = new int[initialSize * 2];
+ tPointers = new int[INITIAL_SIZE * 2];
frameTuple1 = leafFrame1.createTupleReference();
frameTuple2 = leafFrame2.createTupleReference();
currentTupleIndex = 0;
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
index e4d2e82..6c9fce6 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
@@ -22,6 +22,7 @@
import edu.uci.ics.hyracks.api.io.IIOManager;
import edu.uci.ics.hyracks.data.std.primitive.DoublePointable;
import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
@@ -39,11 +40,11 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BTreeFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.am.lsm.rtree.impls.LSMRTree;
import edu.uci.ics.hyracks.storage.am.lsm.rtree.impls.LSMRTreeFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.rtree.impls.LSMRTreeWithAntiMatterTuples;
+import edu.uci.ics.hyracks.storage.am.lsm.rtree.impls.LSMRTreeWithAntiMatterTuplesFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.rtree.impls.RTreeFactory;
import edu.uci.ics.hyracks.storage.am.lsm.rtree.tuples.LSMRTreeCopyTupleWriterFactory;
import edu.uci.ics.hyracks.storage.am.lsm.rtree.tuples.LSMRTreeTupleWriterFactory;
@@ -105,13 +106,20 @@
int[] comparatorFields = { 0 };
IBinaryComparatorFactory[] linearizerArray = { linearizeCmpFactory };
+ int[] bloomFilterKeyFields = new int[btreeCmpFactories.length];
+ for (int i = 0; i < btreeCmpFactories.length; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider,
+ bloomFilterKeyFields);
+
ILSMIndexFileManager fileNameManager = new LSMRTreeFileManager(ioManager, diskFileMapProvider, file,
diskRTreeFactory, diskBTreeFactory, startIODeviceIndex);
LSMRTree lsmTree = new LSMRTree(memBufferCache, memFreePageManager, rtreeInteriorFrameFactory,
rtreeLeafFrameFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory, fileNameManager,
- diskRTreeFactory, diskBTreeFactory, diskFileMapProvider, typeTraits.length, rtreeCmpFactories,
- btreeCmpFactories, linearizeCmpFactory, comparatorFields, linearizerArray, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ diskRTreeFactory, diskBTreeFactory, bloomFilterFactory, diskFileMapProvider, typeTraits.length,
+ rtreeCmpFactories, btreeCmpFactories, linearizeCmpFactory, comparatorFields, linearizerArray,
+ mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
return lsmTree;
}
@@ -172,8 +180,8 @@
IBinaryComparatorFactory[] linearizerArray = { linearizerCmpFactory,
btreeCmpFactories[btreeCmpFactories.length - 1] };
- ILSMIndexFileManager fileNameManager = new LSMIndexFileManager(ioManager, diskFileMapProvider, file,
- diskRTreeFactory, startIODeviceIndex);
+ ILSMIndexFileManager fileNameManager = new LSMRTreeWithAntiMatterTuplesFileManager(ioManager,
+ diskFileMapProvider, file, diskRTreeFactory, startIODeviceIndex);
LSMRTreeWithAntiMatterTuples lsmTree = new LSMRTreeWithAntiMatterTuples(memBufferCache, memFreePageManager,
rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory,
fileNameManager, diskRTreeFactory, bulkLoadRTreeFactory, diskFileMapProvider, typeTraits.length,
diff --git a/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorDescriptor.java b/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorDescriptor.java
index 52e9c32..d718c69 100644
--- a/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorDescriptor.java
+++ b/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorDescriptor.java
@@ -44,8 +44,9 @@
IIndexDataflowHelperFactory dataflowHelperFactory, boolean retainInput,
ISearchOperationCallbackFactory searchOpCallbackFactory) {
super(spec, 1, 1, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, dataflowHelperFactory, null, retainInput, NoOpLocalResourceFactoryProvider.INSTANCE,
- searchOpCallbackFactory, NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, dataflowHelperFactory, null, retainInput,
+ NoOpLocalResourceFactoryProvider.INSTANCE, searchOpCallbackFactory,
+ NoOpOperationCallbackFactory.INSTANCE);
this.keyFields = keyFields;
}
diff --git a/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTree.java b/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTree.java
index 773d593..c12dc50 100644
--- a/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTree.java
+++ b/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTree.java
@@ -848,7 +848,8 @@
}
@Override
- public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws TreeIndexException {
+ public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
+ throws TreeIndexException {
// TODO: verifyInput currently does nothing.
try {
return new RTreeBulkLoader(fillFactor);
diff --git a/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeSearchCursor.java b/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeSearchCursor.java
index fd80071..6b5b1b5 100644
--- a/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeSearchCursor.java
+++ b/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeSearchCursor.java
@@ -33,7 +33,7 @@
private int fileId = -1;
private ICachedPage page = null;
private IRTreeInteriorFrame interiorFrame = null;
- private IRTreeLeafFrame leafFrame = null;
+ protected IRTreeLeafFrame leafFrame = null;
private IBufferCache bufferCache = null;
private SearchPredicate pred;
@@ -88,7 +88,7 @@
return page;
}
- private boolean fetchNextLeafPage() throws HyracksDataException {
+ protected boolean fetchNextLeafPage() throws HyracksDataException {
boolean succeeded = false;
if (readLatched) {
page.releaseReadLatch();
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/AbstractOperationCallbackTest.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/AbstractOperationCallbackTest.java
index 33ef61a..41dfdfe 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/AbstractOperationCallbackTest.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/AbstractOperationCallbackTest.java
@@ -12,6 +12,7 @@
@SuppressWarnings("rawtypes")
protected final ISerializerDeserializer[] keySerdes;
protected final MultiComparator cmp;
+ protected final int[] bloomFilterKeyFields;
protected IIndex index;
@@ -20,6 +21,10 @@
public AbstractOperationCallbackTest() {
this.keySerdes = new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE };
this.cmp = MultiComparator.create(SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length));
+ bloomFilterKeyFields = new int[NUM_KEY_FIELDS];
+ for (int i = 0; i < NUM_KEY_FIELDS; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
}
public void setup() throws Exception {
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
index 770a2ad..970526e 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
@@ -54,8 +54,8 @@
protected static final Logger LOGGER = Logger.getLogger(OrderedIndexExamplesTest.class.getName());
protected final Random rnd = new Random(50);
- protected abstract ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException;
+ protected abstract ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException;
/**
* Fixed-Length Key,Value Example. Create a tree index with one fixed-length
@@ -82,7 +82,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -162,7 +166,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -234,7 +242,12 @@
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+ bloomFilterKeyFields[1] = 1;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -313,7 +326,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -393,7 +410,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -495,7 +516,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -581,7 +606,12 @@
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+ bloomFilterKeyFields[1] = 1;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -591,7 +621,7 @@
LOGGER.info("Bulk loading " + ins + " tuples");
}
long start = System.currentTimeMillis();
- IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, false);
+ IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, false, ins);
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
for (int i = 0; i < ins; i++) {
@@ -649,14 +679,19 @@
Random rnd = new Random();
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
+
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
int ins = 1000;
for (int i = 1; i < ins; i++) {
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
// Load sorted records, and expect to fail at tuple i.
- IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, true);
+ IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, true, ins);
for (int j = 0; j < ins; j++) {
if (j > i) {
fail("Bulk load failure test unexpectedly succeeded past tuple: " + j);
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
index f739d33..fa22f6b 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
@@ -53,8 +53,8 @@
protected abstract void tearDown() throws HyracksDataException;
- protected abstract IIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException;
+ protected abstract IIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException;
protected abstract IIndexTestWorkerFactory getWorkerFactory();
@@ -75,7 +75,13 @@
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] cmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes, numKeys);
- IIndex index = createIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[numKeys];
+ for (int i = 0; i < numKeys; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
+
+ IIndex index = createIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
IIndexTestWorkerFactory workerFactory = getWorkerFactory();
// 4 batches per thread.
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/common/TreeIndexTestUtils.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/common/TreeIndexTestUtils.java
index 818373c..1a80231 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/common/TreeIndexTestUtils.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/common/TreeIndexTestUtils.java
@@ -106,7 +106,7 @@
}
public void checkDiskOrderScan(IIndexTestContext ctx) throws Exception {
- try {
+ try {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Testing Disk-Order Scan.");
}
@@ -243,7 +243,7 @@
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
// Perform bulk load.
- IIndexBulkLoader bulkLoader = ctx.getIndex().createBulkLoader(0.7f, false);
+ IIndexBulkLoader bulkLoader = ctx.getIndex().createBulkLoader(0.7f, false, numTuples);
int c = 1;
for (CheckTuple checkTuple : checkTuples) {
if (LOGGER.isLoggable(Level.INFO)) {
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
index 3998108..f962200 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
@@ -91,6 +91,15 @@
public static final int LSM_INVINDEX_SCAN_COUNT_ARRAY_SIZE = 1000000;
public static final int LSM_INVINDEX_MULTITHREAD_NUM_OPERATIONS = 200;
+ // Test params for BloomFilter
+ public static final int BLOOM_FILTER_NUM_TUPLES_TO_INSERT = 100;
+
+ // Mem configuration for BloomFilter.
+ public static final int BLOOM_FILTER_PAGE_SIZE = 256;
+ public static final int BLOOM_FILTER_NUM_PAGES = 1000;
+ public static final int BLOOM_FILTER_MAX_OPEN_FILES = 10;
+ public static final int BLOOM_FILTER_HYRACKS_FRAME_SIZE = 128;
+
}
/* ORIGINAL TEST PARAMETERS: DO NOT EDIT!
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
index e911a98..f93e9b6 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
@@ -688,7 +688,7 @@
LOGGER.info("Bulk loading " + numInserts + " tuples");
}
long start = System.currentTimeMillis();
- IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, false);
+ IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, false, numInserts);
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
diff --git a/hyracks-tests/hyracks-storage-am-bloomfilter-test/pom.xml b/hyracks-tests/hyracks-storage-am-bloomfilter-test/pom.xml
new file mode 100644
index 0000000..3b15677
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-bloomfilter-test/pom.xml
@@ -0,0 +1,49 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-storage-am-bloomfilter-test</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+
+ <parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-tests</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.6</source>
+ <target>1.6</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-storage-am-bloomfilter</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-test-support</artifactId>
+ <version>0.2.2-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/BloomFilterTest.java b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/BloomFilterTest.java
new file mode 100644
index 0000000..6dab32c
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/BloomFilterTest.java
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter;
+
+import java.util.ArrayList;
+import java.util.Random;
+import java.util.TreeSet;
+import java.util.logging.Level;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+
+/** Checks that a bulk-built {@link BloomFilter} reports every tuple it was built from as present. */
+@SuppressWarnings("rawtypes")
+public class BloomFilterTest extends AbstractBloomFilterTest {
+    private final Random rnd = new Random(50);
+
+    @Before
+    public void setUp() throws HyracksDataException {
+        super.setUp();
+    }
+
+    /** Builds a filter over integer key field 0 from 100 distinct random keys, then looks every key up again. */
+    @Test
+    public void singleFieldTest() throws Exception {
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("TESTING BLOOM FILTER");
+        }
+
+        IBufferCache bufferCache = harness.getBufferCache();
+
+        int numElements = 100;
+        int[] keyFields = { 0 };
+
+        BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(),
+                keyFields);
+
+        double acceptableFalsePositiveRate = 0.1;
+        int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+        BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+                acceptableFalsePositiveRate);
+
+        bf.create();
+        bf.activate();
+        IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(),
+                bloomFilterSpec.getNumBucketsPerElements());
+
+        int fieldCount = 2;
+        ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
+        ArrayTupleReference tuple = new ArrayTupleReference();
+
+        // generate keys
+        int maxKey = 1000;
+        TreeSet<Integer> uniqueKeys = new TreeSet<Integer>();
+        while (uniqueKeys.size() < numElements) {
+            int key = rnd.nextInt() % maxKey;
+            uniqueKeys.add(key);
+        }
+        ArrayList<Integer> keys = new ArrayList<Integer>(uniqueKeys);
+
+        // Insert tuples in the bloom filter
+        for (int i = 0; i < keys.size(); ++i) {
+            TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
+            builder.add(tuple);
+        }
+        builder.end();
+
+        // Check all the inserted tuples can be found.
+        long[] hashes = new long[2];
+        for (int i = 0; i < keys.size(); ++i) {
+            TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
+            Assert.assertTrue(bf.contains(tuple, hashes));
+        }
+
+        bf.deactivate();
+        bf.destroy();
+    }
+
+    /** Builds a filter over key fields 2, 4 and 1 of 10000 five-field tuples, then looks every tuple up again. */
+    @Test
+    public void multiFieldTest() throws Exception {
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("TESTING BLOOM FILTER");
+        }
+
+        IBufferCache bufferCache = harness.getBufferCache();
+
+        int numElements = 10000;
+        int[] keyFields = { 2, 4, 1 };
+
+        BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(),
+                keyFields);
+
+        double acceptableFalsePositiveRate = 0.1;
+        int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+        BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+                acceptableFalsePositiveRate);
+
+        bf.create();
+        bf.activate();
+        IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(),
+                bloomFilterSpec.getNumBucketsPerElements());
+
+        int fieldCount = 5;
+        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+        ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
+        ArrayTupleReference tuple = new ArrayTupleReference();
+
+        int maxLength = 20;
+        ArrayList<String> s1 = new ArrayList<String>();
+        ArrayList<String> s2 = new ArrayList<String>();
+        ArrayList<String> s3 = new ArrayList<String>();
+        ArrayList<String> s4 = new ArrayList<String>();
+        // nextInt(bound) keeps every length in [0, maxLength); nextInt() % maxLength could also be negative.
+        for (int i = 0; i < numElements; ++i) {
+            s1.add(randomString(rnd.nextInt(maxLength), rnd));
+            s2.add(randomString(rnd.nextInt(maxLength), rnd));
+            s3.add(randomString(rnd.nextInt(maxLength), rnd));
+            s4.add(randomString(rnd.nextInt(maxLength), rnd));
+        }
+
+        for (int i = 0; i < numElements; ++i) {
+            TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
+            builder.add(tuple);
+        }
+        builder.end();
+
+        long[] hashes = new long[2];
+        for (int i = 0; i < numElements; ++i) {
+            TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
+            Assert.assertTrue(bf.contains(tuple, hashes));
+        }
+
+        bf.deactivate();
+        bf.destroy();
+    }
+}
diff --git a/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
new file mode 100644
index 0000000..284a6cb
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
@@ -0,0 +1,296 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+import java.util.logging.Level;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.MurmurHash128Bit;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest;
+
+@SuppressWarnings("rawtypes")
+public class MurmurHashForITupleReferenceTest extends AbstractBloomFilterTest {
+ private final static int NUM_LONG_VARS_FOR_128_BIT_HASH = 2; // two 64-bit longs hold one 128-bit hash value
+ private final static int DUMMY_FIELD = 0; // field index handed to getFieldData() when copying key bytes
+ private final Random rnd = new Random(50); // fixed seed
+
+ @Before
+ public void setUp() throws HyracksDataException {
+ super.setUp(); // runs the superclass set-up (this class extends AbstractBloomFilterTest)
+ }
+
+    /** Hashes one integer key field of a tuple and compares with the ByteBuffer-based hash of the same bytes. */
+    @Test
+    public void murmurhashONEIntegerFieldTest() throws Exception {
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("TESTING MURMUR HASH ONE INTEGER FIELD");
+        }
+
+        final int numFields = 2;
+        ArrayTupleBuilder builder = new ArrayTupleBuilder(numFields);
+        ArrayTupleReference tuple = new ArrayTupleReference();
+        TupleUtils.createIntegerTuple(builder, tuple, rnd.nextInt());
+        tuple.reset(builder.getFieldEndOffsets(), builder.getByteArray());
+
+        final int[] keyFields = { 0 };
+        final int numKeyBytes = getTupleSize(tuple, keyFields);
+
+        // Hash computed directly from the tuple's key fields.
+        final long[] actuals = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
+        MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
+
+        // Hash computed by this class's ByteBuffer variant over a contiguous copy of the same key bytes.
+        final byte[] keyBytes = new byte[numKeyBytes];
+        fillArrayWithData(keyBytes, keyFields, tuple, numKeyBytes);
+        long[] expecteds = hash3_x64_128(ByteBuffer.wrap(keyBytes), 0, numKeyBytes, 0L);
+        Assert.assertArrayEquals(expecteds, actuals);
+    }
+
+    /** Hashes two integer key fields of a tuple and compares with the ByteBuffer-based hash of the same bytes. */
+    @Test
+    public void murmurhashTwoIntegerFieldsTest() throws Exception {
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("TESTING MURMUR HASH TWO INTEGER FIELDS");
+        }
+
+        final int numFields = 2;
+        ArrayTupleBuilder builder = new ArrayTupleBuilder(numFields);
+        ArrayTupleReference tuple = new ArrayTupleReference();
+        TupleUtils.createIntegerTuple(builder, tuple, rnd.nextInt(), rnd.nextInt());
+        tuple.reset(builder.getFieldEndOffsets(), builder.getByteArray());
+
+        final int[] keyFields = { 0, 1 };
+        final int numKeyBytes = getTupleSize(tuple, keyFields);
+
+        // Hash computed directly from the tuple's key fields.
+        final long[] actuals = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
+        MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
+
+        // Hash computed by this class's ByteBuffer variant over a contiguous copy of the same key bytes.
+        final byte[] keyBytes = new byte[numKeyBytes];
+        fillArrayWithData(keyBytes, keyFields, tuple, numKeyBytes);
+        long[] expecteds = hash3_x64_128(ByteBuffer.wrap(keyBytes), 0, numKeyBytes, 0L);
+        Assert.assertArrayEquals(expecteds, actuals);
+    }
+
+    /** Hashes one string key field of a tuple and compares with the ByteBuffer-based hash of the same bytes. */
+    @Test
+    public void murmurhashOneStringFieldTest() throws Exception {
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("TESTING MURMUR HASH ONE STRING FIELD");
+        }
+
+        final int numFields = 2;
+        ISerializerDeserializer[] serdes = { UTF8StringSerializerDeserializer.INSTANCE };
+        ArrayTupleBuilder builder = new ArrayTupleBuilder(numFields);
+        ArrayTupleReference tuple = new ArrayTupleReference();
+        String value = randomString(100, rnd);
+        TupleUtils.createTuple(builder, tuple, serdes, value);
+
+        final int[] keyFields = { 0 };
+        final int numKeyBytes = getTupleSize(tuple, keyFields);
+
+        // Hash computed directly from the tuple's key fields.
+        final long[] actuals = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
+        MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
+
+        // Hash computed by this class's ByteBuffer variant over a contiguous copy of the same key bytes.
+        final byte[] keyBytes = new byte[numKeyBytes];
+        fillArrayWithData(keyBytes, keyFields, tuple, numKeyBytes);
+        long[] expecteds = hash3_x64_128(ByteBuffer.wrap(keyBytes), 0, numKeyBytes, 0L);
+        Assert.assertArrayEquals(expecteds, actuals);
+    }
+
+    /** Hashes three string key fields in the order 2, 0, 1 and compares with the ByteBuffer-based hash. */
+    @Test
+    public void murmurhashThreeStringFieldsTest() throws Exception {
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("TESTING MURMUR HASH THREE STRING FIELDS");
+        }
+
+        final int numFields = 3;
+        ISerializerDeserializer[] serdes = { UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+        ArrayTupleBuilder builder = new ArrayTupleBuilder(numFields);
+        ArrayTupleReference tuple = new ArrayTupleReference();
+        String first = randomString(40, rnd);
+        String second = randomString(60, rnd);
+        String third = randomString(20, rnd);
+        TupleUtils.createTuple(builder, tuple, serdes, first, second, third);
+
+        final int[] keyFields = { 2, 0, 1 };
+        final int numKeyBytes = getTupleSize(tuple, keyFields);
+
+        // Hash computed directly from the tuple's key fields.
+        final long[] actuals = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
+        MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
+
+        // Hash computed by this class's ByteBuffer variant over a contiguous copy of the same key bytes.
+        final byte[] keyBytes = new byte[numKeyBytes];
+        fillArrayWithData(keyBytes, keyFields, tuple, numKeyBytes);
+        long[] expecteds = hash3_x64_128(ByteBuffer.wrap(keyBytes), 0, numKeyBytes, 0L);
+        Assert.assertArrayEquals(expecteds, actuals);
+    }
+
+ /**
+  * Copies the bytes of the fields listed in keyFields, in that order, back to back into array (at most length bytes).
+  */
+ private void fillArrayWithData(byte[] array, int[] keyFields, ITupleReference tuple, int length) {
+     int written = 0;
+     for (int field : keyFields) {
+         // Clamp to the remaining budget; a zero-length field copies nothing and the loop moves on.
+         int count = Math.max(0, Math.min(tuple.getFieldLength(field), length - written));
+         System.arraycopy(tuple.getFieldData(field), tuple.getFieldStart(field), array, written, count);
+         written += count;
+     }
+ }
+
+ private int getTupleSize(ITupleReference tuple, int[] keyFields) {
+     int totalBytes = 0; // running sum of the byte lengths of the selected key fields
+     for (int field : keyFields) {
+         totalBytes += tuple.getFieldLength(field);
+     }
+     return totalBytes;
+ }
+
+ /**
+  * Reads the 64-bit little-endian word at offset + 8 * index of key (borrowed, like hash3_x64_128, from Cassandra for testing).
+  */
+ protected static long getblock(ByteBuffer key, int offset, int index) {
+     final int base = offset + (index << 3);
+     long word = 0L;
+     for (int i = 7; i >= 0; --i) {
+         word = (word << 8) | (key.get(base + i) & 0xffL);
+     }
+     return word;
+ }
+ /** Reference hash3_x64_128 (128-bit Murmur hash borrowed from Cassandra) of key[offset, offset + length); returns { h1, h2 }. */
+ public static long[] hash3_x64_128(ByteBuffer key, int offset, int length, long seed) {
+ final int nblocks = length >> 4; // Number of complete 128-bit (16-byte) blocks to process.
+ // Both 64-bit halves of the hash state start out as the seed.
+ long h1 = seed;
+ long h2 = seed;
+ // Multipliers applied to each 64-bit word k1/k2 before it is xor-ed into the state.
+ long c1 = 0x87c37b91114253d5L;
+ long c2 = 0x4cf5ad432745937fL;
+
+ //----------
+ // body: mix every complete 16-byte block into h1/h2 as two 64-bit words read by getblock
+
+ for (int i = 0; i < nblocks; i++) {
+ long k1 = getblock(key, offset, i * 2 + 0);
+ long k2 = getblock(key, offset, i * 2 + 1);
+ // Mix the first word of the block into h1.
+ k1 *= c1;
+ k1 = MurmurHash128Bit.rotl64(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+
+ h1 = MurmurHash128Bit.rotl64(h1, 27);
+ h1 += h2;
+ h1 = h1 * 5 + 0x52dce729;
+ // Mix the second word of the block into h2.
+ k2 *= c2;
+ k2 = MurmurHash128Bit.rotl64(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+
+ h2 = MurmurHash128Bit.rotl64(h2, 31);
+ h2 += h1;
+ h2 = h2 * 5 + 0x38495ab5;
+ }
+
+ //----------
+ // tail: mix in the remaining (length & 15) bytes; no case has a break, so a tail of r bytes runs cases r down to 1
+
+ // Advance offset to the unprocessed tail of the data.
+ offset += nblocks * 16;
+
+ long k1 = 0;
+ long k2 = 0;
+ // NOTE(review): unlike getblock, tail bytes are not masked with 0xff, so bytes >= 0x80 sign-extend; confirm intended.
+ switch (length & 15) {
+ case 15:
+ k2 ^= ((long) key.get(offset + 14)) << 48;
+ case 14:
+ k2 ^= ((long) key.get(offset + 13)) << 40;
+ case 13:
+ k2 ^= ((long) key.get(offset + 12)) << 32;
+ case 12:
+ k2 ^= ((long) key.get(offset + 11)) << 24;
+ case 11:
+ k2 ^= ((long) key.get(offset + 10)) << 16;
+ case 10:
+ k2 ^= ((long) key.get(offset + 9)) << 8;
+ case 9:
+ k2 ^= ((long) key.get(offset + 8)) << 0;
+ k2 *= c2;
+ k2 = MurmurHash128Bit.rotl64(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+
+ case 8:
+ k1 ^= ((long) key.get(offset + 7)) << 56;
+ case 7:
+ k1 ^= ((long) key.get(offset + 6)) << 48;
+ case 6:
+ k1 ^= ((long) key.get(offset + 5)) << 40;
+ case 5:
+ k1 ^= ((long) key.get(offset + 4)) << 32;
+ case 4:
+ k1 ^= ((long) key.get(offset + 3)) << 24;
+ case 3:
+ k1 ^= ((long) key.get(offset + 2)) << 16;
+ case 2:
+ k1 ^= ((long) key.get(offset + 1)) << 8;
+ case 1:
+ k1 ^= ((long) key.get(offset));
+ k1 *= c1;
+ k1 = MurmurHash128Bit.rotl64(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+ };
+
+ //----------
+ // finalization: xor in the length, cross-add the halves, and pass each half through MurmurHash128Bit.fmix
+
+ h1 ^= length;
+ h2 ^= length;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = MurmurHash128Bit.fmix(h1);
+ h2 = MurmurHash128Bit.fmix(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ return (new long[] { h1, h2 });
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/util/AbstractBloomFilterTest.java b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/util/AbstractBloomFilterTest.java
new file mode 100644
index 0000000..9b857a6
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/util/AbstractBloomFilterTest.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter.util;
+
+import java.util.Random;
+import java.util.logging.Logger;
+
+import org.junit.After;
+import org.junit.Before;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+ /** Base class for bloom-filter tests: owns a BloomFilterTestHarness and delegates JUnit set-up/tear-down to it. */
+ public abstract class AbstractBloomFilterTest {
+     private static final char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz".toCharArray();
+     protected final Logger LOGGER = Logger.getLogger(AbstractBloomFilterTest.class.getName());
+     protected final BloomFilterTestHarness harness;
+     /** Creates the test with a harness built by the harness's no-argument constructor. */
+     public AbstractBloomFilterTest() {
+         harness = new BloomFilterTestHarness();
+     }
+     /** Creates the test with a harness built from the given page, file and frame settings. */
+     public AbstractBloomFilterTest(int pageSize, int numPages, int maxOpenFiles, int hyracksFrameSize) {
+         harness = new BloomFilterTestHarness(pageSize, numPages, maxOpenFiles, hyracksFrameSize);
+     }
+
+     @Before
+     public void setUp() throws HyracksDataException {
+         harness.setUp();
+     }
+
+     @After
+     public void tearDown() throws HyracksDataException {
+         harness.tearDown();
+     }
+
+     /** Returns a string of length characters, each drawn from 'a'..'z' using random (empty when length <= 0). */
+     public static String randomString(int length, Random random) {
+         StringBuilder strBuilder = new StringBuilder(Math.max(length, 0));
+         for (int i = 0; i < length; ++i) {
+             strBuilder.append(ALPHABET[random.nextInt(ALPHABET.length)]);
+         }
+         return strBuilder.toString();
+     }
+ }
diff --git a/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/util/BloomFilterTestHarness.java b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/util/BloomFilterTestHarness.java
new file mode 100644
index 0000000..8fac122
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/util/BloomFilterTestHarness.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter.util;
+
+import java.io.File;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Random;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
+import edu.uci.ics.hyracks.test.support.TestUtils;
+
+ /** Fixture for bloom-filter tests: builds the task context, buffer cache, file-map provider and temp-file reference. */
+ public class BloomFilterTestHarness {
+     private static final long RANDOM_SEED = 50;
+     protected final int pageSize;
+     protected final int numPages;
+     protected final int maxOpenFiles;
+     protected final int hyracksFrameSize;
+     protected IHyracksTaskContext ctx;
+     protected IBufferCache bufferCache;
+     protected IFileMapProvider fileMapProvider;
+     protected FileReference file;
+     protected final Random rnd = new Random();
+     protected final SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
+     protected final String tmpDir = System.getProperty("java.io.tmpdir");
+     protected final String sep = System.getProperty("file.separator");
+     protected String fileName;
+
+     /** Creates a harness sized from the BLOOM_FILTER_* constants of AccessMethodTestsConfig. */
+     public BloomFilterTestHarness() {
+         this(AccessMethodTestsConfig.BLOOM_FILTER_PAGE_SIZE, AccessMethodTestsConfig.BLOOM_FILTER_NUM_PAGES,
+                 AccessMethodTestsConfig.BLOOM_FILTER_MAX_OPEN_FILES,
+                 AccessMethodTestsConfig.BLOOM_FILTER_HYRACKS_FRAME_SIZE);
+     }
+     /** Creates a harness with explicit page size, page count, open-file limit and Hyracks frame size. */
+     public BloomFilterTestHarness(int pageSize, int numPages, int maxOpenFiles, int hyracksFrameSize) {
+         this.pageSize = pageSize;
+         this.numPages = numPages;
+         this.maxOpenFiles = maxOpenFiles;
+         this.hyracksFrameSize = hyracksFrameSize;
+     }
+     /** Picks a timestamped file name under java.io.tmpdir, creates the storage components and reseeds rnd. */
+     public void setUp() throws HyracksDataException {
+         fileName = tmpDir + sep + simpleDateFormat.format(new Date());
+         ctx = TestUtils.create(getHyracksFrameSize());
+         TestStorageManagerComponentHolder.init(pageSize, numPages, maxOpenFiles);
+         bufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
+         fileMapProvider = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
+         file = new FileReference(new File(fileName));
+         rnd.setSeed(RANDOM_SEED);
+     }
+     /** Closes the buffer cache and deletes the file; the delete is attempted even when closing throws. */
+     public void tearDown() throws HyracksDataException {
+         try {
+             bufferCache.close();
+         } finally {
+             file.delete();
+         }
+     }
+
+     public IHyracksTaskContext getHyracksTaskContext() {
+         return ctx;
+     }
+
+     public IBufferCache getBufferCache() {
+         return bufferCache;
+     }
+
+     public IFileMapProvider getFileMapProvider() {
+         return fileMapProvider;
+     }
+
+     public FileReference getFileReference() {
+         return file;
+     }
+
+     public String getFileName() {
+         return fileName;
+     }
+
+     public Random getRandom() {
+         return rnd;
+     }
+
+     public int getPageSize() {
+         return pageSize;
+     }
+
+     public int getNumPages() {
+         return numPages;
+     }
+
+     public int getHyracksFrameSize() {
+         return hyracksFrameSize;
+     }
+
+     public int getMaxOpenFiles() {
+         return maxOpenFiles;
+     }
+ }
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeExamplesTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeExamplesTest.java
index 6d9d565..c02d53d 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeExamplesTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeExamplesTest.java
@@ -40,8 +40,8 @@
harness.tearDown();
}
- protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException {
+ protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException {
return BTreeUtils.createBTree(harness.getBufferCache(), harness.getFileMapProvider(), typeTraits, cmpFactories,
BTreeLeafFrameType.REGULAR_NSM, harness.getFileReference());
}
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeMultiThreadTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeMultiThreadTest.java
index 746e885..3f38c05 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeMultiThreadTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeMultiThreadTest.java
@@ -47,8 +47,8 @@
}
@Override
- protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException {
+ protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException {
return BTreeUtils.createBTree(harness.getBufferCache(), harness.getFileMapProvider(), typeTraits, cmpFactories,
BTreeLeafFrameType.REGULAR_NSM, harness.getFileReference());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
index 644f348..539ed3e 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
@@ -32,12 +32,12 @@
private final LSMBTreeTestHarness harness = new LSMBTreeTestHarness();
@Override
- protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException {
+ protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException {
return LSMBTreeUtils.createLSMTree(harness.getMemBufferCache(), harness.getMemFreePageManager(),
harness.getIOManager(), harness.getFileReference(), harness.getDiskBufferCache(),
- harness.getDiskFileMapProvider(), typeTraits, cmpFactories, harness.getMergePolicy(),
- harness.getOperationTrackerFactory(), harness.getIOScheduler(),
+ harness.getDiskFileMapProvider(), typeTraits, cmpFactories, bloomFilterKeyFields,
+ harness.getMergePolicy(), harness.getOperationTrackerFactory(), harness.getIOScheduler(),
harness.getIOOperationCallbackProvider());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
index 978ff86..648e70f 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.lsm.btree;
import org.junit.Test;
@@ -30,8 +45,8 @@
index = LSMBTreeUtils.createLSMTree(harness.getMemBufferCache(), harness.getMemFreePageManager(),
harness.getIOManager(), harness.getFileReference(), harness.getDiskBufferCache(),
harness.getDiskFileMapProvider(), SerdeUtils.serdesToTypeTraits(keySerdes),
- SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), harness.getMergePolicy(),
- NoOpOperationTrackerFactory.INSTANCE, harness.getIOScheduler(),
+ SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), bloomFilterKeyFields,
+ harness.getMergePolicy(), NoOpOperationTrackerFactory.INSTANCE, harness.getIOScheduler(),
harness.getIOOperationCallbackProvider());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
index d0d2c77..f8ad0b2 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
@@ -35,8 +35,8 @@
index = LSMBTreeUtils.createLSMTree(harness.getMemBufferCache(), harness.getMemFreePageManager(),
harness.getIOManager(), harness.getFileReference(), harness.getDiskBufferCache(),
harness.getDiskFileMapProvider(), SerdeUtils.serdesToTypeTraits(keySerdes),
- SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), harness.getMergePolicy(),
- NoOpOperationTrackerFactory.INSTANCE, harness.getIOScheduler(),
+ SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), bloomFilterKeyFields,
+ harness.getMergePolicy(), NoOpOperationTrackerFactory.INSTANCE, harness.getIOScheduler(),
harness.getIOOperationCallbackProvider());
}
@@ -228,7 +228,7 @@
throw new IllegalArgumentException("Invalid range: [" + begin + ", " + end + "]");
}
- IIndexBulkLoader bulkloader = index.createBulkLoader(1.0f, false);
+ IIndexBulkLoader bulkloader = index.createBulkLoader(1.0f, false, end - begin);
for (int i = begin; i <= end; i++) {
TupleUtils.createIntegerTuple(builder, tuple, i);
bulkloader.add(tuple);
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
index 0a0b201..c494448 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
@@ -48,12 +48,12 @@
}
@Override
- protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException {
+ protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException {
return LSMBTreeUtils.createLSMTree(harness.getMemBufferCache(), harness.getMemFreePageManager(),
harness.getIOManager(), harness.getFileReference(), harness.getDiskBufferCache(),
- harness.getDiskFileMapProvider(), typeTraits, cmpFactories, harness.getMergePolicy(),
- harness.getOperationTrackerFactory(), harness.getIOScheduler(),
+ harness.getDiskFileMapProvider(), typeTraits, cmpFactories, bloomFilterKeyFields,
+ harness.getMergePolicy(), harness.getOperationTrackerFactory(), harness.getIOScheduler(),
harness.getIOOperationCallbackProvider());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/BTreeBulkLoadRunner.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/BTreeBulkLoadRunner.java
index 2e9395f..69e2b58 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/BTreeBulkLoadRunner.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/BTreeBulkLoadRunner.java
@@ -37,7 +37,7 @@
public long runExperiment(DataGenThread dataGen, int numThreads) throws Exception {
btree.create();
long start = System.currentTimeMillis();
- IIndexBulkLoader bulkLoader = btree.createBulkLoader(1.0f, false);
+ IIndexBulkLoader bulkLoader = btree.createBulkLoader(1.0f, false, 0L);
for (int i = 0; i < numBatches; i++) {
TupleBatch batch = dataGen.tupleBatchQueue.take();
for (int j = 0; j < batch.size(); j++) {
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
index 62186c2..5d2185a 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
@@ -74,7 +74,8 @@
private final int onDiskNumPages;
public LSMTreeRunner(int numBatches, int inMemPageSize, int inMemNumPages, int onDiskPageSize, int onDiskNumPages,
- ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories) throws BTreeException, HyracksException {
+ ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories, int[] bloomFilterKeyFields)
+ throws BTreeException, HyracksException {
this.numBatches = numBatches;
this.onDiskPageSize = onDiskPageSize;
@@ -95,8 +96,8 @@
new LIFOMetaDataFrameFactory());
this.ioScheduler = SynchronousScheduler.INSTANCE;
lsmtree = LSMBTreeUtils.createLSMTree(memBufferCache, memFreePageManager, ioManager, file, bufferCache, fmp,
- typeTraits, cmpFactories, NoMergePolicy.INSTANCE, ThreadCountingOperationTrackerFactory.INSTANCE,
- ioScheduler, NoOpIOOperationCallback.INSTANCE);
+ typeTraits, cmpFactories, bloomFilterKeyFields, NoMergePolicy.INSTANCE,
+ ThreadCountingOperationTrackerFactory.INSTANCE, ioScheduler, NoOpIOOperationCallback.INSTANCE);
}
@Override
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
index e15845f..f790fde 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
@@ -71,9 +71,13 @@
throws Exception {
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] cmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes, numKeyFields);
+ int[] bloomFilterKeyFields = new int[numKeyFields];
+ for (int i = 0; i < numKeyFields; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
LSMBTree lsmTree = LSMBTreeUtils.createLSMTree(memBufferCache, memFreePageManager, ioManager, file,
- diskBufferCache, diskFileMapProvider, typeTraits, cmpFactories, mergePolicy, opTrackerFactory,
- ioScheduler, ioOpCallbackProvider);
+ diskBufferCache, diskFileMapProvider, typeTraits, cmpFactories, bloomFilterKeyFields, mergePolicy,
+ opTrackerFactory, ioScheduler, ioOpCallbackProvider);
LSMBTreeTestContext testCtx = new LSMBTreeTestContext(fieldSerdes, lsmTree);
return testCtx;
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
index 7b54884..97f78f3 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
@@ -205,7 +205,7 @@
ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
// Use the expected index to bulk-load the actual index.
- IIndexBulkLoader bulkLoader = testCtx.getIndex().createBulkLoader(1.0f, false);
+ IIndexBulkLoader bulkLoader = testCtx.getIndex().createBulkLoader(1.0f, false, numDocs);
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(testCtx.getFieldSerdes().length);
ArrayTupleReference tuple = new ArrayTupleReference();
Iterator<CheckTuple> checkTupleIter = tmpMemIndex.iterator();
diff --git a/hyracks-tests/pom.xml b/hyracks-tests/pom.xml
index b79295a..4011339 100644
--- a/hyracks-tests/pom.xml
+++ b/hyracks-tests/pom.xml
@@ -19,5 +19,6 @@
<module>hyracks-storage-am-lsm-btree-test</module>
<module>hyracks-storage-am-lsm-rtree-test</module>
<module>hyracks-storage-am-lsm-invertedindex-test</module>
+ <module>hyracks-storage-am-bloomfilter-test</module>
</modules>
</project>
diff --git a/pom.xml b/pom.xml
index 49a5887..c642992 100644
--- a/pom.xml
+++ b/pom.xml
@@ -95,6 +95,7 @@
<module>hyracks-cli</module>
<module>hyracks-storage-common</module>
<module>hyracks-storage-am-common</module>
+ <module>hyracks-storage-am-bloomfilter</module>
<module>hyracks-storage-am-btree</module>
<module>hyracks-storage-am-lsm-invertedindex</module>
<module>hyracks-storage-am-lsm-common</module>