All lsm indexes (BTree, RTree, and inverted index) are now using bloom filters whenever possible.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_tree_bloom_filter@2776 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
index fce6e69..138a5db 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeOperatorTestHelper.java
@@ -18,7 +18,6 @@
import edu.uci.ics.hyracks.control.nc.io.IOManager;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.lsm.btree.dataflow.LSMBTreeDataflowHelperFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.dataflow.AbstractLSMIndexDataflowHelper;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.ConstantMergePolicyProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.SynchronousSchedulerProvider;
@@ -33,10 +32,10 @@
super(ioManager);
}
- public IIndexDataflowHelperFactory createDataFlowHelperFactory() {
+ public IIndexDataflowHelperFactory createDataFlowHelperFactory(int[] bloomFilterKeyFields) {
return new LSMBTreeDataflowHelperFactory(new ConstantMergePolicyProvider(MERGE_THRESHOLD),
ThreadCountingOperationTrackerFactory.INSTANCE, SynchronousSchedulerProvider.INSTANCE,
- NoOpIOOperationCallback.INSTANCE, DEFAULT_MEM_PAGE_SIZE, DEFAULT_MEM_NUM_PAGES);
+ NoOpIOOperationCallback.INSTANCE, DEFAULT_MEM_PAGE_SIZE, DEFAULT_MEM_NUM_PAGES, bloomFilterKeyFields);
}
}
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreePrimaryIndexScanOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreePrimaryIndexScanOperatorTest.java
index d751399..5353345 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreePrimaryIndexScanOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreePrimaryIndexScanOperatorTest.java
@@ -29,6 +29,10 @@
@Override
protected IIndexDataflowHelperFactory createDataFlowHelperFactory() {
- return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory();
+ int[] bloomFilterKeyFields = new int[primaryKeyFieldCount];
+ for (int i = 0; i < primaryKeyFieldCount; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
+ return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory(bloomFilterKeyFields);
}
}
\ No newline at end of file
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreePrimaryIndexSearchOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreePrimaryIndexSearchOperatorTest.java
index bdcf3f6..82af814 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreePrimaryIndexSearchOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreePrimaryIndexSearchOperatorTest.java
@@ -28,6 +28,10 @@
@Override
protected IIndexDataflowHelperFactory createDataFlowHelperFactory() {
- return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory();
+ int[] bloomFilterKeyFields = new int[primaryKeyFieldCount];
+ for (int i = 0; i < primaryKeyFieldCount; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
+ return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory(bloomFilterKeyFields);
}
}
\ No newline at end of file
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexInsertOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexInsertOperatorTest.java
index 5ba7279..0fc8ef5 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexInsertOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexInsertOperatorTest.java
@@ -28,6 +28,10 @@
@Override
protected IIndexDataflowHelperFactory createDataFlowHelperFactory() {
- return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory();
+ int[] bloomFilterKeyFields = new int[secondaryKeyFieldCount];
+ for (int i = 0; i < secondaryKeyFieldCount; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
+ return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory(bloomFilterKeyFields);
}
}
\ No newline at end of file
diff --git a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexSearchOperatorTest.java b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexSearchOperatorTest.java
index 6f8f647..2d6e7df 100644
--- a/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexSearchOperatorTest.java
+++ b/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/am/lsm/btree/LSMBTreeSecondaryIndexSearchOperatorTest.java
@@ -28,6 +28,10 @@
@Override
protected IIndexDataflowHelperFactory createDataFlowHelperFactory() {
- return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory();
+ int[] bloomFilterKeyFields = new int[secondaryKeyFieldCount];
+ for (int i = 0; i < secondaryKeyFieldCount; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
+ return ((LSMBTreeOperatorTestHelper) testHelper).createDataFlowHelperFactory(bloomFilterKeyFields);
}
}
\ No newline at end of file
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomCalculations.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomCalculations.java
new file mode 100644
index 0000000..9c9a7be
--- /dev/null
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomCalculations.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.bloomfilter.impls;
+
+/**
+ * This class has been taken from cassandra source code with minor modifications.
+ */
+
+/**
+ * The following calculations are taken from:
+ * http://www.cs.wisc.edu/~cao/papers/summary-cache/node8.html
+ * "Bloom Filters - the math"
+ * This class's static methods are meant to facilitate the use of the Bloom
+ * Filter class by helping to choose correct values of 'bits per element' and
+ * 'number of hash functions, k'.
+ */
+public class BloomCalculations {
+
+ private static final int minBuckets = 2;
+ private static final int minK = 1;
+
+ /**
+ * In the following table, the row 'i' shows false positive rates if i buckets
+ * per element are used. Column 'j' shows false positive rates if j hash
+ * functions are used. The first row is 'i=0', the first column is 'j=0'.
+ * Each cell (i,j) the false positive rate determined by using i buckets per
+ * element and j hash functions.
+ */
+ static final double[][] probs = new double[][] {
+ { 1.0 }, // dummy row representing 0 buckets per element
+ { 1.0, 1.0 }, // dummy row representing 1 buckets per element
+ { 1.0, 0.393, 0.400 },
+ { 1.0, 0.283, 0.237, 0.253 },
+ { 1.0, 0.221, 0.155, 0.147, 0.160 },
+ { 1.0, 0.181, 0.109, 0.092, 0.092, 0.101 }, // 5
+ { 1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 0.0638 },
+ { 1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364 },
+ { 1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229 },
+ { 1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145 },
+ { 1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846 }, // 10
+ { 1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509 },
+ { 1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314 },
+ { 1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194 },
+ { 1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012 },
+ { 1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744 }, // 15
+ { 1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459 },
+ { 1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302,
+ 0.000287, 0.000284 },
+ { 1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198,
+ 0.000183, 0.000176 },
+ { 1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132,
+ 0.000118, 0.000111, 0.000109 },
+ { 1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05,
+ 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05 } // 20
+ }; // the first column is a dummy column representing K=0.
+
+ /**
+ * The optimal number of hashes for a given number of bits per element.
+ * These values are automatically calculated from the data above.
+ */
+ private static final int[] optKPerBuckets = new int[probs.length];
+
+ static {
+ for (int i = 0; i < probs.length; i++) {
+ double min = Double.MAX_VALUE;
+ double[] prob = probs[i];
+ for (int j = 0; j < prob.length; j++) {
+ if (prob[j] < min) {
+ min = prob[j];
+ optKPerBuckets[i] = Math.max(minK, j);
+ }
+ }
+ }
+ }
+
+ /**
+ * Given the number of buckets that can be used per element, return a
+ * specification that minimizes the false positive rate.
+ *
+ * @param bucketsPerElement
+ * The number of buckets per element for the filter.
+ * @return A spec that minimizes the false positive rate.
+ */
+ public static BloomFilterSpecification computeBloomSpec(int bucketsPerElement) {
+ assert bucketsPerElement >= 1;
+ assert bucketsPerElement <= probs.length - 1;
+ return new BloomFilterSpecification(optKPerBuckets[bucketsPerElement], bucketsPerElement);
+ }
+
+ /**
+ * Given a maximum tolerable false positive probability, compute a Bloom
+ * specification which will give less than the specified false positive rate,
+ * but minimize the number of buckets per element and the number of hash
+ * functions used. Because bandwidth (and therefore total bitvector size)
+ * is considered more expensive than computing power, preference is given
+ * to minimizing buckets per element rather than number of hash functions.
+ *
+ * @param maxBucketsPerElement
+ * The maximum number of buckets available for the filter.
+ * @param maxFalsePosProb
+ * The maximum tolerable false positive rate.
+ * @return A Bloom Specification which would result in a false positive rate
+ * less than specified by the function call
+ * @throws UnsupportedOperationException
+ * if a filter satisfying the parameters cannot be met
+ */
+ public static BloomFilterSpecification computeBloomSpec(int maxBucketsPerElement, double maxFalsePosProb) {
+ assert maxBucketsPerElement >= 1;
+ assert maxBucketsPerElement <= probs.length - 1;
+ int maxK = probs[maxBucketsPerElement].length - 1;
+
+ // Handle the trivial cases
+ if (maxFalsePosProb >= probs[minBuckets][minK]) {
+ return new BloomFilterSpecification(2, optKPerBuckets[2]);
+ }
+ if (maxFalsePosProb < probs[maxBucketsPerElement][maxK]) {
+ throw new UnsupportedOperationException(String.format("Unable to satisfy %s with %s buckets per element",
+ maxFalsePosProb, maxBucketsPerElement));
+ }
+
+ // First find the minimal required number of buckets:
+ int bucketsPerElement = 2;
+ int K = optKPerBuckets[2];
+ while (probs[bucketsPerElement][K] > maxFalsePosProb) {
+ bucketsPerElement++;
+ K = optKPerBuckets[bucketsPerElement];
+ }
+ // Now that the number of buckets is sufficient, see if we can relax K
+ // without losing too much precision.
+ while (probs[bucketsPerElement][K - 1] <= maxFalsePosProb) {
+ K--;
+ }
+
+ return new BloomFilterSpecification(K, bucketsPerElement);
+ }
+
+ /**
+ * Calculates the maximum number of buckets per element that this implementation
+ * can support. Crucially, it will lower the bucket count if necessary to meet
+ * BitSet's size restrictions.
+ */
+ public static int maxBucketsPerElement(long numElements) {
+ numElements = Math.max(1, numElements);
+ double v = Long.MAX_VALUE / (double) numElements;
+ if (v < 1.0) {
+ throw new UnsupportedOperationException("Cannot compute probabilities for " + numElements + " elements.");
+ }
+ return Math.min(BloomCalculations.probs.length - 1, (int) v);
+ }
+}
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
index a6a45de..0e796b0 100644
--- a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
@@ -36,8 +36,6 @@
private final static int NUM_ELEMENTS_OFFSET = NUM_HASHES_USED_OFFSET + 4; // 8
private final static int NUM_BITS_OFFSET = NUM_ELEMENTS_OFFSET + 8; // 12
- private final static int NUM_BITS_PER_ELEMENT = 10;
-
private final IBufferCache bufferCache;
private final IFileMapProvider fileMapProvider;
private final FileReference file;
@@ -193,8 +191,9 @@
fileId = -1;
}
- public IIndexBulkLoader createBuilder(long numElements, int numHashes) throws HyracksDataException {
- return new BloomFilterBuilder(numElements, numHashes);
+ public IIndexBulkLoader createBuilder(long numElements, int numHashes, int numBitsPerElement)
+ throws HyracksDataException {
+ return new BloomFilterBuilder(numElements, numHashes, numBitsPerElement);
}
public class BloomFilterBuilder implements IIndexBulkLoader {
@@ -205,27 +204,29 @@
private final long numBits;
private final int numPages;
- public BloomFilterBuilder(long numElements, int numHashes) throws HyracksDataException {
+ public BloomFilterBuilder(long numElements, int numHashes, int numBitsPerElement) throws HyracksDataException {
if (!isActivated) {
throw new HyracksDataException("Failed to create the bloom filter builder since it is not activated.");
}
+
this.numElements = numElements;
this.numHashes = numHashes;
- numBits = numElements * NUM_BITS_PER_ELEMENT;
+ numBits = numElements * numBitsPerElement;
long tmp = (long) Math.ceil(numBits / (double) numBitsPerPage);
if (tmp > Integer.MAX_VALUE) {
throw new HyracksDataException("Cannot create a bloom filter with his huge number of pages.");
}
numPages = (int) tmp;
- persistBloomFilterMetaData();
- readBloomFilterMetaData();
-
- int currentPageId = 1;
- while (currentPageId <= numPages) {
- ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
- page.acquireWriteLatch();
- bloomFilterPages.add(page);
- ++currentPageId;
+ if (numElements > 0) {
+ persistBloomFilterMetaData();
+ readBloomFilterMetaData();
+ int currentPageId = 1;
+ while (currentPageId <= numPages) {
+ ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
+ page.acquireWriteLatch();
+ bloomFilterPages.add(page);
+ ++currentPageId;
+ }
}
}
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
index 645c8bd..d430e54 100644
--- a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterFactory.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -23,19 +23,19 @@
public class BloomFilterFactory {
private final IBufferCache bufferCache;
private final IFileMapProvider fileMapProvider;
- private final int[] keyFields;
+ private final int[] bloomFilterKeyFields;
- public BloomFilterFactory(IBufferCache bufferCache, IFileMapProvider fileMapProvider, int[] keyFields) {
+ public BloomFilterFactory(IBufferCache bufferCache, IFileMapProvider fileMapProvider, int[] bloomFilterKeyFields) {
this.bufferCache = bufferCache;
this.fileMapProvider = fileMapProvider;
- this.keyFields = keyFields;
+ this.bloomFilterKeyFields = bloomFilterKeyFields;
}
public BloomFilter createBloomFiltertInstance(FileReference file) throws HyracksDataException {
- return new BloomFilter(bufferCache, fileMapProvider, file, keyFields);
+ return new BloomFilter(bufferCache, fileMapProvider, file, bloomFilterKeyFields);
}
- public int[] getKeyFields() {
- return keyFields;
+ public int[] getBloomFilterKeyFields() {
+ return bloomFilterKeyFields;
}
}
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java
index c396f9b..a1e5517 100644
--- a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -16,16 +16,16 @@
package edu.uci.ics.hyracks.storage.am.bloomfilter.impls;
public final class BloomFilterSpecification {
- private final long numElements;
+ private final int numBucketsPerElement;
private final int numHashes;
- public BloomFilterSpecification(long numElements, int numHashes) {
- this.numElements = numElements;
+ public BloomFilterSpecification(int numBucketsPerElement, int numHashes) {
+ this.numBucketsPerElement = numBucketsPerElement;
this.numHashes = numHashes;
}
- public long getNumElements() {
- return numElements;
+ public int getNumBucketsPerElements() {
+ return numBucketsPerElement;
}
public int getNumHashes() {
diff --git a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/MurmurHash128Bit.java b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/MurmurHash128Bit.java
index b1ec77d..0bc0a7f 100644
--- a/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/MurmurHash128Bit.java
+++ b/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/MurmurHash128Bit.java
@@ -18,7 +18,7 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
/**
- * The idea of this class is borrowed from http://murmurhash.googlepages.com/ and cassandra source code.3
+ * The idea of this class is borrowed from http://murmurhash.googlepages.com/ and cassandra source code.
* We changed the hash function to operate on ITupleReference instead of a byte array.
**/
public class MurmurHash128Bit {
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
index c1cab0a..0ed1dbe 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
@@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package edu.uci.ics.hyracks.storage.am.btree.impls;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
index 0bed4ca..8894dd8 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
@@ -36,20 +36,22 @@
import edu.uci.ics.hyracks.storage.common.file.TransientFileMapManager;
public class LSMBTreeDataflowHelper extends AbstractLSMIndexDataflowHelper {
+ private final int[] bloomFilterKeyFields;
public LSMBTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
+ int[] bloomFilterKeyFields, ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- this(opDesc, ctx, partition, DEFAULT_MEM_PAGE_SIZE, DEFAULT_MEM_NUM_PAGES, mergePolicy, opTrackerFactory,
- ioScheduler, ioOpCallbackProvider);
+ this(opDesc, ctx, partition, DEFAULT_MEM_PAGE_SIZE, DEFAULT_MEM_NUM_PAGES, bloomFilterKeyFields, mergePolicy,
+ opTrackerFactory, ioScheduler, ioOpCallbackProvider);
}
public LSMBTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- int memPageSize, int memNumPages, ILSMMergePolicy mergePolicy,
+ int memPageSize, int memNumPages, int[] bloomFilterKeyFields, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
super(opDesc, ctx, partition, memPageSize, memNumPages, mergePolicy, opTrackerFactory, ioScheduler,
ioOpCallbackProvider);
+ this.bloomFilterKeyFields = bloomFilterKeyFields;
}
@Override
@@ -61,7 +63,7 @@
IInMemoryFreePageManager memFreePageManager = new InMemoryFreePageManager(memNumPages, metaDataFrameFactory);
return LSMBTreeUtils.createLSMTree(memBufferCache, memFreePageManager, ctx.getIOManager(), file, opDesc
.getStorageManager().getBufferCache(ctx), opDesc.getStorageManager().getFileMapProvider(ctx),
- treeOpDesc.getTreeIndexTypeTraits(), treeOpDesc.getTreeIndexComparatorFactories(), mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider, partition);
+ treeOpDesc.getTreeIndexTypeTraits(), treeOpDesc.getTreeIndexComparatorFactories(),
+ bloomFilterKeyFields, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider, partition);
}
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
index f451748..4bef5cf 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
@@ -27,18 +27,22 @@
public class LSMBTreeDataflowHelperFactory extends AbstractLSMIndexDataflowHelperFactory {
private static final long serialVersionUID = 1L;
+ private final int[] bloomFilterKeyFields;
public LSMBTreeDataflowHelperFactory(ILSMMergePolicyProvider mergePolicyProvider,
ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationSchedulerProvider ioSchedulerProvider,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider, int memPageSize, int memNumPages) {
+ ILSMIOOperationCallbackProvider ioOpCallbackProvider, int memPageSize, int memNumPages,
+ int[] bloomFilterKeyFields) {
super(mergePolicyProvider, opTrackerFactory, ioSchedulerProvider, ioOpCallbackProvider, memPageSize,
memNumPages);
+ this.bloomFilterKeyFields = bloomFilterKeyFields;
}
@Override
public IndexDataflowHelper createIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
int partition) {
- return new LSMBTreeDataflowHelper(opDesc, ctx, partition, memPageSize, memNumPages, mergePolicyProvider.getMergePolicy(ctx),
- opTrackerFactory, ioSchedulerProvider.getIOScheduler(ctx), ioOpCallbackProvider);
+ return new LSMBTreeDataflowHelper(opDesc, ctx, partition, memPageSize, memNumPages, bloomFilterKeyFields,
+ mergePolicyProvider.getMergePolicy(ctx), opTrackerFactory, ioSchedulerProvider.getIOScheduler(ctx),
+ ioOpCallbackProvider);
}
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
index b81bbe9..1305d3e 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
@@ -24,8 +24,10 @@
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -89,8 +91,6 @@
private final ITreeIndexFrameFactory deleteLeafFrameFactory;
private final IBinaryComparatorFactory[] cmpFactories;
- private final int numHashes = 10;
-
public LSMBTree(IInMemoryBufferCache memBufferCache, IInMemoryFreePageManager memFreePageManager,
ITreeIndexFrameFactory interiorFrameFactory, ITreeIndexFrameFactory insertLeafFrameFactory,
ITreeIndexFrameFactory deleteLeafFrameFactory, ILSMIndexFileManager fileManager,
@@ -145,7 +145,7 @@
try {
component = createDiskComponent(componentFactory,
lsmComonentFileReference.getInsertIndexFileReference(),
- lsmComonentFileReference.getBloomFilterFileReference(), 0L, 0, false);
+ lsmComonentFileReference.getBloomFilterFileReference(), false);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -322,7 +322,7 @@
if (btreePred.isLowKeyInclusive() && btreePred.isHighKeyInclusive()) {
if (btreePred.getLowKeyComparator().getKeyFieldCount() == btreePred.getHighKeyComparator()
.getKeyFieldCount()) {
- if (btreePred.getLowKeyComparator().getKeyFieldCount() == componentFactory.getKeyFields().length) {
+ if (btreePred.getLowKeyComparator().getKeyFieldCount() == componentFactory.getBloomFilterKeyFields().length) {
if (ctx.cmp.compare(btreePred.getLowKey(), btreePred.getHighKey()) == 0) {
isPointSearch = true;
}
@@ -399,10 +399,15 @@
countingCursor.close();
}
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
+
LSMBTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getBTreeFlushTarget(),
- flushOp.getBloomFilterFlushTarget(), numElements, numHashes, true);
+ flushOp.getBloomFilterFlushTarget(), true);
IIndexBulkLoader bulkLoader = component.getBTree().createBulkLoader(1.0f, false, numElements);
- IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numElements, numHashes);
+ IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numElements,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
IIndexCursor scanCursor = accessor.createSearchCursor();
accessor.search(scanCursor, nullPred);
@@ -454,11 +459,15 @@
numElements += ((LSMBTreeImmutableComponent) mergedComponents.get(i)).getBloomFilter().getNumElements();
}
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
LSMBTreeImmutableComponent mergedComponent = createDiskComponent(componentFactory,
- mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), numElements, numHashes, true);
+ mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), true);
IIndexBulkLoader bulkLoader = mergedComponent.getBTree().createBulkLoader(1.0f, false, numElements);
- IIndexBulkLoader builder = mergedComponent.getBloomFilter().createBuilder(numElements, numHashes);
+ IIndexBulkLoader builder = mergedComponent.getBloomFilter().createBuilder(numElements,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
try {
while (cursor.hasNext()) {
cursor.next();
@@ -475,8 +484,8 @@
}
private LSMBTreeImmutableComponent createDiskComponent(LSMBTreeImmutableComponentFactory factory,
- FileReference btreeFileRef, FileReference bloomFilterFileRef, long numElements, int numHashes,
- boolean createComponent) throws HyracksDataException, IndexException {
+ FileReference btreeFileRef, FileReference bloomFilterFileRef, boolean createComponent)
+ throws HyracksDataException, IndexException {
// Create new BTree instance.
LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) factory
.createLSMComponentInstance(new LSMComponentFileReferences(btreeFileRef, null, bloomFilterFileRef));
@@ -500,16 +509,17 @@
}
}
- private ILSMComponent createBulkLoadTarget(long numElementsHint) throws HyracksDataException, IndexException {
+ private ILSMComponent createBulkLoadTarget() throws HyracksDataException, IndexException {
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
return createDiskComponent(bulkLoadComponentFactory, componentFileRefs.getInsertIndexFileReference(),
- componentFileRefs.getBloomFilterFileReference(), numElementsHint, numHashes, true);
+ componentFileRefs.getBloomFilterFileReference(), true);
}
@Override
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
// The order of forcing the dirty page to be flushed is critical. The bloom filter must be always done first.
LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) lsmComponent;
+ // Flush the bloom filter first.
int fileId = component.getBloomFilter().getFileId();
IBufferCache bufferCache = component.getBTree().getBufferCache();
int startPage = 0;
@@ -528,7 +538,7 @@
public LSMBTreeBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
throws TreeIndexException, HyracksDataException {
try {
- component = createBulkLoadTarget(numElementsHint);
+ component = createBulkLoadTarget();
} catch (HyracksDataException e) {
throw new TreeIndexException(e);
} catch (IndexException e) {
@@ -536,8 +546,12 @@
}
bulkLoader = (BTreeBulkLoader) ((LSMBTreeImmutableComponent) component).getBTree().createBulkLoader(
fillFactor, verifyInput, numElementsHint);
+
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElementsHint);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
builder = ((LSMBTreeImmutableComponent) component).getBloomFilter().createBuilder(numElementsHint,
- numHashes);
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
}
@Override
@@ -558,12 +572,12 @@
}
protected void handleException() throws HyracksDataException, IndexException {
- ((LSMBTreeImmutableComponent) component).getBTree().deactivate();
if (!endCalledBasedOnFailure) {
builder.end();
}
- ((LSMBTreeImmutableComponent) component).getBloomFilter().deactivate();
+ ((LSMBTreeImmutableComponent) component).getBTree().deactivate();
((LSMBTreeImmutableComponent) component).getBTree().destroy();
+ ((LSMBTreeImmutableComponent) component).getBloomFilter().deactivate();
((LSMBTreeImmutableComponent) component).getBloomFilter().destroy();
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java
index e1dbc9e..38766c3 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java
@@ -37,7 +37,6 @@
public class LSMBTreeFileManager extends AbstractLSMIndexFileManager {
private static final String BTREE_STRING = "b";
- private static final String BLOOM_FILTER_STRING = "f";
private final TreeIndexFactory<? extends ITreeIndex> btreeFactory;
@@ -47,22 +46,6 @@
this.btreeFactory = btreeFactory;
}
- protected void cleanupAndGetValidFilesInternal(IODeviceHandle dev, FilenameFilter filter,
- TreeIndexFactory<? extends ITreeIndex> treeFactory, ArrayList<ComparableFileName> allFiles)
- throws HyracksDataException, IndexException {
- File dir = new File(dev.getPath(), baseDir);
- String[] files = dir.list(filter);
- for (String fileName : files) {
- File file = new File(dir.getPath() + File.separator + fileName);
- FileReference fileRef = new FileReference(file);
- if (treeFactory == null || isValidTreeIndex(treeFactory.createIndexInstance(fileRef))) {
- allFiles.add(new ComparableFileName(fileRef));
- } else {
- file.delete();
- }
- }
- }
-
@Override
public LSMComponentFileReferences getRelFlushFileReference() {
Date date = new Date();
@@ -91,12 +74,6 @@
}
};
- private static FilenameFilter bloomFilterFilter = new FilenameFilter() {
- public boolean accept(File dir, String name) {
- return !name.startsWith(".") && name.endsWith(BLOOM_FILTER_STRING);
- }
- };
-
@Override
public List<LSMComponentFileReferences> cleanupAndGetValidFiles() throws HyracksDataException, IndexException {
List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java
index 7b5e95d..daa86d9 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java
@@ -6,7 +6,6 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
public class LSMBTreeImmutableComponent extends AbstractImmutableLSMComponent {
-
private final BTree btree;
private final BloomFilter bloomFilter;
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java
index fcae42c..e9da5a5 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java
@@ -46,7 +46,7 @@
return btreeFactory.getBufferCache();
}
- public int[] getKeyFields() {
- return bloomFilterFactory.getKeyFields();
+ public int[] getBloomFilterKeyFields() {
+ return bloomFilterFactory.getBloomFilterKeyFields();
}
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
index ddfe365..180fb9a 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
@@ -55,6 +55,7 @@
for (ILSMComponent o : mergingComponents) {
LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) o;
devs.add(component.getBTree().getFileReference().getDeviceHandle());
+ devs.add(component.getBloomFilter().getFileReference().getDeviceHandle());
}
return devs;
}
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
index 154ecf6..8ebecdc 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
@@ -49,17 +49,18 @@
public static LSMBTree createLSMTree(IInMemoryBufferCache memBufferCache,
IInMemoryFreePageManager memFreePageManager, IIOManager ioManager, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] cmpFactories, ILSMMergePolicy mergePolicy,
+ IBinaryComparatorFactory[] cmpFactories, int[] bloomFilterKeyFields, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
return createLSMTree(memBufferCache, memFreePageManager, ioManager, file, diskBufferCache, diskFileMapProvider,
- typeTraits, cmpFactories, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider, 0);
+ typeTraits, cmpFactories, bloomFilterKeyFields, mergePolicy, opTrackerFactory, ioScheduler,
+ ioOpCallbackProvider, 0);
}
public static LSMBTree createLSMTree(IInMemoryBufferCache memBufferCache,
IInMemoryFreePageManager memFreePageManager, IIOManager ioManager, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
- IBinaryComparatorFactory[] cmpFactories, ILSMMergePolicy mergePolicy,
+ IBinaryComparatorFactory[] cmpFactories, int[] bloomFilterKeyFields, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
ILSMIOOperationCallbackProvider ioOpCallbackProvider, int startIODeviceIndex) {
LSMBTreeTupleWriterFactory insertTupleWriterFactory = new LSMBTreeTupleWriterFactory(typeTraits,
@@ -82,11 +83,12 @@
TreeIndexFactory<BTree> bulkLoadBTreeFactory = new BTreeFactory(diskBufferCache, diskFileMapProvider,
freePageManagerFactory, interiorFrameFactory, insertLeafFrameFactory, cmpFactories, typeTraits.length);
- int keyFields[] = new int[cmpFactories.length];
- for (int i = 0; i < cmpFactories.length; i++) {
- keyFields[i] = i;
- }
- BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider, keyFields);
+ // int[] bloomFilterKeyFields = new int[cmpFactories.length];
+ // for (int i = 0; i < cmpFactories.length; i++) {
+ // bloomFilterKeyFields[i] = i;
+ // }
+ BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider,
+ bloomFilterKeyFields);
ILSMIndexFileManager fileNameManager = new LSMBTreeFileManager(ioManager, diskFileMapProvider, file,
diskBTreeFactory, startIODeviceIndex);
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
index 3cc7196..0c6b9ab 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
@@ -40,6 +40,8 @@
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
public abstract class AbstractLSMIndex implements ILSMIndexInternal {
+ protected final static double MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE = 0.1;
+
protected final ILSMHarness lsmHarness;
protected final ILSMIOOperationScheduler ioScheduler;
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
index e2bd77c..a84f8c9 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndexFileManager.java
@@ -41,6 +41,7 @@
public abstract class AbstractLSMIndexFileManager implements ILSMIndexFileManager {
protected static final String SPLIT_STRING = "_";
+ protected static final String BLOOM_FILTER_STRING = "f";
// Use all IODevices registered in ioManager in a round-robin fashion to choose
// where to flush and merge
@@ -106,7 +107,7 @@
for (String fileName : files) {
File file = new File(dir.getPath() + File.separator + fileName);
FileReference fileRef = new FileReference(file);
- if (isValidTreeIndex(treeFactory.createIndexInstance(fileRef))) {
+ if (treeFactory == null || isValidTreeIndex(treeFactory.createIndexInstance(fileRef))) {
allFiles.add(new ComparableFileName(fileRef));
} else {
file.delete();
@@ -139,6 +140,12 @@
f.delete();
}
+ protected static FilenameFilter bloomFilterFilter = new FilenameFilter() {
+ public boolean accept(File dir, String name) {
+ return !name.startsWith(".") && name.endsWith(BLOOM_FILTER_STRING);
+ }
+ };
+
protected FileReference createFlushFile(String relFlushFileName) {
// Assigns new files to I/O devices in round-robin fashion.
IODeviceHandle dev = ioManager.getIODevices().get(ioDeviceIndex);
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 159352f..c69a8df 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -23,6 +23,10 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
@@ -94,7 +98,8 @@
public LSMInvertedIndex(IInMemoryBufferCache memBufferCache, IInMemoryFreePageManager memFreePageManager,
OnDiskInvertedIndexFactory diskInvIndexFactory, BTreeFactory deletedKeysBTreeFactory,
- ILSMIndexFileManager fileManager, IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
+ BloomFilterFactory bloomFilterFactory, ILSMIndexFileManager fileManager,
+ IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
@@ -114,7 +119,8 @@
((InMemoryBufferCache) memBufferCache).getFileMapProvider(), invListTypeTraits, invListCmpFactories,
BTreeLeafFrameType.REGULAR_NSM, new FileReference(new File("membtree")));
mutableComponent = new LSMInvertedIndexMutableComponent(memInvIndex, deleteKeysBTree, memFreePageManager);
- componentFactory = new LSMInvertedIndexComponentFactory(diskInvIndexFactory, deletedKeysBTreeFactory);
+ componentFactory = new LSMInvertedIndexComponentFactory(diskInvIndexFactory, deletedKeysBTreeFactory,
+ bloomFilterFactory);
}
@Override
@@ -147,7 +153,8 @@
try {
component = createDiskInvIndexComponent(componentFactory,
lsmComonentFileReference.getInsertIndexFileReference(),
- lsmComonentFileReference.getDeleteIndexFileReference(), false);
+ lsmComonentFileReference.getDeleteIndexFileReference(),
+ lsmComonentFileReference.getBloomFilterFileReference(), false);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -170,8 +177,10 @@
mutableComponent.getDeletedKeysBTree().clear();
for (ILSMComponent c : immutableComponents) {
LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
+ component.getBloomFilter().deactivate();
component.getInvIndex().deactivate();
component.getDeletedKeysBTree().deactivate();
+ component.getBloomFilter().destroy();
component.getInvIndex().destroy();
component.getDeletedKeysBTree().destroy();
}
@@ -202,6 +211,7 @@
List<ILSMComponent> immutableComponents = componentsRef.get();
for (ILSMComponent c : immutableComponents) {
LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
+ component.getBloomFilter().deactivate();
component.getInvIndex().deactivate();
component.getDeletedKeysBTree().deactivate();
}
@@ -230,6 +240,7 @@
LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
component.getInvIndex().destroy();
component.getDeletedKeysBTree().destroy();
+ component.getBloomFilter().destroy();
}
fileManager.deleteDirs();
}
@@ -357,13 +368,14 @@
// Distinguish between regular searches and range searches (mostly used in merges).
if (pred instanceof InvertedIndexSearchPredicate) {
initState = new LSMInvertedIndexSearchCursorInitialState(keyCmp, keysOnlyTuple, indexAccessors,
- deletedKeysBTreeAccessors, ictx, includeMutableComponent, lsmHarness, operationalComponents);
+ deletedKeysBTreeAccessors, mutableComponent.getDeletedKeysBTree().getLeafFrameFactory(), ictx,
+ includeMutableComponent, lsmHarness, operationalComponents);
} else {
InMemoryInvertedIndex memInvIndex = (InMemoryInvertedIndex) mutableComponent.getInvIndex();
MultiComparator tokensAndKeysCmp = MultiComparator.create(memInvIndex.getBTree().getComparatorFactories());
initState = new LSMInvertedIndexRangeSearchCursorInitialState(tokensAndKeysCmp, keyCmp, keysOnlyTuple,
- includeMutableComponent, lsmHarness, indexAccessors, deletedKeysBTreeAccessors, pred,
- operationalComponents);
+ mutableComponent.getDeletedKeysBTree().getLeafFrameFactory(), includeMutableComponent, lsmHarness,
+ indexAccessors, deletedKeysBTreeAccessors, pred, operationalComponents);
}
return initState;
}
@@ -392,7 +404,8 @@
opCtx.getComponentHolder().add(flushingComponent);
ioScheduler.scheduleOperation(new LSMInvertedIndexFlushOperation(new LSMInvertedIndexAccessor(this, lsmHarness,
fileManager, opCtx), mutableComponent, componentFileRefs.getInsertIndexFileReference(),
- componentFileRefs.getDeleteIndexFileReference(), callback));
+ componentFileRefs.getDeleteIndexFileReference(), componentFileRefs.getBloomFilterFileReference(),
+ callback));
}
@Override
@@ -401,7 +414,8 @@
// Create an inverted index instance to be bulk loaded.
LSMInvertedIndexImmutableComponent component = createDiskInvIndexComponent(componentFactory,
- flushOp.getDictBTreeFlushTarget(), flushOp.getDeletedKeysBTreeFlushTarget(), true);
+ flushOp.getDictBTreeFlushTarget(), flushOp.getDeletedKeysBTreeFlushTarget(),
+ flushOp.getBloomFilterFlushTarget(), true);
IInvertedIndex diskInvertedIndex = component.getInvIndex();
// Create a scan cursor on the BTree underlying the in-memory inverted index.
@@ -425,28 +439,53 @@
}
invIndexBulkLoader.end();
- // Create an BTree instance for the deleted keys.
- BTree diskDeletedKeysBTree = component.getDeletedKeysBTree();
-
- // Create a scan cursor on the deleted keys BTree underlying the in-memory inverted index.
IIndexAccessor deletedKeysBTreeAccessor = flushingComponent.getDeletedKeysBTree().createAccessor(
NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- IIndexCursor deletedKeysScanCursor = deletedKeysBTreeAccessor.createSearchCursor();
- deletedKeysBTreeAccessor.search(deletedKeysScanCursor, nullPred);
-
- // Bulk load the deleted-keys BTree.
- IIndexBulkLoader deletedKeysBTreeBulkLoader = diskDeletedKeysBTree.createBulkLoader(1.0f, false, 0L);
+ IIndexCursor btreeCountingCursor = ((BTreeAccessor) deletedKeysBTreeAccessor).createCountingSearchCursor();
+ deletedKeysBTreeAccessor.search(btreeCountingCursor, nullPred);
+ long numBTreeTuples = 0L;
try {
- while (deletedKeysScanCursor.hasNext()) {
- deletedKeysScanCursor.next();
- deletedKeysBTreeBulkLoader.add(deletedKeysScanCursor.getTuple());
+ while (btreeCountingCursor.hasNext()) {
+ btreeCountingCursor.next();
+ ITupleReference countTuple = btreeCountingCursor.getTuple();
+ numBTreeTuples = IntegerSerializerDeserializer.getInt(countTuple.getFieldData(0),
+ countTuple.getFieldStart(0));
}
} finally {
- deletedKeysScanCursor.close();
+ btreeCountingCursor.close();
}
- deletedKeysBTreeBulkLoader.end();
- return new LSMInvertedIndexImmutableComponent(diskInvertedIndex, diskDeletedKeysBTree);
+ if (numBTreeTuples > 0) {
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numBTreeTuples);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
+
+ // Create an BTree instance for the deleted keys.
+ BTree diskDeletedKeysBTree = component.getDeletedKeysBTree();
+
+ // Create a scan cursor on the deleted keys BTree underlying the in-memory inverted index.
+ IIndexCursor deletedKeysScanCursor = deletedKeysBTreeAccessor.createSearchCursor();
+ deletedKeysBTreeAccessor.search(deletedKeysScanCursor, nullPred);
+
+ // Bulk load the deleted-keys BTree.
+ IIndexBulkLoader deletedKeysBTreeBulkLoader = diskDeletedKeysBTree.createBulkLoader(1.0f, false, 0L);
+ IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numBTreeTuples,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
+
+ try {
+ while (deletedKeysScanCursor.hasNext()) {
+ deletedKeysScanCursor.next();
+ deletedKeysBTreeBulkLoader.add(deletedKeysScanCursor.getTuple());
+ builder.add(deletedKeysScanCursor.getTuple());
+ }
+ } finally {
+ deletedKeysScanCursor.close();
+ builder.end();
+ }
+ deletedKeysBTreeBulkLoader.end();
+ }
+
+ return component;
}
@Override
@@ -476,7 +515,7 @@
ILSMIndexAccessorInternal accessor = new LSMInvertedIndexAccessor(this, lsmHarness, fileManager, ictx);
ioScheduler.scheduleOperation(new LSMInvertedIndexMergeOperation(accessor, mergingComponents, cursor,
relMergeFileRefs.getInsertIndexFileReference(), relMergeFileRefs.getDeleteIndexFileReference(),
- callback));
+ relMergeFileRefs.getBloomFilterFileReference(), callback));
}
@Override
@@ -486,7 +525,8 @@
// Create an inverted index instance.
LSMInvertedIndexImmutableComponent component = createDiskInvIndexComponent(componentFactory,
- mergeOp.getDictBTreeMergeTarget(), mergeOp.getDeletedKeysBTreeMergeTarget(), true);
+ mergeOp.getDictBTreeMergeTarget(), mergeOp.getDeletedKeysBTreeMergeTarget(),
+ mergeOp.getBloomFilterMergeTarget(), true);
IInvertedIndex mergedDiskInvertedIndex = component.getInvIndex();
IIndexCursor cursor = mergeOp.getCursor();
@@ -502,19 +542,16 @@
}
invIndexBulkLoader.end();
- // Create an empty deleted keys BTree (do nothing with the returned index).
- BTree deletedKeysBTree = component.getDeletedKeysBTree();
-
// Add the merged components for cleanup.
mergedComponents.addAll(mergeOp.getMergingComponents());
- return new LSMInvertedIndexImmutableComponent(mergedDiskInvertedIndex, deletedKeysBTree);
+ return component;
}
private ILSMComponent createBulkLoadTarget() throws HyracksDataException, IndexException {
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
return createDiskInvIndexComponent(componentFactory, componentFileRefs.getInsertIndexFileReference(),
- componentFileRefs.getDeleteIndexFileReference(), true);
+ componentFileRefs.getDeleteIndexFileReference(), componentFileRefs.getBloomFilterFileReference(), true);
}
@Override
@@ -563,6 +600,8 @@
((LSMInvertedIndexImmutableComponent) component).getInvIndex().destroy();
((LSMInvertedIndexImmutableComponent) component).getDeletedKeysBTree().deactivate();
((LSMInvertedIndexImmutableComponent) component).getDeletedKeysBTree().destroy();
+ ((LSMInvertedIndexImmutableComponent) component).getBloomFilter().deactivate();
+ ((LSMInvertedIndexImmutableComponent) component).getBloomFilter().destroy();
}
@Override
@@ -579,17 +618,20 @@
}
protected LSMInvertedIndexImmutableComponent createDiskInvIndexComponent(ILSMComponentFactory factory,
- FileReference dictBTreeFileRef, FileReference btreeFileRef, boolean create) throws HyracksDataException,
- IndexException {
+ FileReference dictBTreeFileRef, FileReference btreeFileRef, FileReference bloomFilterFileRef, boolean create)
+ throws HyracksDataException, IndexException {
LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) factory
- .createLSMComponentInstance(new LSMComponentFileReferences(dictBTreeFileRef, btreeFileRef, null));
+ .createLSMComponentInstance(new LSMComponentFileReferences(dictBTreeFileRef, btreeFileRef,
+ bloomFilterFileRef));
if (create) {
component.getInvIndex().create();
component.getDeletedKeysBTree().create();
+ component.getBloomFilter().create();
}
// Will be closed during cleanup of merge().
component.getInvIndex().activate();
component.getDeletedKeysBTree().activate();
+ component.getBloomFilter().activate();
return component;
}
@@ -659,8 +701,15 @@
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
LSMInvertedIndexImmutableComponent invIndexComponent = (LSMInvertedIndexImmutableComponent) lsmComponent;
OnDiskInvertedIndex invIndex = (OnDiskInvertedIndex) invIndexComponent.getInvIndex();
+ // Flush the bloom filter first.
+ int fileId = invIndexComponent.getBloomFilter().getFileId();
+ IBufferCache bufferCache = invIndex.getBufferCache();
+ int startPage = 0;
+ int maxPage = invIndexComponent.getBloomFilter().getNumPages();
+ forceFlushDirtyPages(bufferCache, fileId, startPage, maxPage);
+
ITreeIndex treeIndex = invIndex.getBTree();
- // Flush inverted index first.
+ // Flush inverted index second.
forceFlushDirtyPages(treeIndex);
forceFlushInvListsFileDirtyPages(invIndex);
// Flush deleted keys BTree.
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java
index f856f90..1f4db63 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -27,17 +29,21 @@
public class LSMInvertedIndexComponentFactory implements ILSMComponentFactory {
private final OnDiskInvertedIndexFactory diskInvIndexFactory;
private final TreeIndexFactory<BTree> btreeFactory;
+ private final BloomFilterFactory bloomFilterFactory;
public LSMInvertedIndexComponentFactory(OnDiskInvertedIndexFactory diskInvIndexFactory,
- TreeIndexFactory<BTree> btreeFactory) {
+ TreeIndexFactory<BTree> btreeFactory, BloomFilterFactory bloomFilterFactory) {
this.diskInvIndexFactory = diskInvIndexFactory;
this.btreeFactory = btreeFactory;
+ this.bloomFilterFactory = bloomFilterFactory;
}
@Override
- public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException {
+ public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
+ HyracksDataException {
return new LSMInvertedIndexImmutableComponent(diskInvIndexFactory.createIndexInstance(cfr
- .getInsertIndexFileReference()), btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()));
+ .getInsertIndexFileReference()), btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()),
+ bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()));
}
@Override
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
index 21a11dc..15a1633 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java
@@ -69,7 +69,8 @@
String baseName = baseDir + ts + SPLIT_STRING + ts;
// Begin timestamp and end timestamp are identical since it is a flush
return new LSMComponentFileReferences(createFlushFile(baseName + SPLIT_STRING + DICT_BTREE_SUFFIX),
- createFlushFile(baseName + SPLIT_STRING + DELETED_KEYS_BTREE_SUFFIX), null);
+ createFlushFile(baseName + SPLIT_STRING + DELETED_KEYS_BTREE_SUFFIX), createFlushFile(baseName
+ + SPLIT_STRING + BLOOM_FILTER_STRING));
}
@Override
@@ -81,7 +82,8 @@
String baseName = baseDir + firstTimestampRange[0] + SPLIT_STRING + lastTimestampRange[1];
// Get the range of timestamps by taking the earliest and the latest timestamps
return new LSMComponentFileReferences(createMergeFile(baseName + SPLIT_STRING + DICT_BTREE_SUFFIX),
- createMergeFile(baseName + SPLIT_STRING + DELETED_KEYS_BTREE_SUFFIX), null);
+ createMergeFile(baseName + SPLIT_STRING + DELETED_KEYS_BTREE_SUFFIX), createMergeFile(baseName
+ + SPLIT_STRING + BLOOM_FILTER_STRING));
}
@Override
@@ -89,15 +91,40 @@
List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
ArrayList<ComparableFileName> allDictBTreeFiles = new ArrayList<ComparableFileName>();
ArrayList<ComparableFileName> allDeletedKeysBTreeFiles = new ArrayList<ComparableFileName>();
+ ArrayList<ComparableFileName> allBloomFilterFiles = new ArrayList<ComparableFileName>();
// Gather files from all IODeviceHandles.
for (IODeviceHandle dev : ioManager.getIODevices()) {
- cleanupAndGetValidFilesInternal(dev, deletedKeysBTreeFilter, btreeFactory, allDeletedKeysBTreeFiles);
- HashSet<String> deletedKeysBTreeFilesSet = new HashSet<String>();
- for (ComparableFileName cmpFileName : allDeletedKeysBTreeFiles) {
+ cleanupAndGetValidFilesInternal(dev, bloomFilterFilter, null, allBloomFilterFiles);
+ HashSet<String> bloomFilterFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : allBloomFilterFiles) {
int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- deletedKeysBTreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ bloomFilterFilesSet.add(cmpFileName.fileName.substring(0, index));
}
+ // List of valid BTree files that may or may not have a bloom filter buddy. Will check for buddies below.
+ ArrayList<ComparableFileName> tmpAllDeletedBTreeFiles = new ArrayList<ComparableFileName>();
+ cleanupAndGetValidFilesInternal(dev, deletedKeysBTreeFilter, btreeFactory, tmpAllDeletedBTreeFiles);
+
+ // Look for buddy bloom filters for all valid BTrees.
+ // If no buddy is found, delete the file, otherwise add the BTree to allBTreeFiles.
+ HashSet<String> deletedKeysBTreeFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : tmpAllDeletedBTreeFiles) {
+ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+ String file = cmpFileName.fileName.substring(0, index);
+ if (bloomFilterFilesSet.contains(file)) {
+ allDeletedKeysBTreeFiles.add(cmpFileName);
+ deletedKeysBTreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ } else {
+ // Couldn't find the corresponding BTree file; thus, delete
+ // the deleted-keys BTree file.
+ // There is no need to delete the inverted-lists file corresponding to the non-existent
+ // dictionary BTree, because we flush the dictionary BTree first. So if a dictionary BTree
+ // file does not exists, then neither can its inverted-list file.
+ File invalidDeletedKeysBTreeFile = new File(cmpFileName.fullPath);
+ invalidDeletedKeysBTreeFile.delete();
+ }
+ }
+
// We use the dictionary BTree of the inverted index for validation.
// List of valid dictionary BTree files that may or may not have a deleted-keys BTree buddy. Will check for buddies below.
ArrayList<ComparableFileName> tmpAllBTreeFiles = new ArrayList<ComparableFileName>();
@@ -121,24 +148,27 @@
}
}
// Sanity check.
- if (allDictBTreeFiles.size() != allDeletedKeysBTreeFiles.size()) {
- throw new HyracksDataException("Unequal number of valid RTree and BTree files found. Aborting cleanup.");
+ if (allDictBTreeFiles.size() != allDeletedKeysBTreeFiles.size()
+ || allDictBTreeFiles.size() != allBloomFilterFiles.size()) {
+ throw new HyracksDataException(
+ "Unequal number of valid Dictionary BTree, Deleted BTree, and Bloom Filter files found. Aborting cleanup.");
}
// Trivial cases.
- if (allDictBTreeFiles.isEmpty() || allDeletedKeysBTreeFiles.isEmpty()) {
+ if (allDictBTreeFiles.isEmpty() || allDeletedKeysBTreeFiles.isEmpty() || allBloomFilterFiles.isEmpty()) {
return validFiles;
}
- if (allDictBTreeFiles.size() == 1 && allDeletedKeysBTreeFiles.size() == 1) {
+ if (allDictBTreeFiles.size() == 1 && allDeletedKeysBTreeFiles.size() == 1 && allBloomFilterFiles.size() == 1) {
validFiles.add(new LSMComponentFileReferences(allDictBTreeFiles.get(0).fileRef, allDeletedKeysBTreeFiles
- .get(0).fileRef, null));
+ .get(0).fileRef, allBloomFilterFiles.get(0).fileRef));
return validFiles;
}
// Sorts files names from earliest to latest timestamp.
Collections.sort(allDeletedKeysBTreeFiles);
Collections.sort(allDictBTreeFiles);
+ Collections.sort(allBloomFilterFiles);
List<ComparableFileName> validComparableDictBTreeFiles = new ArrayList<ComparableFileName>();
ComparableFileName lastDictBTree = allDictBTreeFiles.get(0);
@@ -148,25 +178,37 @@
ComparableFileName lastDeletedKeysBTree = allDeletedKeysBTreeFiles.get(0);
validComparableDeletedKeysBTreeFiles.add(lastDeletedKeysBTree);
+ List<ComparableFileName> validComparableBloomFilterFiles = new ArrayList<ComparableFileName>();
+ ComparableFileName lastBloomFilter = allBloomFilterFiles.get(0);
+ validComparableBloomFilterFiles.add(lastBloomFilter);
+
for (int i = 1; i < allDictBTreeFiles.size(); i++) {
ComparableFileName currentRTree = allDictBTreeFiles.get(i);
ComparableFileName currentBTree = allDictBTreeFiles.get(i);
+ ComparableFileName currentBloomFilter = allBloomFilterFiles.get(i);
// Current start timestamp is greater than last stop timestamp.
if (currentRTree.interval[0].compareTo(lastDeletedKeysBTree.interval[1]) > 0
- && currentBTree.interval[0].compareTo(lastDeletedKeysBTree.interval[1]) > 0) {
+ && currentBTree.interval[0].compareTo(lastDeletedKeysBTree.interval[1]) > 0
+ && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[1]) > 0) {
validComparableDictBTreeFiles.add(currentRTree);
validComparableDeletedKeysBTreeFiles.add(currentBTree);
+ validComparableBloomFilterFiles.add(currentBloomFilter);
lastDictBTree = currentRTree;
lastDeletedKeysBTree = currentBTree;
+ lastBloomFilter = currentBloomFilter;
} else if (currentRTree.interval[0].compareTo(lastDictBTree.interval[0]) >= 0
&& currentRTree.interval[1].compareTo(lastDictBTree.interval[1]) <= 0
&& currentBTree.interval[0].compareTo(lastDeletedKeysBTree.interval[0]) >= 0
- && currentBTree.interval[1].compareTo(lastDeletedKeysBTree.interval[1]) <= 0) {
+ && currentBTree.interval[1].compareTo(lastDeletedKeysBTree.interval[1]) <= 0
+ && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[0]) >= 0
+ && currentBloomFilter.interval[1].compareTo(lastBloomFilter.interval[1]) <= 0) {
// Invalid files are completely contained in last interval.
File invalidRTreeFile = new File(currentRTree.fullPath);
invalidRTreeFile.delete();
File invalidBTreeFile = new File(currentBTree.fullPath);
invalidBTreeFile.delete();
+ File invalidBloomFilterFile = new File(currentBloomFilter.fullPath);
+ invalidBloomFilterFile.delete();
} else {
// This scenario should not be possible.
throw new HyracksDataException("Found LSM files with overlapping but not contained timetamp intervals.");
@@ -177,14 +219,17 @@
// files come first.
Collections.sort(validComparableDictBTreeFiles, recencyCmp);
Collections.sort(validComparableDeletedKeysBTreeFiles, recencyCmp);
+ Collections.sort(validComparableBloomFilterFiles, recencyCmp);
Iterator<ComparableFileName> dictBTreeFileIter = validComparableDictBTreeFiles.iterator();
Iterator<ComparableFileName> deletedKeysBTreeIter = validComparableDeletedKeysBTreeFiles.iterator();
+ Iterator<ComparableFileName> bloomFilterFileIter = validComparableBloomFilterFiles.iterator();
while (dictBTreeFileIter.hasNext() && deletedKeysBTreeIter.hasNext()) {
ComparableFileName cmpDictBTreeFile = dictBTreeFileIter.next();
ComparableFileName cmpDeletedKeysBTreeFile = deletedKeysBTreeIter.next();
+ ComparableFileName cmpBloomFilterFileName = bloomFilterFileIter.next();
validFiles.add(new LSMComponentFileReferences(cmpDictBTreeFile.fileRef, cmpDeletedKeysBTreeFile.fileRef,
- null));
+ cmpBloomFilterFileName.fileRef));
}
return validFiles;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
index fb55ce0..eedf0da 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
@@ -16,6 +16,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
import java.util.Collections;
+import java.util.HashSet;
import java.util.Set;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -31,15 +32,18 @@
private final LSMInvertedIndexMutableComponent flushingComponent;
private final FileReference dictBTreeFlushTarget;
private final FileReference deletedKeysBTreeFlushTarget;
+ private final FileReference bloomFilterFlushTarget;
private final ILSMIOOperationCallback callback;
public LSMInvertedIndexFlushOperation(ILSMIndexAccessorInternal accessor,
LSMInvertedIndexMutableComponent flushingComponent, FileReference dictBTreeFlushTarget,
- FileReference deletedKeysBTreeFlushTarget, ILSMIOOperationCallback callback) {
+ FileReference deletedKeysBTreeFlushTarget, FileReference bloomFilterFlushTarget,
+ ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.flushingComponent = flushingComponent;
this.dictBTreeFlushTarget = dictBTreeFlushTarget;
this.deletedKeysBTreeFlushTarget = deletedKeysBTreeFlushTarget;
+ this.bloomFilterFlushTarget = bloomFilterFlushTarget;
this.callback = callback;
}
@@ -50,7 +54,12 @@
@Override
public Set<IODeviceHandle> getWriteDevices() {
- return Collections.singleton(dictBTreeFlushTarget.getDeviceHandle());
+ Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
+ devs.add(dictBTreeFlushTarget.getDeviceHandle());
+ devs.add(deletedKeysBTreeFlushTarget.getDeviceHandle());
+ devs.add(bloomFilterFlushTarget.getDeviceHandle());
+ return devs;
+
}
@Override
@@ -71,6 +80,10 @@
return deletedKeysBTreeFlushTarget;
}
+ public FileReference getBloomFilterFlushTarget() {
+ return bloomFilterFlushTarget;
+ }
+
public LSMInvertedIndexMutableComponent getFlushingComponent() {
return flushingComponent;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java
index 5099a7b..4c9b5e8 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java
@@ -1,6 +1,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
@@ -9,10 +10,12 @@
private final IInvertedIndex invIndex;
private final BTree deletedKeysBTree;
+ private final BloomFilter bloomFilter;
- public LSMInvertedIndexImmutableComponent(IInvertedIndex invIndex, BTree deletedKeysBTree) {
+ public LSMInvertedIndexImmutableComponent(IInvertedIndex invIndex, BTree deletedKeysBTree, BloomFilter bloomFilter) {
this.invIndex = invIndex;
this.deletedKeysBTree = deletedKeysBTree;
+ this.bloomFilter = bloomFilter;
}
@Override
@@ -21,6 +24,8 @@
invIndex.destroy();
deletedKeysBTree.deactivate();
deletedKeysBTree.destroy();
+ bloomFilter.deactivate();
+ bloomFilter.destroy();
}
public IInvertedIndex getInvIndex() {
@@ -31,4 +36,7 @@
return deletedKeysBTree;
}
+ public BloomFilter getBloomFilter() {
+ return bloomFilter;
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
index 63b604e..dea628c 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
@@ -36,16 +36,18 @@
private final IIndexCursor cursor;
private final FileReference dictBTreeMergeTarget;
private final FileReference deletedKeysBTreeMergeTarget;
+ private final FileReference bloomFilterMergeTarget;
private final ILSMIOOperationCallback callback;
public LSMInvertedIndexMergeOperation(ILSMIndexAccessorInternal accessor, List<ILSMComponent> mergingComponents,
IIndexCursor cursor, FileReference dictBTreeMergeTarget, FileReference deletedKeysBTreeMergeTarget,
- ILSMIOOperationCallback callback) {
+ FileReference bloomFilterMergeTarget, ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.mergingComponents = mergingComponents;
this.cursor = cursor;
this.dictBTreeMergeTarget = dictBTreeMergeTarget;
this.deletedKeysBTreeMergeTarget = deletedKeysBTreeMergeTarget;
+ this.bloomFilterMergeTarget = bloomFilterMergeTarget;
this.callback = callback;
}
@@ -57,15 +59,17 @@
OnDiskInvertedIndex invIndex = (OnDiskInvertedIndex) component.getInvIndex();
devs.add(invIndex.getBTree().getFileReference().getDeviceHandle());
devs.add(component.getDeletedKeysBTree().getFileReference().getDeviceHandle());
+ devs.add(component.getBloomFilter().getFileReference().getDeviceHandle());
}
return devs;
}
@Override
public Set<IODeviceHandle> getWriteDevices() {
- Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>(2);
+ Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
devs.add(dictBTreeMergeTarget.getDeviceHandle());
devs.add(deletedKeysBTreeMergeTarget.getDeviceHandle());
+ devs.add(bloomFilterMergeTarget.getDeviceHandle());
return devs;
}
@@ -87,6 +91,10 @@
return deletedKeysBTreeMergeTarget;
}
+ public FileReference getBloomFilterMergeTarget() {
+ return bloomFilterMergeTarget;
+ }
+
public IIndexCursor getCursor() {
return cursor;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 43626f5..259af5b 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -18,6 +18,7 @@
import java.util.ArrayList;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
@@ -27,13 +28,14 @@
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
public class LSMInvertedIndexRangeSearchCursor extends LSMIndexSearchCursor {
// Assuming the cursor for all deleted-keys indexes are of the same type.
- protected IIndexCursor deletedKeysBTreeCursor;
+ private IIndexCursor[] deletedKeysBTreeCursors;
protected ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
protected PermutingTupleReference keysOnlyTuple;
protected RangePredicate keySearchPred;
@@ -59,19 +61,32 @@
rangeCursors[i] = invIndexAccessor.createRangeSearchCursor();
invIndexAccessor.rangeSearch(rangeCursors[i], lsmInitState.getSearchPredicate());
}
+ lsmHarness = lsmInitState.getLSMHarness();
+ operationalComponents = lsmInitState.getOperationalComponents();
+ includeMemComponent = lsmInitState.getIncludeMemComponent();
// For searching the deleted-keys BTrees.
this.keysOnlyTuple = lsmInitState.getKeysOnlyTuple();
deletedKeysBTreeAccessors = lsmInitState.getDeletedKeysBTreeAccessors();
+
if (!deletedKeysBTreeAccessors.isEmpty()) {
- deletedKeysBTreeCursor = deletedKeysBTreeAccessors.get(0).createSearchCursor();
+ deletedKeysBTreeCursors = new IIndexCursor[deletedKeysBTreeAccessors.size()];
+ int i = 0;
+ if (includeMemComponent) {
+ // No need for a bloom filter for the in-memory BTree.
+ deletedKeysBTreeCursors[i] = deletedKeysBTreeAccessors.get(i).createSearchCursor();
+ ++i;
+ }
+ for (; i < deletedKeysBTreeCursors.length; i++) {
+ deletedKeysBTreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitState
+ .getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMInvertedIndexImmutableComponent) operationalComponents.get(i)).getBloomFilter());
+ }
+
}
MultiComparator keyCmp = lsmInitState.getKeyComparator();
keySearchPred = new RangePredicate(keysOnlyTuple, keysOnlyTuple, true, true, keyCmp, keyCmp);
- lsmHarness = lsmInitState.getLSMHarness();
- includeMemComponent = lsmInitState.getIncludeMemComponent();
- operationalComponents = lsmInitState.getOperationalComponents();
setPriorityQueueComparator();
initPriorityQueue();
}
@@ -84,16 +99,16 @@
keysOnlyTuple.reset(checkElement.getTuple());
int end = checkElement.getCursorIndex();
for (int i = 0; i < end; i++) {
- deletedKeysBTreeCursor.reset();
+ deletedKeysBTreeCursors[i].reset();
try {
- deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
- if (deletedKeysBTreeCursor.hasNext()) {
+ deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursors[i], keySearchPred);
+ if (deletedKeysBTreeCursors[i].hasNext()) {
return true;
}
} catch (IndexException e) {
throw new HyracksDataException(e);
} finally {
- deletedKeysBTreeCursor.close();
+ deletedKeysBTreeCursors[i].close();
}
}
return false;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
index 5a81a2e..0cec92e 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
@@ -22,6 +22,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -38,17 +39,20 @@
private final ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
private final ISearchPredicate predicate;
private final PermutingTupleReference keysOnlyTuple;
+ private final ITreeIndexFrameFactory deletedKeysBtreeLeafFrameFactory;
private final boolean includeMemComponent;
private final List<ILSMComponent> operationalComponents;
public LSMInvertedIndexRangeSearchCursorInitialState(MultiComparator tokensAndKeysCmp, MultiComparator keyCmp,
- PermutingTupleReference keysOnlyTuple, boolean includeMemComponent, ILSMHarness lsmHarness,
- ArrayList<IIndexAccessor> indexAccessors, ArrayList<IIndexAccessor> deletedKeysBTreeAccessors,
- ISearchPredicate predicate, List<ILSMComponent> operationalComponents) {
+ PermutingTupleReference keysOnlyTuple, ITreeIndexFrameFactory deletedKeysBtreeLeafFrameFactory,
+ boolean includeMemComponent, ILSMHarness lsmHarness, ArrayList<IIndexAccessor> indexAccessors,
+ ArrayList<IIndexAccessor> deletedKeysBTreeAccessors, ISearchPredicate predicate,
+ List<ILSMComponent> operationalComponents) {
this.tokensAndKeysCmp = tokensAndKeysCmp;
this.keyCmp = keyCmp;
this.keysOnlyTuple = keysOnlyTuple;
+ this.deletedKeysBtreeLeafFrameFactory = deletedKeysBtreeLeafFrameFactory;
this.lsmHarness = lsmHarness;
this.indexAccessors = indexAccessors;
this.deletedKeysBTreeAccessors = deletedKeysBTreeAccessors;
@@ -114,6 +118,10 @@
return keyCmp;
}
+ public ITreeIndexFrameFactory getgetDeletedKeysBTreeLeafFrameFactory() {
+ return deletedKeysBtreeLeafFrameFactory;
+ }
+
public boolean getIncludeMemComponent() {
return includeMemComponent;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
index 1ea66f7..36ad51b 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
@@ -18,6 +18,7 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
@@ -29,6 +30,7 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
/**
@@ -48,7 +50,7 @@
private ISearchOperationCallback searchCallback;
// Assuming the cursor for all deleted-keys indexes are of the same type.
- private IIndexCursor deletedKeysBTreeCursor;
+ private IIndexCursor[] deletedKeysBTreeCursors;
private List<IIndexAccessor> deletedKeysBTreeAccessors;
private RangePredicate keySearchPred;
private ILSMIndexOperationContext opCtx;
@@ -69,7 +71,19 @@
// For searching the deleted-keys BTrees.
deletedKeysBTreeAccessors = lsmInitState.getDeletedKeysBTreeAccessors();
- deletedKeysBTreeCursor = deletedKeysBTreeAccessors.get(0).createSearchCursor();
+ deletedKeysBTreeCursors = new IIndexCursor[deletedKeysBTreeAccessors.size()];
+ int i = 0;
+ if (includeMemComponent) {
+ // No need for a bloom filter for the in-memory BTree.
+ deletedKeysBTreeCursors[i] = deletedKeysBTreeAccessors.get(i).createSearchCursor();
+ ++i;
+ }
+ for (; i < deletedKeysBTreeCursors.length; i++) {
+ deletedKeysBTreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitState
+ .getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMInvertedIndexImmutableComponent) operationalComponents.get(i)).getBloomFilter());
+ }
+
MultiComparator keyCmp = lsmInitState.getKeyComparator();
keySearchPred = new RangePredicate(null, null, true, true, keyCmp, keyCmp);
}
@@ -78,16 +92,16 @@
keySearchPred.setLowKey(key, true);
keySearchPred.setHighKey(key, true);
for (int i = 0; i < accessorIndex; i++) {
- deletedKeysBTreeCursor.reset();
+ deletedKeysBTreeCursors[i].reset();
try {
- deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
- if (deletedKeysBTreeCursor.hasNext()) {
+ deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursors[i], keySearchPred);
+ if (deletedKeysBTreeCursors[i].hasNext()) {
return true;
}
} catch (IndexException e) {
throw new HyracksDataException(e);
} finally {
- deletedKeysBTreeCursor.close();
+ deletedKeysBTreeCursors[i].close();
}
}
return false;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursorInitialState.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursorInitialState.java
index 15fc769..eb6f338 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursorInitialState.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursorInitialState.java
@@ -21,6 +21,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -39,17 +40,20 @@
private MultiComparator originalCmp;
private final MultiComparator keyCmp;
private final PermutingTupleReference keysOnlyTuple;
+ private final ITreeIndexFrameFactory deletedKeysBtreeLeafFrameFactory;
private final List<ILSMComponent> operationalComponents;
public LSMInvertedIndexSearchCursorInitialState(final MultiComparator keyCmp,
PermutingTupleReference keysOnlyTuple, List<IIndexAccessor> indexAccessors,
- List<IIndexAccessor> deletedKeysBTreeAccessors, IIndexOperationContext ctx, boolean includeMemComponent,
- ILSMHarness lsmHarness, List<ILSMComponent> operationalComponents) {
+ List<IIndexAccessor> deletedKeysBTreeAccessors, ITreeIndexFrameFactory deletedKeysBtreeLeafFrameFactory,
+ IIndexOperationContext ctx, boolean includeMemComponent, ILSMHarness lsmHarness,
+ List<ILSMComponent> operationalComponents) {
this.keyCmp = keyCmp;
this.keysOnlyTuple = keysOnlyTuple;
this.indexAccessors = indexAccessors;
this.deletedKeysBTreeAccessors = deletedKeysBTreeAccessors;
+ this.deletedKeysBtreeLeafFrameFactory = deletedKeysBtreeLeafFrameFactory;
this.includeMemComponent = includeMemComponent;
this.operationalComponents = operationalComponents;
this.lsmHarness = lsmHarness;
@@ -113,6 +117,10 @@
public List<IIndexAccessor> getDeletedKeysBTreeAccessors() {
return deletedKeysBTreeAccessors;
}
+
+ public ITreeIndexFrameFactory getgetDeletedKeysBTreeLeafFrameFactory() {
+ return deletedKeysBtreeLeafFrameFactory;
+ }
public PermutingTupleReference getKeysOnlyTuple() {
return keysOnlyTuple;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
index 3cc5deb..1b293eb 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
@@ -17,6 +17,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.common.api.IInMemoryFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IInMemoryBufferCache;
@@ -36,16 +37,16 @@
public PartitionedLSMInvertedIndex(IInMemoryBufferCache memBufferCache,
IInMemoryFreePageManager memFreePageManager, OnDiskInvertedIndexFactory diskInvIndexFactory,
- BTreeFactory deletedKeysBTreeFactory, ILSMIndexFileManager fileManager,
- IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
+ BTreeFactory deletedKeysBTreeFactory, BloomFilterFactory bloomFilterFactory,
+ ILSMIndexFileManager fileManager, IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider)
throws IndexException {
- super(memBufferCache, memFreePageManager, diskInvIndexFactory, deletedKeysBTreeFactory, fileManager,
- diskFileMapProvider, invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
- tokenizerFactory, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ super(memBufferCache, memFreePageManager, diskInvIndexFactory, deletedKeysBTreeFactory, bloomFilterFactory,
+ fileManager, diskFileMapProvider, invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+ tokenCmpFactories, tokenizerFactory, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
}
protected InMemoryInvertedIndex createInMemoryInvertedIndex(IInMemoryBufferCache memBufferCache)
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
index 8a4f989..79c8ccf 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
@@ -21,6 +21,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
@@ -141,6 +142,13 @@
BTreeFactory deletedKeysBTreeFactory = createDeletedKeysBTreeFactory(diskFileMapProvider, invListTypeTraits,
invListCmpFactories, diskBufferCache);
+ int[] bloomFilterKeyFields = new int[invListCmpFactories.length];
+ for (int i = 0; i < invListCmpFactories.length; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider,
+ bloomFilterKeyFields);
+
FileReference onDiskDirFileRef = new FileReference(new File(onDiskDir));
LSMInvertedIndexFileManager fileManager = new LSMInvertedIndexFileManager(ioManager, diskFileMapProvider,
onDiskDirFileRef, deletedKeysBTreeFactory, startIODeviceIndex);
@@ -152,9 +160,9 @@
tokenCmpFactories, fileManager);
LSMInvertedIndex invIndex = new LSMInvertedIndex(memBufferCache, memFreePageManager, invIndexFactory,
- deletedKeysBTreeFactory, fileManager, diskFileMapProvider, invListTypeTraits, invListCmpFactories,
- tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy, opTrackerFactory, ioScheduler,
- ioOpCallbackProvider);
+ deletedKeysBTreeFactory, bloomFilterFactory, fileManager, diskFileMapProvider, invListTypeTraits,
+ invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy,
+ opTrackerFactory, ioScheduler, ioOpCallbackProvider);
return invIndex;
}
@@ -184,6 +192,13 @@
BTreeFactory deletedKeysBTreeFactory = createDeletedKeysBTreeFactory(diskFileMapProvider, invListTypeTraits,
invListCmpFactories, diskBufferCache);
+ int[] bloomFilterKeyFields = new int[invListCmpFactories.length];
+ for (int i = 0; i < invListCmpFactories.length; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider,
+ bloomFilterKeyFields);
+
FileReference onDiskDirFileRef = new FileReference(new File(onDiskDir));
LSMInvertedIndexFileManager fileManager = new LSMInvertedIndexFileManager(ioManager, diskFileMapProvider,
onDiskDirFileRef, deletedKeysBTreeFactory, startIODeviceIndex);
@@ -195,9 +210,9 @@
tokenTypeTraits, tokenCmpFactories, fileManager);
PartitionedLSMInvertedIndex invIndex = new PartitionedLSMInvertedIndex(memBufferCache, memFreePageManager,
- invIndexFactory, deletedKeysBTreeFactory, fileManager, diskFileMapProvider, invListTypeTraits,
- invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ invIndexFactory, deletedKeysBTreeFactory, bloomFilterFactory, fileManager, diskFileMapProvider,
+ invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
+ mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
return invIndex;
}
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
index 0e85cfd..23137ab 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
@@ -67,10 +67,6 @@
protected final LSMRTreeMutableComponent mutableComponent;
protected final IInMemoryBufferCache memBufferCache;
- // This is used to estimate number of tuples in the memory RTree and BTree
- // for efficient memory allocation in the sort operation prior to flushing
- protected int memRTreeTuples;
- protected int memBTreeTuples;
protected TreeTupleSorter rTreeTupleSorter;
// On-disk components.
@@ -115,8 +111,6 @@
this.linearizer = linearizer;
this.comparatorFields = comparatorFields;
this.linearizerArray = linearizerArray;
- memRTreeTuples = 0;
- memBTreeTuples = 0;
rTreeTupleSorter = null;
}
@@ -226,20 +220,24 @@
}
protected LSMRTreeImmutableComponent createDiskComponent(ILSMComponentFactory factory, FileReference insertFileRef,
- FileReference deleteFileRef, boolean createComponent) throws HyracksDataException, IndexException {
+ FileReference deleteFileRef, FileReference bloomFilterFileRef, boolean createComponent)
+ throws HyracksDataException, IndexException {
// Create new tree instance.
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) factory
- .createLSMComponentInstance(new LSMComponentFileReferences(insertFileRef, deleteFileRef, null));
+ .createLSMComponentInstance(new LSMComponentFileReferences(insertFileRef, deleteFileRef,
+ bloomFilterFileRef));
if (createComponent) {
component.getRTree().create();
if (component.getBTree() != null) {
component.getBTree().create();
+ component.getBloomFilter().create();
}
}
// Tree will be closed during cleanup of merge().
component.getRTree().activate();
if (component.getBTree() != null) {
component.getBTree().activate();
+ component.getBloomFilter().activate();
}
return component;
}
@@ -302,7 +300,6 @@
if (foundTupleInMemoryBTree) {
try {
ctx.memBTreeAccessor.delete(tuple);
- memBTreeTuples--;
} catch (BTreeNonExistentKeyException e) {
// Tuple has been deleted in the meantime. Do nothing.
// This normally shouldn't happen if we are dealing with
@@ -312,13 +309,11 @@
}
} else {
ctx.memRTreeAccessor.insert(tuple);
- memRTreeTuples++;
}
} else {
try {
ctx.memBTreeAccessor.insert(tuple);
- memBTreeTuples++;
} catch (BTreeDuplicateKeyException e) {
// Do nothing, because one delete tuple is enough to indicate
// that all the corresponding insert tuples are deleted
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
index ea6548e..3bffb43 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
@@ -22,7 +22,13 @@
import edu.uci.ics.hyracks.api.dataflow.value.ILinearizeComparatorFactory;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IInMemoryFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
@@ -56,6 +62,7 @@
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTreeSearchCursor;
import edu.uci.ics.hyracks.storage.am.rtree.impls.SearchPredicate;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
public class LSMRTree extends AbstractLSMRTree {
@@ -64,16 +71,17 @@
ITreeIndexFrameFactory rtreeInteriorFrameFactory, ITreeIndexFrameFactory rtreeLeafFrameFactory,
ITreeIndexFrameFactory btreeInteriorFrameFactory, ITreeIndexFrameFactory btreeLeafFrameFactory,
ILSMIndexFileManager fileNameManager, TreeIndexFactory<RTree> diskRTreeFactory,
- TreeIndexFactory<BTree> diskBTreeFactory, IFileMapProvider diskFileMapProvider, int fieldCount,
- IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
- ILinearizeComparatorFactory linearizer, int[] comparatorFields, IBinaryComparatorFactory[] linearizerArray,
- ILSMMergePolicy mergePolicy, ILSMOperationTrackerFactory opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
+ TreeIndexFactory<BTree> diskBTreeFactory, BloomFilterFactory bloomFilterFactory,
+ IFileMapProvider diskFileMapProvider, int fieldCount, IBinaryComparatorFactory[] rtreeCmpFactories,
+ IBinaryComparatorFactory[] btreeCmpFactories, ILinearizeComparatorFactory linearizer,
+ int[] comparatorFields, IBinaryComparatorFactory[] linearizerArray, ILSMMergePolicy mergePolicy,
+ ILSMOperationTrackerFactory opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
+ ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
super(memBufferCache, memFreePageManager, rtreeInteriorFrameFactory, rtreeLeafFrameFactory,
btreeInteriorFrameFactory, btreeLeafFrameFactory, fileNameManager, diskRTreeFactory,
- new LSMRTreeComponentFactory(diskRTreeFactory, diskBTreeFactory), diskFileMapProvider, fieldCount,
- rtreeCmpFactories, btreeCmpFactories, linearizer, comparatorFields, linearizerArray, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ new LSMRTreeComponentFactory(diskRTreeFactory, diskBTreeFactory, bloomFilterFactory),
+ diskFileMapProvider, fieldCount, rtreeCmpFactories, btreeCmpFactories, linearizer, comparatorFields,
+ linearizerArray, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
}
/**
@@ -100,7 +108,8 @@
try {
component = createDiskComponent(componentFactory,
lsmComonentFileReference.getInsertIndexFileReference(),
- lsmComonentFileReference.getDeleteIndexFileReference(), false);
+ lsmComonentFileReference.getDeleteIndexFileReference(),
+ lsmComonentFileReference.getBloomFilterFileReference(), false);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -117,8 +126,10 @@
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
RTree rtree = component.getRTree();
BTree btree = component.getBTree();
+ BloomFilter bloomFilter = component.getBloomFilter();
rtree.deactivate();
btree.deactivate();
+ bloomFilter.deactivate();
}
isActivated = false;
}
@@ -135,6 +146,7 @@
for (ILSMComponent c : immutableComponents) {
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
component.getBTree().destroy();
+ component.getBloomFilter().destroy();
component.getRTree().destroy();
}
fileManager.deleteDirs();
@@ -147,8 +159,10 @@
for (ILSMComponent c : immutableComponents) {
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
component.getBTree().deactivate();
+ component.getBloomFilter().deactivate();
component.getRTree().deactivate();
component.getBTree().destroy();
+ component.getBloomFilter().destroy();
component.getRTree().destroy();
}
immutableComponents.clear();
@@ -201,7 +215,8 @@
rctx.getComponentHolder().addAll(ctx.getComponentHolder());
LSMRTreeAccessor accessor = new LSMRTreeAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeFlushOperation(accessor, flushingComponent, componentFileRefs
- .getInsertIndexFileReference(), componentFileRefs.getDeleteIndexFileReference(), callback));
+ .getInsertIndexFileReference(), componentFileRefs.getDeleteIndexFileReference(), componentFileRefs
+ .getBloomFilterFileReference(), callback));
}
@Override
@@ -219,7 +234,7 @@
SearchPredicate rtreeNullPredicate = new SearchPredicate(null, null);
memRTreeAccessor.search(rtreeScanCursor, rtreeNullPredicate);
LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getRTreeFlushTarget(),
- flushOp.getBTreeFlushTarget(), true);
+ flushOp.getBTreeFlushTarget(), flushOp.getBloomFilterFlushTarget(), true);
RTree diskRTree = component.getRTree();
IIndexBulkLoader rTreeBulkloader;
ITreeIndexCursor cursor;
@@ -227,9 +242,9 @@
IBinaryComparatorFactory[] linearizerArray = { linearizer };
if (rTreeTupleSorter == null) {
- rTreeTupleSorter = new TreeTupleSorter(memRTreeTuples, flushingComponent.getRTree().getFileId(),
- linearizerArray, rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(),
- flushingComponent.getRTree().getBufferCache(), comparatorFields);
+ rTreeTupleSorter = new TreeTupleSorter(flushingComponent.getRTree().getFileId(), linearizerArray,
+ rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(), flushingComponent
+ .getRTree().getBufferCache(), comparatorFields);
} else {
rTreeTupleSorter.reset();
}
@@ -237,11 +252,9 @@
// RTree.
boolean isEmpty = true;
- if (rtreeScanCursor.hasNext()) {
- isEmpty = false;
- }
try {
while (rtreeScanCursor.hasNext()) {
+ isEmpty = false;
rtreeScanCursor.next();
rTreeTupleSorter.insertTupleEntry(rtreeScanCursor.getPageId(), rtreeScanCursor.getTupleOffset());
}
@@ -250,44 +263,68 @@
}
if (!isEmpty) {
rTreeTupleSorter.sort();
- }
- rTreeBulkloader = diskRTree.createBulkLoader(1.0f, false, 0L);
- cursor = rTreeTupleSorter;
- try {
- while (cursor.hasNext()) {
- cursor.next();
- ITupleReference frameTuple = cursor.getTuple();
- rTreeBulkloader.add(frameTuple);
+ rTreeBulkloader = diskRTree.createBulkLoader(1.0f, false, 0L);
+ cursor = rTreeTupleSorter;
+
+ try {
+ while (cursor.hasNext()) {
+ cursor.next();
+ ITupleReference frameTuple = cursor.getTuple();
+ rTreeBulkloader.add(frameTuple);
+ }
+ } finally {
+ cursor.close();
}
- } finally {
- cursor.close();
+ rTreeBulkloader.end();
}
- rTreeBulkloader.end();
- // scan the memory BTree
ITreeIndexAccessor memBTreeAccessor = flushingComponent.getBTree().createAccessor(
NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- IIndexCursor btreeScanCursor = memBTreeAccessor.createSearchCursor();
RangePredicate btreeNullPredicate = new RangePredicate(null, null, true, true, null, null);
- memBTreeAccessor.search(btreeScanCursor, btreeNullPredicate);
- BTree diskBTree = component.getBTree();
-
- // BulkLoad the tuples from the in-memory tree into the new disk BTree.
- IIndexBulkLoader bTreeBulkloader = diskBTree.createBulkLoader(1.0f, false, 0L);
+ IIndexCursor btreeCountingCursor = ((BTreeAccessor) memBTreeAccessor).createCountingSearchCursor();
+ memBTreeAccessor.search(btreeCountingCursor, btreeNullPredicate);
+ long numBTreeTuples = 0L;
try {
- while (btreeScanCursor.hasNext()) {
- btreeScanCursor.next();
- ITupleReference frameTuple = btreeScanCursor.getTuple();
- bTreeBulkloader.add(frameTuple);
+ while (btreeCountingCursor.hasNext()) {
+ btreeCountingCursor.next();
+ ITupleReference countTuple = btreeCountingCursor.getTuple();
+ numBTreeTuples = IntegerSerializerDeserializer.getInt(countTuple.getFieldData(0),
+ countTuple.getFieldStart(0));
}
} finally {
- btreeScanCursor.close();
+ btreeCountingCursor.close();
}
- bTreeBulkloader.end();
- memRTreeTuples = 0;
- memBTreeTuples = 0;
- return new LSMRTreeImmutableComponent(diskRTree, diskBTree);
+
+ if (numBTreeTuples > 0) {
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numBTreeTuples);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ MAX_BLOOM_FILTER_ACCEPTABLE_FALSE_POSITIVE_RATE);
+
+ IIndexCursor btreeScanCursor = memBTreeAccessor.createSearchCursor();
+ memBTreeAccessor.search(btreeScanCursor, btreeNullPredicate);
+ BTree diskBTree = component.getBTree();
+
+ // BulkLoad the tuples from the in-memory tree into the new disk BTree.
+ IIndexBulkLoader bTreeBulkloader = diskBTree.createBulkLoader(1.0f, false, numBTreeTuples);
+ IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numBTreeTuples,
+ bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
+ // scan the memory BTree
+ try {
+ while (btreeScanCursor.hasNext()) {
+ btreeScanCursor.next();
+ ITupleReference frameTuple = btreeScanCursor.getTuple();
+ bTreeBulkloader.add(frameTuple);
+ builder.add(frameTuple);
+ }
+ } finally {
+ btreeScanCursor.close();
+ builder.end();
+ }
+ bTreeBulkloader.end();
+ }
+
+ return component;
}
@Override
@@ -308,7 +345,7 @@
ILSMIndexAccessorInternal accessor = new LSMRTreeAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeMergeOperation((ILSMIndexAccessorInternal) accessor,
mergingComponents, cursor, relMergeFileRefs.getInsertIndexFileReference(), relMergeFileRefs
- .getDeleteIndexFileReference(), callback));
+ .getDeleteIndexFileReference(), relMergeFileRefs.getBloomFilterFileReference(), callback));
}
@Override
@@ -318,34 +355,21 @@
ITreeIndexCursor cursor = mergeOp.getCursor();
mergedComponents.addAll(mergeOp.getMergingComponents());
- // Nothing to merge.
- if (mergedComponents.size() <= 1) {
- cursor.close();
- return null;
- }
+ LSMRTreeImmutableComponent mergedComponent = createDiskComponent(componentFactory,
+ mergeOp.getRTreeMergeTarget(), mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), true);
+ IIndexBulkLoader bulkLoader = mergedComponent.getRTree().createBulkLoader(1.0f, false, 0L);
- // Bulk load the tuples from all on-disk RTrees into the new RTree.
- LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, mergeOp.getRTreeMergeTarget(),
- mergeOp.getBTreeMergeTarget(), true);
- RTree mergedRTree = component.getRTree();
- BTree mergedBTree = component.getBTree();
-
- IIndexBulkLoader bulkloader = mergedRTree.createBulkLoader(1.0f, false, 0L);
try {
while (cursor.hasNext()) {
cursor.next();
ITupleReference frameTuple = cursor.getTuple();
- bulkloader.add(frameTuple);
+ bulkLoader.add(frameTuple);
}
} finally {
cursor.close();
}
- bulkloader.end();
-
- // Load an empty BTree tree.
- mergedBTree.createBulkLoader(1.0f, false, 0L).end();
-
- return new LSMRTreeImmutableComponent(mergedRTree, mergedBTree);
+ bulkLoader.end();
+ return mergedComponent;
}
@Override
@@ -373,7 +397,7 @@
private ILSMComponent createBulkLoadTarget() throws HyracksDataException, IndexException {
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
return createDiskComponent(componentFactory, componentFileRefs.getInsertIndexFileReference(),
- componentFileRefs.getDeleteIndexFileReference(), true);
+ componentFileRefs.getDeleteIndexFileReference(), componentFileRefs.getBloomFilterFileReference(), true);
}
@Override
@@ -428,12 +452,20 @@
((LSMRTreeImmutableComponent) component).getRTree().destroy();
((LSMRTreeImmutableComponent) component).getBTree().deactivate();
((LSMRTreeImmutableComponent) component).getBTree().destroy();
+ ((LSMRTreeImmutableComponent) component).getBloomFilter().deactivate();
+ ((LSMRTreeImmutableComponent) component).getBloomFilter().destroy();
}
}
@Override
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) lsmComponent;
+ // Flush the bloom filter first.
+ int fileId = component.getBloomFilter().getFileId();
+ IBufferCache bufferCache = component.getBTree().getBufferCache();
+ int startPage = 0;
+ int maxPage = component.getBloomFilter().getNumPages();
+ forceFlushDirtyPages(bufferCache, fileId, startPage, maxPage);
forceFlushDirtyPages(component.getRTree());
markAsValidInternal(component.getRTree());
forceFlushDirtyPages(component.getBTree());
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
index 41ffd3d..2a463c8 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
@@ -11,10 +11,12 @@
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeInteriorFrame;
import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTreeSearchCursor;
@@ -22,18 +24,11 @@
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
-public abstract class LSMRTreeAbstractCursor {
+public abstract class LSMRTreeAbstractCursor implements ITreeIndexCursor {
protected RTreeSearchCursor[] rtreeCursors;
-
- public abstract void next() throws HyracksDataException;
-
- public abstract boolean hasNext() throws HyracksDataException;
-
- public abstract void reset() throws HyracksDataException;
-
protected boolean open = false;
- protected BTreeRangeSearchCursor[] btreeCursors;
+ protected ITreeIndexCursor[] btreeCursors;
protected ITreeIndexAccessor[] diskRTreeAccessors;
protected ITreeIndexAccessor[] diskBTreeAccessors;
private MultiComparator btreeCmp;
@@ -58,6 +53,7 @@
return rtreeCursors[cursorIndex];
}
+ @Override
public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
LSMRTreeCursorInitialState lsmInitialState = (LSMRTreeCursorInitialState) initialState;
btreeCmp = lsmInitialState.getBTreeCmp();
@@ -69,27 +65,42 @@
diskBTreeAccessors = lsmInitialState.getBTreeAccessors();
rtreeCursors = new RTreeSearchCursor[numberOfTrees];
- btreeCursors = new BTreeRangeSearchCursor[numberOfTrees];
+ btreeCursors = new ITreeIndexCursor[numberOfTrees];
- for (int i = 0; i < numberOfTrees; i++) {
+ int i = 0;
+ if (includeMemRTree) {
rtreeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
.getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
.getRTreeLeafFrameFactory().createFrame());
+ // No need for a bloom filter for the in-memory BTree.
btreeCursors[i] = new BTreeRangeSearchCursor((IBTreeLeafFrame) lsmInitialState.getBTreeLeafFrameFactory()
.createFrame(), false);
+ ++i;
}
+ for (; i < numberOfTrees; i++) {
+ rtreeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
+ .getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
+ .getRTreeLeafFrameFactory().createFrame());
+
+ btreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitialState
+ .getBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMRTreeImmutableComponent) operationalComponents.get(i)).getBloomFilter());
+ }
+
rtreeSearchPredicate = (SearchPredicate) searchPred;
btreeRangePredicate = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
open = true;
}
+ @Override
public ICachedPage getPage() {
// do nothing
return null;
}
+ @Override
public void close() throws HyracksDataException {
if (!open) {
return;
@@ -111,18 +122,22 @@
open = false;
}
+ @Override
public void setBufferCache(IBufferCache bufferCache) {
// do nothing
}
+ @Override
public void setFileId(int fileId) {
// do nothing
}
+ @Override
public ITupleReference getTuple() {
return frameTuple;
}
+ @Override
public boolean exclusiveLatchNodes() {
return false;
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java
index 1681fbd..56e3d28 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -27,16 +29,21 @@
public class LSMRTreeComponentFactory implements ILSMComponentFactory {
private final TreeIndexFactory<RTree> rtreeFactory;
private final TreeIndexFactory<BTree> btreeFactory;
+ private final BloomFilterFactory bloomFilterFactory;
- public LSMRTreeComponentFactory(TreeIndexFactory<RTree> rtreeFactory, TreeIndexFactory<BTree> btreeFactory) {
+ public LSMRTreeComponentFactory(TreeIndexFactory<RTree> rtreeFactory, TreeIndexFactory<BTree> btreeFactory,
+ BloomFilterFactory bloomFilterFactory) {
this.rtreeFactory = rtreeFactory;
this.btreeFactory = btreeFactory;
+ this.bloomFilterFactory = bloomFilterFactory;
}
@Override
- public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException {
+ public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
+ HyracksDataException {
return new LSMRTreeImmutableComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
- btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()));
+ btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()),
+ bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()));
}
@Override
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
index 041fda1..e698990 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFileManager.java
@@ -69,7 +69,8 @@
String baseName = baseDir + ts + SPLIT_STRING + ts;
// Begin timestamp and end timestamp are identical since it is a flush
return new LSMComponentFileReferences(createFlushFile(baseName + SPLIT_STRING + RTREE_STRING),
- createFlushFile(baseName + SPLIT_STRING + BTREE_STRING), null);
+ createFlushFile(baseName + SPLIT_STRING + BTREE_STRING), createFlushFile(baseName + SPLIT_STRING
+ + BLOOM_FILTER_STRING));
}
@Override
@@ -81,7 +82,8 @@
String baseName = baseDir + firstTimestampRange[0] + SPLIT_STRING + lastTimestampRange[1];
// Get the range of timestamps by taking the earliest and the latest timestamps
return new LSMComponentFileReferences(createMergeFile(baseName + SPLIT_STRING + RTREE_STRING),
- createMergeFile(baseName + SPLIT_STRING + BTREE_STRING), null);
+ createMergeFile(baseName + SPLIT_STRING + BTREE_STRING), createMergeFile(baseName + SPLIT_STRING
+ + BLOOM_FILTER_STRING));
}
@Override
@@ -89,15 +91,37 @@
List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
ArrayList<ComparableFileName> allRTreeFiles = new ArrayList<ComparableFileName>();
ArrayList<ComparableFileName> allBTreeFiles = new ArrayList<ComparableFileName>();
+ ArrayList<ComparableFileName> allBloomFilterFiles = new ArrayList<ComparableFileName>();
// Gather files from all IODeviceHandles.
for (IODeviceHandle dev : ioManager.getIODevices()) {
- cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, allBTreeFiles);
- HashSet<String> btreeFilesSet = new HashSet<String>();
- for (ComparableFileName cmpFileName : allBTreeFiles) {
+ cleanupAndGetValidFilesInternal(dev, bloomFilterFilter, null, allBloomFilterFiles);
+ HashSet<String> bloomFilterFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : allBloomFilterFiles) {
int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
- btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ bloomFilterFilesSet.add(cmpFileName.fileName.substring(0, index));
}
+
+ // List of valid BTree files that may or may not have a bloom filter buddy. Will check for buddies below.
+ ArrayList<ComparableFileName> tmpAllBTreeFiles = new ArrayList<ComparableFileName>();
+ cleanupAndGetValidFilesInternal(dev, btreeFilter, btreeFactory, tmpAllBTreeFiles);
+ // Look for buddy bloom filters for all valid BTrees.
+ // If no buddy is found, delete the file, otherwise add the BTree to allBTreeFiles.
+ HashSet<String> btreeFilesSet = new HashSet<String>();
+ for (ComparableFileName cmpFileName : tmpAllBTreeFiles) {
+ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
+ String file = cmpFileName.fileName.substring(0, index);
+ if (bloomFilterFilesSet.contains(file)) {
+ allBTreeFiles.add(cmpFileName);
+ btreeFilesSet.add(cmpFileName.fileName.substring(0, index));
+ } else {
+ // Couldn't find the corresponding bloom filter file; thus, delete
+ // the BTree file.
+ File invalidBTreeFile = new File(cmpFileName.fullPath);
+ invalidBTreeFile.delete();
+ }
+ }
+
// List of valid RTree files that may or may not have a BTree buddy. Will check for buddies below.
ArrayList<ComparableFileName> tmpAllRTreeFiles = new ArrayList<ComparableFileName>();
cleanupAndGetValidFilesInternal(dev, rtreeFilter, rtreeFactory, tmpAllRTreeFiles);
@@ -117,24 +141,26 @@
}
}
// Sanity check.
- if (allRTreeFiles.size() != allBTreeFiles.size()) {
- throw new HyracksDataException("Unequal number of valid RTree and BTree files found. Aborting cleanup.");
+ if (allRTreeFiles.size() != allBTreeFiles.size() || allBTreeFiles.size() != allBloomFilterFiles.size()) {
+ throw new HyracksDataException(
+ "Unequal number of valid RTree, BTree, and Bloom Filter files found. Aborting cleanup.");
}
// Trivial cases.
- if (allRTreeFiles.isEmpty() || allBTreeFiles.isEmpty()) {
+ if (allRTreeFiles.isEmpty() || allBTreeFiles.isEmpty() || allBloomFilterFiles.isEmpty()) {
return validFiles;
}
- if (allRTreeFiles.size() == 1 && allBTreeFiles.size() == 1) {
+ if (allRTreeFiles.size() == 1 && allBTreeFiles.size() == 1 && allBloomFilterFiles.size() == 1) {
validFiles.add(new LSMComponentFileReferences(allRTreeFiles.get(0).fileRef, allBTreeFiles.get(0).fileRef,
- null));
+ allBloomFilterFiles.get(0).fileRef));
return validFiles;
}
// Sorts files names from earliest to latest timestamp.
Collections.sort(allRTreeFiles);
Collections.sort(allBTreeFiles);
+ Collections.sort(allBloomFilterFiles);
List<ComparableFileName> validComparableRTreeFiles = new ArrayList<ComparableFileName>();
ComparableFileName lastRTree = allRTreeFiles.get(0);
@@ -144,25 +170,37 @@
ComparableFileName lastBTree = allBTreeFiles.get(0);
validComparableBTreeFiles.add(lastBTree);
+ List<ComparableFileName> validComparableBloomFilterFiles = new ArrayList<ComparableFileName>();
+ ComparableFileName lastBloomFilter = allBloomFilterFiles.get(0);
+ validComparableBloomFilterFiles.add(lastBloomFilter);
+
for (int i = 1; i < allRTreeFiles.size(); i++) {
ComparableFileName currentRTree = allRTreeFiles.get(i);
ComparableFileName currentBTree = allBTreeFiles.get(i);
+ ComparableFileName currentBloomFilter = allBloomFilterFiles.get(i);
// Current start timestamp is greater than last stop timestamp.
if (currentRTree.interval[0].compareTo(lastRTree.interval[1]) > 0
- && currentBTree.interval[0].compareTo(lastBTree.interval[1]) > 0) {
+ && currentBTree.interval[0].compareTo(lastBTree.interval[1]) > 0
+ && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[1]) > 0) {
validComparableRTreeFiles.add(currentRTree);
validComparableBTreeFiles.add(currentBTree);
+ validComparableBloomFilterFiles.add(currentBloomFilter);
lastRTree = currentRTree;
lastBTree = currentBTree;
+ lastBloomFilter = currentBloomFilter;
} else if (currentRTree.interval[0].compareTo(lastRTree.interval[0]) >= 0
&& currentRTree.interval[1].compareTo(lastRTree.interval[1]) <= 0
&& currentBTree.interval[0].compareTo(lastBTree.interval[0]) >= 0
- && currentBTree.interval[1].compareTo(lastBTree.interval[1]) <= 0) {
+ && currentBTree.interval[1].compareTo(lastBTree.interval[1]) <= 0
+ && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[0]) >= 0
+ && currentBloomFilter.interval[1].compareTo(lastBloomFilter.interval[1]) <= 0) {
// Invalid files are completely contained in last interval.
File invalidRTreeFile = new File(currentRTree.fullPath);
invalidRTreeFile.delete();
File invalidBTreeFile = new File(currentBTree.fullPath);
invalidBTreeFile.delete();
+ File invalidBloomFilterFile = new File(currentBloomFilter.fullPath);
+ invalidBloomFilterFile.delete();
} else {
// This scenario should not be possible.
throw new HyracksDataException("Found LSM files with overlapping but not contained timetamp intervals.");
@@ -173,13 +211,17 @@
// files come first.
Collections.sort(validComparableRTreeFiles, recencyCmp);
Collections.sort(validComparableBTreeFiles, recencyCmp);
+ Collections.sort(validComparableBloomFilterFiles, recencyCmp);
Iterator<ComparableFileName> rtreeFileIter = validComparableRTreeFiles.iterator();
Iterator<ComparableFileName> btreeFileIter = validComparableBTreeFiles.iterator();
+ Iterator<ComparableFileName> bloomFilterFileIter = validComparableBloomFilterFiles.iterator();
while (rtreeFileIter.hasNext() && btreeFileIter.hasNext()) {
ComparableFileName cmpRTreeFileName = rtreeFileIter.next();
ComparableFileName cmpBTreeFileName = btreeFileIter.next();
- validFiles.add(new LSMComponentFileReferences(cmpRTreeFileName.fileRef, cmpBTreeFileName.fileRef, null));
+ ComparableFileName cmpBloomFilterFileName = bloomFilterFileIter.next();
+ validFiles.add(new LSMComponentFileReferences(cmpRTreeFileName.fileRef, cmpBTreeFileName.fileRef,
+ cmpBloomFilterFileName.fileRef));
}
return validFiles;
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
index 8c5c6fe..7b7f2bc 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
@@ -19,14 +19,17 @@
private final ILSMComponent flushingComponent;
private final FileReference rtreeFlushTarget;
private final FileReference btreeFlushTarget;
+ private final FileReference bloomFilterFlushTarget;
private final ILSMIOOperationCallback callback;
public LSMRTreeFlushOperation(ILSMIndexAccessorInternal accessor, ILSMComponent flushingComponent,
- FileReference rtreeFlushTarget, FileReference btreeFlushTarget, ILSMIOOperationCallback callback) {
+ FileReference rtreeFlushTarget, FileReference btreeFlushTarget, FileReference bloomFilterFlushTarget,
+ ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.flushingComponent = flushingComponent;
this.rtreeFlushTarget = rtreeFlushTarget;
this.btreeFlushTarget = btreeFlushTarget;
+ this.bloomFilterFlushTarget = bloomFilterFlushTarget;
this.callback = callback;
}
@@ -41,6 +44,7 @@
devs.add(rtreeFlushTarget.getDeviceHandle());
if (btreeFlushTarget != null) {
devs.add(btreeFlushTarget.getDeviceHandle());
+ devs.add(bloomFilterFlushTarget.getDeviceHandle());
}
return devs;
}
@@ -63,6 +67,10 @@
return btreeFlushTarget;
}
+ public FileReference getBloomFilterFlushTarget() {
+ return bloomFilterFlushTarget;
+ }
+
public ILSMComponent getFlushingComponent() {
return flushingComponent;
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java
index afba3a0..8d20c14 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java
@@ -1,6 +1,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
@@ -8,10 +9,12 @@
public class LSMRTreeImmutableComponent extends AbstractImmutableLSMComponent {
private final RTree rtree;
private final BTree btree;
+ private final BloomFilter bloomFilter;
- public LSMRTreeImmutableComponent(RTree rtree, BTree btree) {
+ public LSMRTreeImmutableComponent(RTree rtree, BTree btree, BloomFilter bloomFilter) {
this.rtree = rtree;
this.btree = btree;
+ this.bloomFilter = bloomFilter;
}
@Override
@@ -21,6 +24,8 @@
if (btree != null) {
btree.deactivate();
btree.destroy();
+ bloomFilter.deactivate();
+ bloomFilter.destroy();
}
}
@@ -32,4 +37,7 @@
return btree;
}
+ public BloomFilter getBloomFilter() {
+ return bloomFilter;
+ }
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
index d6a8693..0e05a93 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
@@ -20,16 +20,18 @@
private final ITreeIndexCursor cursor;
private final FileReference rtreeMergeTarget;
private final FileReference btreeMergeTarget;
+ private final FileReference bloomFilterMergeTarget;
private final ILSMIOOperationCallback callback;
public LSMRTreeMergeOperation(ILSMIndexAccessorInternal accessor, List<ILSMComponent> mergingComponents,
ITreeIndexCursor cursor, FileReference rtreeMergeTarget, FileReference btreeMergeTarget,
- ILSMIOOperationCallback callback) {
+ FileReference bloomFilterMergeTarget, ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.mergingComponents = mergingComponents;
this.cursor = cursor;
this.rtreeMergeTarget = rtreeMergeTarget;
this.btreeMergeTarget = btreeMergeTarget;
+ this.bloomFilterMergeTarget = bloomFilterMergeTarget;
this.callback = callback;
}
@@ -41,6 +43,7 @@
devs.add(component.getRTree().getFileReference().getDeviceHandle());
if (component.getBTree() != null) {
devs.add(component.getBTree().getFileReference().getDeviceHandle());
+ devs.add(component.getBloomFilter().getFileReference().getDeviceHandle());
}
}
return devs;
@@ -52,6 +55,7 @@
devs.add(rtreeMergeTarget.getDeviceHandle());
if (btreeMergeTarget != null) {
devs.add(btreeMergeTarget.getDeviceHandle());
+ devs.add(bloomFilterMergeTarget.getDeviceHandle());
}
return devs;
}
@@ -74,6 +78,10 @@
return btreeMergeTarget;
}
+ public FileReference getBloomFilterMergeTarget() {
+ return bloomFilterMergeTarget;
+ }
+
public ITreeIndexCursor getCursor() {
return cursor;
}
@@ -81,5 +89,4 @@
public List<ILSMComponent> getMergingComponents() {
return mergingComponents;
}
-
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
index f5e6f3d..d898f6c 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
@@ -19,11 +19,10 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-public class LSMRTreeSearchCursor extends LSMRTreeAbstractCursor implements ITreeIndexCursor {
+public class LSMRTreeSearchCursor extends LSMRTreeAbstractCursor {
private int currentCursror;
@@ -64,7 +63,7 @@
}
@Override
- public boolean hasNext() throws HyracksDataException {
+ public boolean hasNext() throws HyracksDataException, IndexException {
if (foundNext) {
return true;
}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
index 46cf050..f691839 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
@@ -20,11 +20,10 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-public class LSMRTreeSortedCursor extends LSMRTreeAbstractCursor implements ITreeIndexCursor {
+public class LSMRTreeSortedCursor extends LSMRTreeAbstractCursor {
private ILinearizeComparator linearizeCmp;
private boolean[] depletedRtreeCursors;
@@ -63,7 +62,7 @@
}
@Override
- public boolean hasNext() throws HyracksDataException {
+ public boolean hasNext() throws HyracksDataException, IndexException {
while (!foundNext) {
frameTuple = null;
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
index ea3fc46..478d076 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
@@ -102,7 +102,7 @@
LSMRTreeImmutableComponent component;
try {
component = createDiskComponent(componentFactory,
- lsmComonentFileReference.getInsertIndexFileReference(), null, false);
+ lsmComonentFileReference.getInsertIndexFileReference(), null, null, false);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -205,7 +205,7 @@
opCtx.getComponentHolder().add(flushingComponent);
ILSMIndexAccessorInternal accessor = new LSMRTreeWithAntiMatterTuplesAccessor(lsmHarness, opCtx);
ioScheduler.scheduleOperation(new LSMRTreeFlushOperation(accessor, flushingComponent, relFlushFileRefs
- .getInsertIndexFileReference(), null, callback));
+ .getInsertIndexFileReference(), null, null, callback));
}
@Override
@@ -221,7 +221,7 @@
SearchPredicate rtreeNullPredicate = new SearchPredicate(null, null);
memRTreeAccessor.search(rtreeScanCursor, rtreeNullPredicate);
LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getRTreeFlushTarget(),
- null, true);
+ null, null, true);
RTree diskRTree = component.getRTree();
// scan the memory BTree
@@ -234,13 +234,13 @@
// Since the LSM-RTree is used as a secondary assumption, the
// primary key will be the last comparator in the BTree comparators
if (rTreeTupleSorter == null) {
- rTreeTupleSorter = new TreeTupleSorter(memRTreeTuples, flushingComponent.getRTree().getFileId(),
- linearizerArray, rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(),
- flushingComponent.getRTree().getBufferCache(), comparatorFields);
+ rTreeTupleSorter = new TreeTupleSorter(flushingComponent.getRTree().getFileId(), linearizerArray,
+ rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(), flushingComponent
+ .getRTree().getBufferCache(), comparatorFields);
- bTreeTupleSorter = new TreeTupleSorter(memBTreeTuples, flushingComponent.getBTree().getFileId(),
- linearizerArray, btreeLeafFrameFactory.createFrame(), btreeLeafFrameFactory.createFrame(),
- flushingComponent.getBTree().getBufferCache(), comparatorFields);
+ bTreeTupleSorter = new TreeTupleSorter(flushingComponent.getBTree().getFileId(), linearizerArray,
+ btreeLeafFrameFactory.createFrame(), btreeLeafFrameFactory.createFrame(), flushingComponent
+ .getBTree().getBufferCache(), comparatorFields);
} else {
rTreeTupleSorter.reset();
bTreeTupleSorter.reset();
@@ -249,11 +249,9 @@
// RTree.
boolean isEmpty = true;
- if (rtreeScanCursor.hasNext()) {
- isEmpty = false;
- }
try {
while (rtreeScanCursor.hasNext()) {
+ isEmpty = false;
rtreeScanCursor.next();
rTreeTupleSorter.insertTupleEntry(rtreeScanCursor.getPageId(), rtreeScanCursor.getTupleOffset());
}
@@ -265,11 +263,9 @@
}
isEmpty = true;
- if (btreeScanCursor.hasNext()) {
- isEmpty = false;
- }
try {
while (btreeScanCursor.hasNext()) {
+ isEmpty = false;
btreeScanCursor.next();
bTreeTupleSorter.insertTupleEntry(btreeScanCursor.getPageId(), btreeScanCursor.getTupleOffset());
}
@@ -281,8 +277,8 @@
}
IIndexBulkLoader rTreeBulkloader = diskRTree.createBulkLoader(1.0f, false, 0L);
- LSMRTreeFlushCursor cursor = new LSMRTreeFlushCursor(rTreeTupleSorter, bTreeTupleSorter, comparatorFields,
- linearizerArray);
+ LSMRTreeWithAntiMatterTuplesFlushCursor cursor = new LSMRTreeWithAntiMatterTuplesFlushCursor(rTreeTupleSorter,
+ bTreeTupleSorter, comparatorFields, linearizerArray);
cursor.open(null, null);
try {
@@ -297,9 +293,6 @@
}
rTreeBulkloader.end();
-
- memRTreeTuples = 0;
- memBTreeTuples = 0;
return component;
}
@@ -316,7 +309,7 @@
LSMComponentFileReferences relMergeFileRefs = getMergeTargetFileName(mergingComponents);
ILSMIndexAccessorInternal accessor = new LSMRTreeWithAntiMatterTuplesAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeMergeOperation(accessor, mergingComponents, cursor, relMergeFileRefs
- .getInsertIndexFileReference(), null, callback));
+ .getInsertIndexFileReference(), null, null, callback));
}
@Override
@@ -334,7 +327,7 @@
// Bulk load the tuples from all on-disk RTrees into the new RTree.
LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, mergeOp.getRTreeMergeTarget(),
- null, true);
+ null, null, true);
RTree mergedRTree = component.getRTree();
IIndexBulkLoader bulkloader = mergedRTree.createBulkLoader(1.0f, false, 0L);
try {
@@ -380,7 +373,8 @@
private ILSMComponent createBulkLoadTarget() throws HyracksDataException, IndexException {
LSMComponentFileReferences relFlushFileRefs = fileManager.getRelFlushFileReference();
- return createDiskComponent(bulkLoaComponentFactory, relFlushFileRefs.getInsertIndexFileReference(), null, true);
+ return createDiskComponent(bulkLoaComponentFactory, relFlushFileRefs.getInsertIndexFileReference(), null, null,
+ true);
}
public class LSMRTreeWithAntiMatterTuplesBulkLoader implements IIndexBulkLoader {
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java
index 0ca353b..0149800 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java
@@ -32,7 +32,8 @@
@Override
public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException {
- return new LSMRTreeImmutableComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()), null);
+ return new LSMRTreeImmutableComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
+ null, null);
}
@Override
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFlushCursor.java
similarity index 96%
rename from hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushCursor.java
rename to hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFlushCursor.java
index 65f3f4b..22e6929 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesFlushCursor.java
@@ -25,7 +25,7 @@
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
-public class LSMRTreeFlushCursor implements ITreeIndexCursor {
+public class LSMRTreeWithAntiMatterTuplesFlushCursor implements ITreeIndexCursor {
private final TreeTupleSorter rTreeTupleSorter;
private final TreeTupleSorter bTreeTupleSorter;
private final int[] comparatorFields;
@@ -36,7 +36,7 @@
private ITupleReference btreeTuple;
private boolean foundNext = false;
- public LSMRTreeFlushCursor(TreeTupleSorter rTreeTupleSorter, TreeTupleSorter bTreeTupleSorter,
+ public LSMRTreeWithAntiMatterTuplesFlushCursor(TreeTupleSorter rTreeTupleSorter, TreeTupleSorter bTreeTupleSorter,
int[] comparatorFields, IBinaryComparatorFactory[] comparatorFactories) {
this.rTreeTupleSorter = rTreeTupleSorter;
this.bTreeTupleSorter = bTreeTupleSorter;
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/TreeTupleSorter.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/TreeTupleSorter.java
index d63cff9..294c2b8 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/TreeTupleSorter.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/TreeTupleSorter.java
@@ -29,6 +29,7 @@
import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
public class TreeTupleSorter implements ITreeIndexCursor {
+ private final static int INITIAL_SIZE = 1000000;
private int numTuples;
private int currentTupleIndex;
private int[] tPointers;
@@ -38,18 +39,18 @@
private ITreeIndexTupleReference frameTuple1;
private ITreeIndexTupleReference frameTuple2;
private final int fileId;
- private final static int ARRAY_GROWTH = 1000; // Must be at least of size 2
+ private final static int ARRAY_GROWTH = 1000000; // Must be at least of size 2
private final int[] comparatorFields;
private final MultiComparator cmp;
- public TreeTupleSorter(int initialSize, int fileId, IBinaryComparatorFactory[] comparatorFactories,
- ITreeIndexFrame leafFrame1, ITreeIndexFrame leafFrame2, IBufferCache bufferCache, int[] comparatorFields) {
+ public TreeTupleSorter(int fileId, IBinaryComparatorFactory[] comparatorFactories, ITreeIndexFrame leafFrame1,
+ ITreeIndexFrame leafFrame2, IBufferCache bufferCache, int[] comparatorFields) {
this.fileId = fileId;
this.leafFrame1 = leafFrame1;
this.leafFrame2 = leafFrame2;
this.bufferCache = bufferCache;
this.comparatorFields = comparatorFields;
- tPointers = new int[initialSize * 2];
+ tPointers = new int[INITIAL_SIZE * 2];
frameTuple1 = leafFrame1.createTupleReference();
frameTuple2 = leafFrame2.createTupleReference();
currentTupleIndex = 0;
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
index b3a4ab0..6c9fce6 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
@@ -22,6 +22,7 @@
import edu.uci.ics.hyracks.api.io.IIOManager;
import edu.uci.ics.hyracks.data.std.primitive.DoublePointable;
import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
@@ -105,13 +106,20 @@
int[] comparatorFields = { 0 };
IBinaryComparatorFactory[] linearizerArray = { linearizeCmpFactory };
+ int[] bloomFilterKeyFields = new int[btreeCmpFactories.length];
+ for (int i = 0; i < btreeCmpFactories.length; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider,
+ bloomFilterKeyFields);
+
ILSMIndexFileManager fileNameManager = new LSMRTreeFileManager(ioManager, diskFileMapProvider, file,
diskRTreeFactory, diskBTreeFactory, startIODeviceIndex);
LSMRTree lsmTree = new LSMRTree(memBufferCache, memFreePageManager, rtreeInteriorFrameFactory,
rtreeLeafFrameFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory, fileNameManager,
- diskRTreeFactory, diskBTreeFactory, diskFileMapProvider, typeTraits.length, rtreeCmpFactories,
- btreeCmpFactories, linearizeCmpFactory, comparatorFields, linearizerArray, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ diskRTreeFactory, diskBTreeFactory, bloomFilterFactory, diskFileMapProvider, typeTraits.length,
+ rtreeCmpFactories, btreeCmpFactories, linearizeCmpFactory, comparatorFields, linearizerArray,
+ mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider);
return lsmTree;
}
diff --git a/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeSearchCursor.java b/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeSearchCursor.java
index fd80071..6b5b1b5 100644
--- a/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeSearchCursor.java
+++ b/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeSearchCursor.java
@@ -33,7 +33,7 @@
private int fileId = -1;
private ICachedPage page = null;
private IRTreeInteriorFrame interiorFrame = null;
- private IRTreeLeafFrame leafFrame = null;
+ protected IRTreeLeafFrame leafFrame = null;
private IBufferCache bufferCache = null;
private SearchPredicate pred;
@@ -88,7 +88,7 @@
return page;
}
- private boolean fetchNextLeafPage() throws HyracksDataException {
+ protected boolean fetchNextLeafPage() throws HyracksDataException {
boolean succeeded = false;
if (readLatched) {
page.releaseReadLatch();
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/AbstractOperationCallbackTest.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/AbstractOperationCallbackTest.java
index 33ef61a..41dfdfe 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/AbstractOperationCallbackTest.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/AbstractOperationCallbackTest.java
@@ -12,6 +12,7 @@
@SuppressWarnings("rawtypes")
protected final ISerializerDeserializer[] keySerdes;
protected final MultiComparator cmp;
+ protected final int[] bloomFilterKeyFields;
protected IIndex index;
@@ -20,6 +21,10 @@
public AbstractOperationCallbackTest() {
this.keySerdes = new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE };
this.cmp = MultiComparator.create(SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length));
+ bloomFilterKeyFields = new int[NUM_KEY_FIELDS];
+ for (int i = 0; i < NUM_KEY_FIELDS; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
}
public void setup() throws Exception {
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
index 81f7fce..970526e 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
@@ -54,8 +54,8 @@
protected static final Logger LOGGER = Logger.getLogger(OrderedIndexExamplesTest.class.getName());
protected final Random rnd = new Random(50);
- protected abstract ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException;
+ protected abstract ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException;
/**
* Fixed-Length Key,Value Example. Create a tree index with one fixed-length
@@ -82,7 +82,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -162,7 +166,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -234,7 +242,12 @@
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+ bloomFilterKeyFields[1] = 1;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -313,7 +326,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -393,7 +410,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -495,7 +516,11 @@
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -581,7 +606,12 @@
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+ bloomFilterKeyFields[1] = 1;
+
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
@@ -649,9 +679,14 @@
Random rnd = new Random();
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
+
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[keyFieldCount];
+ bloomFilterKeyFields[0] = 0;
+
int ins = 1000;
for (int i = 1; i < ins; i++) {
- ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories);
+ ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
treeIndex.create();
treeIndex.activate();
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
index f739d33..fa22f6b 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
@@ -53,8 +53,8 @@
protected abstract void tearDown() throws HyracksDataException;
- protected abstract IIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException;
+ protected abstract IIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException;
protected abstract IIndexTestWorkerFactory getWorkerFactory();
@@ -75,7 +75,13 @@
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] cmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes, numKeys);
- IIndex index = createIndex(typeTraits, cmpFactories);
+ // This is only used for the LSM-BTree.
+ int[] bloomFilterKeyFields = new int[numKeys];
+ for (int i = 0; i < numKeys; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
+
+ IIndex index = createIndex(typeTraits, cmpFactories, bloomFilterKeyFields);
IIndexTestWorkerFactory workerFactory = getWorkerFactory();
// 4 batches per thread.
diff --git a/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/BloomFilterTest.java b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/BloomFilterTest.java
index 8856da1..6dab32c 100644
--- a/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/BloomFilterTest.java
+++ b/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/edu/uci/ics/hyracks/storage/am/bloomfilter/BloomFilterTest.java
@@ -31,7 +31,9 @@
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
+import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
import edu.uci.ics.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
@@ -55,14 +57,19 @@
int numElements = 100;
int[] keyFields = { 0 };
- int numHashes = 5;
BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(),
keyFields);
+ double acceptanleFalsePositiveRate = 0.1;
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ acceptanleFalsePositiveRate);
+
bf.create();
bf.activate();
- IIndexBulkLoader builder = bf.createBuilder(numElements, numHashes);
+ IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(),
+ bloomFilterSpec.getNumBucketsPerElements());
int fieldCount = 2;
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
@@ -109,14 +116,19 @@
int numElements = 10000;
int[] keyFields = { 2, 4, 1 };
- int numHashes = 10;
BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(),
keyFields);
+ double acceptanleFalsePositiveRate = 0.1;
+ int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+ BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
+ acceptanleFalsePositiveRate);
+
bf.create();
bf.activate();
- IIndexBulkLoader builder = bf.createBuilder(numElements, numHashes);
+ IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(),
+ bloomFilterSpec.getNumBucketsPerElements());
int fieldCount = 5;
ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeExamplesTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeExamplesTest.java
index 6d9d565..c02d53d 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeExamplesTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeExamplesTest.java
@@ -40,8 +40,8 @@
harness.tearDown();
}
- protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException {
+ protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException {
return BTreeUtils.createBTree(harness.getBufferCache(), harness.getFileMapProvider(), typeTraits, cmpFactories,
BTreeLeafFrameType.REGULAR_NSM, harness.getFileReference());
}
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeMultiThreadTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeMultiThreadTest.java
index 746e885..3f38c05 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeMultiThreadTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeMultiThreadTest.java
@@ -47,8 +47,8 @@
}
@Override
- protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException {
+ protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException {
return BTreeUtils.createBTree(harness.getBufferCache(), harness.getFileMapProvider(), typeTraits, cmpFactories,
BTreeLeafFrameType.REGULAR_NSM, harness.getFileReference());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
index 644f348..539ed3e 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
@@ -32,12 +32,12 @@
private final LSMBTreeTestHarness harness = new LSMBTreeTestHarness();
@Override
- protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException {
+ protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException {
return LSMBTreeUtils.createLSMTree(harness.getMemBufferCache(), harness.getMemFreePageManager(),
harness.getIOManager(), harness.getFileReference(), harness.getDiskBufferCache(),
- harness.getDiskFileMapProvider(), typeTraits, cmpFactories, harness.getMergePolicy(),
- harness.getOperationTrackerFactory(), harness.getIOScheduler(),
+ harness.getDiskFileMapProvider(), typeTraits, cmpFactories, bloomFilterKeyFields,
+ harness.getMergePolicy(), harness.getOperationTrackerFactory(), harness.getIOScheduler(),
harness.getIOOperationCallbackProvider());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
index 978ff86..648e70f 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.lsm.btree;
import org.junit.Test;
@@ -30,8 +45,8 @@
index = LSMBTreeUtils.createLSMTree(harness.getMemBufferCache(), harness.getMemFreePageManager(),
harness.getIOManager(), harness.getFileReference(), harness.getDiskBufferCache(),
harness.getDiskFileMapProvider(), SerdeUtils.serdesToTypeTraits(keySerdes),
- SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), harness.getMergePolicy(),
- NoOpOperationTrackerFactory.INSTANCE, harness.getIOScheduler(),
+ SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), bloomFilterKeyFields,
+ harness.getMergePolicy(), NoOpOperationTrackerFactory.INSTANCE, harness.getIOScheduler(),
harness.getIOOperationCallbackProvider());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
index 64f58c3..f8ad0b2 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
@@ -35,8 +35,8 @@
index = LSMBTreeUtils.createLSMTree(harness.getMemBufferCache(), harness.getMemFreePageManager(),
harness.getIOManager(), harness.getFileReference(), harness.getDiskBufferCache(),
harness.getDiskFileMapProvider(), SerdeUtils.serdesToTypeTraits(keySerdes),
- SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), harness.getMergePolicy(),
- NoOpOperationTrackerFactory.INSTANCE, harness.getIOScheduler(),
+ SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), bloomFilterKeyFields,
+ harness.getMergePolicy(), NoOpOperationTrackerFactory.INSTANCE, harness.getIOScheduler(),
harness.getIOOperationCallbackProvider());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
index 0a0b201..c494448 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
@@ -48,12 +48,12 @@
}
@Override
- protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories)
- throws TreeIndexException {
+ protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
+ int[] bloomFilterKeyFields) throws TreeIndexException {
return LSMBTreeUtils.createLSMTree(harness.getMemBufferCache(), harness.getMemFreePageManager(),
harness.getIOManager(), harness.getFileReference(), harness.getDiskBufferCache(),
- harness.getDiskFileMapProvider(), typeTraits, cmpFactories, harness.getMergePolicy(),
- harness.getOperationTrackerFactory(), harness.getIOScheduler(),
+ harness.getDiskFileMapProvider(), typeTraits, cmpFactories, bloomFilterKeyFields,
+ harness.getMergePolicy(), harness.getOperationTrackerFactory(), harness.getIOScheduler(),
harness.getIOOperationCallbackProvider());
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
index 62186c2..5d2185a 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
@@ -74,7 +74,8 @@
private final int onDiskNumPages;
public LSMTreeRunner(int numBatches, int inMemPageSize, int inMemNumPages, int onDiskPageSize, int onDiskNumPages,
- ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories) throws BTreeException, HyracksException {
+ ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories, int[] bloomFilterKeyFields)
+ throws BTreeException, HyracksException {
this.numBatches = numBatches;
this.onDiskPageSize = onDiskPageSize;
@@ -95,8 +96,8 @@
new LIFOMetaDataFrameFactory());
this.ioScheduler = SynchronousScheduler.INSTANCE;
lsmtree = LSMBTreeUtils.createLSMTree(memBufferCache, memFreePageManager, ioManager, file, bufferCache, fmp,
- typeTraits, cmpFactories, NoMergePolicy.INSTANCE, ThreadCountingOperationTrackerFactory.INSTANCE,
- ioScheduler, NoOpIOOperationCallback.INSTANCE);
+ typeTraits, cmpFactories, bloomFilterKeyFields, NoMergePolicy.INSTANCE,
+ ThreadCountingOperationTrackerFactory.INSTANCE, ioScheduler, NoOpIOOperationCallback.INSTANCE);
}
@Override
diff --git a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
index e15845f..f790fde 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
@@ -71,9 +71,13 @@
throws Exception {
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] cmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes, numKeyFields);
+ int[] bloomFilterKeyFields = new int[numKeyFields];
+ for (int i = 0; i < numKeyFields; ++i) {
+ bloomFilterKeyFields[i] = i;
+ }
LSMBTree lsmTree = LSMBTreeUtils.createLSMTree(memBufferCache, memFreePageManager, ioManager, file,
- diskBufferCache, diskFileMapProvider, typeTraits, cmpFactories, mergePolicy, opTrackerFactory,
- ioScheduler, ioOpCallbackProvider);
+ diskBufferCache, diskFileMapProvider, typeTraits, cmpFactories, bloomFilterKeyFields, mergePolicy,
+ opTrackerFactory, ioScheduler, ioOpCallbackProvider);
LSMBTreeTestContext testCtx = new LSMBTreeTestContext(fieldSerdes, lsmTree);
return testCtx;
}