Added range search cursor for on-disk inverted index in preparation for implementing merge.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1842 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/tuples/ConcatenatingTupleReference.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/tuples/ConcatenatingTupleReference.java
index ca1fa1e..d82004c 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/tuples/ConcatenatingTupleReference.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/tuples/ConcatenatingTupleReference.java
@@ -17,6 +17,8 @@
import java.util.Arrays;
+import javax.swing.plaf.basic.BasicInternalFrameTitlePane.MaximizeAction;
+
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
/**
@@ -58,6 +60,14 @@
}
}
+ public int getNumTuples() {
+ return numTuples;
+ }
+
+ public boolean hasMaxTuples() {
+ return numTuples == tuples.length;
+ }
+
@Override
public int getFieldCount() {
return totalFieldCount;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
index d5266ff..3fe5b57 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
@@ -18,6 +18,8 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
public interface IInvertedIndexAccessor extends IIndexAccessor {
@@ -25,4 +27,9 @@
public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
throws HyracksDataException, IndexException;
+
+ public IIndexCursor createRangeSearchCursor();
+
+ public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws IndexException,
+ HyracksDataException;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
index c76799f..c7d6075 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
@@ -94,6 +94,19 @@
}
@Override
+ public IIndexCursor createRangeSearchCursor() {
+ return new LSMInvertedIndexRangeSearchCursor();
+ }
+
+ @Override
+ public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws IndexException,
+ HyracksDataException {
+ // TODO: Figure out what the initial state is here.
+ //LSMInvertedIndexRangeSearchCursor rangeSearchCursor = (LSMInvertedIndexRangeSearchCursor) cursor;
+ //rangeSearchCursor.open(initialState, searchPred);
+ }
+
+ @Override
public void physicalDelete(ITupleReference tuple) throws HyracksDataException, IndexException {
// TODO: Do we need this?
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 8dac1d0..a3e7bf1 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2010 by The Regents of the University of California
+ * Copyright 2009-2012 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
index c0acf81..7cc33e5 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
@@ -18,7 +18,9 @@
import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOpContext;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
@@ -76,6 +78,18 @@
index.openInvertedListCursor(listCursor, searchKey, opCtx);
}
+ @Override
+ public IIndexCursor createRangeSearchCursor() {
+ IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) index.getBTree().getLeafFrameFactory().createFrame();
+ return new BTreeRangeSearchCursor(leafFrame, false);
+ }
+
+ @Override
+ public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws IndexException,
+ HyracksDataException {
+ btreeAccessor.search(cursor, searchPred);
+ }
+
public BTreeAccessor getBTreeAccessor() {
return btreeAccessor;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
index bd88c8d..eecb0cd 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
@@ -463,6 +463,17 @@
}
@Override
+ public IIndexCursor createRangeSearchCursor() {
+ return new OnDiskInvertedIndexRangeSearchCursor(index, opCtx);
+ }
+
+ @Override
+ public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws HyracksDataException {
+ OnDiskInvertedIndexRangeSearchCursor rangeSearchCursor = (OnDiskInvertedIndexRangeSearchCursor) cursor;
+ rangeSearchCursor.open(null, searchPred);
+ }
+
+ @Override
public void insert(ITupleReference tuple) throws HyracksDataException, IndexException {
throw new UnsupportedOperationException("Insert not supported by inverted index.");
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
new file mode 100644
index 0000000..e4c9110
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexOpContext;
+import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.tuples.ConcatenatingTupleReference;
+import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+
+/**
+ * Scans a range of tokens, returning tuples containing a token and an inverted-list element.
+ */
+public class OnDiskInvertedIndexRangeSearchCursor implements IIndexCursor {
+
+ private final BTree btree;
+ private final IIndexAccessor btreeAccessor;
+ private final IInvertedIndex invIndex;
+ private final IIndexOpContext opCtx;
+ private final IInvertedListCursor invListCursor;
+ private boolean unpinNeeded;
+
+ private final IIndexCursor btreeCursor;
+ private RangePredicate btreePred;
+
+ private final PermutingTupleReference tokenTuple;
+ private ConcatenatingTupleReference concatTuple;
+
+ public OnDiskInvertedIndexRangeSearchCursor(IInvertedIndex invIndex, IIndexOpContext opCtx) {
+ this.btree = ((OnDiskInvertedIndex) invIndex).getBTree();
+ this.btreeAccessor = btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ this.invIndex = invIndex;
+ this.opCtx = opCtx;
+ // Project away non-token fields of the BTree tuples.
+ int[] fieldPermutation = new int[invIndex.getTokenTypeTraits().length];
+ for (int i = 0; i < invIndex.getTokenTypeTraits().length; i++) {
+ fieldPermutation[i] = i;
+ }
+ tokenTuple = new PermutingTupleReference(fieldPermutation);
+ btreeCursor = btreeAccessor.createSearchCursor();
+ concatTuple = new ConcatenatingTupleReference(2);
+ invListCursor = invIndex.createInvertedListCursor();
+ unpinNeeded = false;
+ }
+
+ @Override
+ public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
+ this.btreePred = (RangePredicate) searchPred;
+ try {
+ btreeAccessor.search(btreeCursor, btreePred);
+ } catch (IndexException e) {
+ throw new HyracksDataException(e);
+ }
+ invListCursor.pinPages();
+ }
+
+ @Override
+ public boolean hasNext() throws HyracksDataException {
+ if (invListCursor.hasNext()) {
+ return true;
+ }
+ invListCursor.unpinPages();
+ unpinNeeded = false;
+ if (!btreeCursor.hasNext()) {
+ return false;
+ }
+ btreeCursor.next();
+ tokenTuple.reset(btreeCursor.getTuple());
+ try {
+ invIndex.openInvertedListCursor(invListCursor, tokenTuple, opCtx);
+ } catch (IndexException e) {
+ throw new HyracksDataException(e);
+ }
+ invListCursor.pinPages();
+ unpinNeeded = true;
+ concatTuple.reset();
+ concatTuple.addTuple(tokenTuple);
+ return true;
+ }
+
+ @Override
+ public void next() throws HyracksDataException {
+ invListCursor.next();
+ if (concatTuple.hasMaxTuples()) {
+ concatTuple.removeLastTuple();
+ }
+ concatTuple.addTuple(invListCursor.getTuple());
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ if (unpinNeeded) {
+ invListCursor.unpinPages();
+ }
+ btreeCursor.close();
+ }
+
+ @Override
+ public void reset() throws HyracksDataException {
+ if (unpinNeeded) {
+ invListCursor.unpinPages();
+ }
+ open(null, btreePred);
+ }
+
+ @Override
+ public ITupleReference getTuple() {
+ return concatTuple;
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index a39ec46..d5696c5 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -30,6 +30,7 @@
import java.util.SortedSet;
import java.util.TreeSet;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.data.std.util.ByteArrayAccessibleOutputStream;
@@ -39,19 +40,20 @@
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestUtils;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.datagen.DocumentStringFieldValueGenerator;
import edu.uci.ics.hyracks.storage.am.common.datagen.IFieldValueGenerator;
import edu.uci.ics.hyracks.storage.am.common.datagen.SortedIntegerFieldValueGenerator;
import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
-import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
@@ -122,6 +124,52 @@
}
}
+ public static void compareActualAndExpectedIndexes(InvertedIndexTestContext testCtx) throws HyracksDataException,
+ IndexException {
+ IInvertedIndex invIndex = (IInvertedIndex) testCtx.getIndex();
+ int tokenFieldCount = invIndex.getTokenTypeTraits().length;
+ int invListFieldCount = invIndex.getInvListTypeTraits().length;
+ IInvertedIndexAccessor invIndexAccessor = (IInvertedIndexAccessor) invIndex.createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ IIndexCursor invIndexCursor = invIndexAccessor.createRangeSearchCursor();
+ MultiComparator tokenCmp = MultiComparator.create(invIndex.getTokenCmpFactories());
+ IBinaryComparatorFactory[] tupleCmpFactories = new IBinaryComparatorFactory[tokenFieldCount + invListFieldCount];
+ for (int i = 0; i < tokenFieldCount; i++) {
+ tupleCmpFactories[i] = invIndex.getTokenCmpFactories()[i];
+ }
+ for (int i = 0; i < invListFieldCount; i++) {
+ tupleCmpFactories[tokenFieldCount + i] = invIndex.getInvListCmpFactories()[i];
+ }
+ MultiComparator tupleCmp = MultiComparator.create(tupleCmpFactories);
+ RangePredicate nullPred = new RangePredicate(null, null, true, true, tokenCmp, tokenCmp);
+ invIndexAccessor.rangeSearch(invIndexCursor, nullPred);
+
+ // Helpers for generating a serialized inverted-list element from a CheckTuple from the expected index.
+ ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
+ ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(fieldSerdes.length);
+ ArrayTupleReference expectedTuple = new ArrayTupleReference();
+
+ Iterator<CheckTuple> expectedIter = testCtx.getCheckTuples().iterator();
+
+ // Compare index elements.
+ int count = 0;
+ try {
+ while (invIndexCursor.hasNext() && expectedIter.hasNext()) {
+ invIndexCursor.next();
+ ITupleReference actualTuple = invIndexCursor.getTuple();
+ CheckTuple expected = expectedIter.next();
+ OrderedIndexTestUtils.createTupleFromCheckTuple(expected, expectedBuilder, expectedTuple, fieldSerdes);
+ if (tupleCmp.compare(actualTuple, expectedTuple) != 0) {
+ fail("Index elements differ for token '" + expected.getField(0) + "' at position " + count + ".");
+ }
+ count++;
+ }
+ } finally {
+ invIndexCursor.close();
+ }
+ }
+
+ /*
@SuppressWarnings("unchecked")
public static void compareActualAndExpectedIndexes(InvertedIndexTestContext testCtx) throws HyracksDataException,
IndexException {
@@ -194,6 +242,7 @@
}
}
}
+ */
/**
* Determine the expected results with the simple ScanCount algorithm.
@@ -245,7 +294,7 @@
}
}
}
-
+
/*
public static OnDiskInvertedIndex createTestInvertedIndex(LSMInvertedIndexTestHarness harness, IBinaryTokenizer tokenizer)
throws HyracksDataException {