Implemented in-memory component for length-partitioned inverted indexes.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_length_filter@2466 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
index 4f667b2..c1cab0a 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
@@ -204,6 +204,7 @@
}
bufferCache.unpin(page);
}
+ tupleBuilder.reset();
tupleIndex = 0;
page = null;
pred = null;
@@ -216,7 +217,7 @@
close();
} catch (Exception e) {
e.printStackTrace();
- }
+ }
}
@Override
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
index c62cc57..de703ac 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
@@ -44,7 +44,7 @@
public int getStartOff();
public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException, IndexException;
-
+
// for debugging
@SuppressWarnings("rawtypes")
public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException, IndexException;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java
new file mode 100644
index 0000000..013cff4
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api;
+
+import java.util.ArrayList;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedListPartitions;
+
+public interface IPartitionedInvertedIndex {
+ public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ int numTokensLowerBound, int numTokensUpperBound, InvertedListPartitions invListPartitions)
+ throws HyracksDataException, IndexException;
+
+ public void cleanupPartitionState(ArrayList<IInvertedListCursor> invListCursors) throws HyracksDataException;
+}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 851451e..2ee1fe1 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -312,7 +312,7 @@
/**
* The keys in the in-memory deleted-keys BTree only refer to on-disk components.
* We delete documents from the in-memory inverted index by deleting its entries directly,
- * and do NOT add the deleted key to the deleted-keys BTree.
+ * while still adding the deleted key to the deleted-keys BTree.
* Otherwise, inserts would have to remove keys from the in-memory deleted-keys BTree which
* may cause incorrect behavior (lost deletes) in the following pathological case:
* Insert doc 1, flush, delete doc 1, insert doc 1
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
index f0af06a..668250c 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
@@ -44,16 +44,16 @@
public class InMemoryInvertedIndex implements IInvertedIndex {
- private final BTree btree;
- private final FileReference memBTreeFile = new FileReference(new File("memBTree"));
- private final ITypeTraits[] tokenTypeTraits;
- private final IBinaryComparatorFactory[] tokenCmpFactories;
- private final ITypeTraits[] invListTypeTraits;
- private final IBinaryComparatorFactory[] invListCmpFactories;
- private final IBinaryTokenizerFactory tokenizerFactory;
+ protected final BTree btree;
+ protected final FileReference memBTreeFile = new FileReference(new File("memBTree"));
+ protected final ITypeTraits[] tokenTypeTraits;
+ protected final IBinaryComparatorFactory[] tokenCmpFactories;
+ protected final ITypeTraits[] invListTypeTraits;
+ protected final IBinaryComparatorFactory[] invListCmpFactories;
+ protected final IBinaryTokenizerFactory tokenizerFactory;
- private final ITypeTraits[] btreeTypeTraits;
- private final IBinaryComparatorFactory[] btreeCmpFactories;
+ protected final ITypeTraits[] btreeTypeTraits;
+ protected final IBinaryComparatorFactory[] btreeCmpFactories;
public InMemoryInvertedIndex(IBufferCache memBufferCache, IFreePageManager memFreePageManager,
ITypeTraits[] invListTypeTraits, IBinaryComparatorFactory[] invListCmpFactories,
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
index 5a352c3..a62aaf1 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
@@ -46,7 +46,7 @@
public InMemoryInvertedIndexAccessor(InMemoryInvertedIndex index, IIndexOperationContext opCtx) {
this.opCtx = opCtx;
this.index = index;
- this.searcher = new TOccurrenceSearcher(hyracksCtx, index);
+ this.searcher = createSearcher();
this.btreeAccessor = (BTreeAccessor) index.getBTree().createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
}
@@ -62,7 +62,7 @@
opCtx.setOperation(IndexOperation.DELETE);
index.delete(tuple, btreeAccessor, opCtx);
}
-
+
@Override
public IIndexCursor createSearchCursor() {
return new OnDiskInvertedIndexSearchCursor(searcher, index.getInvListTypeTraits().length);
@@ -89,13 +89,13 @@
IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) index.getBTree().getLeafFrameFactory().createFrame();
return new BTreeRangeSearchCursor(leafFrame, false);
}
-
+
@Override
public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws IndexException,
HyracksDataException {
btreeAccessor.search(cursor, searchPred);
}
-
+
public BTreeAccessor getBTreeAccessor() {
return btreeAccessor;
}
@@ -109,4 +109,8 @@
public void upsert(ITupleReference tuple) throws HyracksDataException, IndexException {
throw new UnsupportedOperationException("Upsert not supported by in-memory inverted index.");
}
+
+ protected IInvertedIndexSearcher createSearcher() {
+ return new TOccurrenceSearcher(hyracksCtx, index);
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexOpContext.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexOpContext.java
index 4a77e52..c109cfd 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexOpContext.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexOpContext.java
@@ -39,7 +39,7 @@
public MultiComparator tokenFieldsCmp;
// To generate in-memory BTree tuples for insertions.
- private final IBinaryTokenizerFactory tokenizerFactory;
+ protected final IBinaryTokenizerFactory tokenizerFactory;
public InvertedIndexTokenizingTupleIterator tupleIter;
public InMemoryInvertedIndexOpContext(BTree btree, IBinaryComparatorFactory[] tokenCmpFactories,
@@ -52,19 +52,16 @@
@Override
public void setOperation(IndexOperation newOp) {
switch (newOp) {
- case INSERT:
+ case INSERT:
case DELETE: {
if (tupleIter == null) {
- IBinaryTokenizer tokenizer = tokenizerFactory.createTokenizer();
- tupleIter = new InvertedIndexTokenizingTupleIterator(tokenCmpFactories.length,
- btree.getFieldCount() - tokenCmpFactories.length, tokenizer);
+ setTokenizingTupleIterator();
}
break;
}
case SEARCH: {
if (btreePred == null) {
btreePred = new RangePredicate(null, null, true, true, null, null);
- // TODO: Ignore opcallbacks for now.
btreeAccessor = (BTreeAccessor) btree.createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
btreeCmp = MultiComparator.create(btree.getComparatorFactories());
@@ -88,4 +85,10 @@
public IndexOperation getOperation() {
return op;
}
+
+ protected void setTokenizingTupleIterator() {
+ IBinaryTokenizer tokenizer = tokenizerFactory.createTokenizer();
+ tupleIter = new InvertedIndexTokenizingTupleIterator(tokenCmpFactories.length, btree.getFieldCount()
+ - tokenCmpFactories.length, tokenizer);
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
index 21e2bf7..41dc7d8 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
@@ -63,15 +63,18 @@
}
public void prepare(BTreeAccessor btreeAccessor, RangePredicate btreePred, MultiComparator tokenFieldsCmp,
- MultiComparator btreeCmp) {
- this.btreeAccessor = btreeAccessor;
- this.btreeCursor = btreeAccessor.createSearchCursor();
- this.countingCursor = btreeAccessor.createCountingSearchCursor();
- this.btreePred = btreePred;
- this.btreePred.setLowKeyComparator(tokenFieldsCmp);
- this.btreePred.setHighKeyComparator(tokenFieldsCmp);
- this.tokenFieldsCmp = tokenFieldsCmp;
- this.btreeCmp = btreeCmp;
+ MultiComparator btreeCmp) throws HyracksDataException, IndexException {
+ // Avoid object creation if this.btreeAccessor == btreeAccessor.
+ if (this.btreeAccessor != btreeAccessor) {
+ this.btreeAccessor = btreeAccessor;
+ this.btreeCursor = btreeAccessor.createSearchCursor();
+ this.countingCursor = btreeAccessor.createCountingSearchCursor();
+ this.btreePred = btreePred;
+ this.btreePred.setLowKeyComparator(tokenFieldsCmp);
+ this.btreePred.setHighKeyComparator(tokenFieldsCmp);
+ this.tokenFieldsCmp = tokenFieldsCmp;
+ this.btreeCmp = btreeCmp;
+ }
}
@Override
@@ -97,6 +100,10 @@
@Override
public void pinPages() throws HyracksDataException, IndexException {
+ if (cursorNeedsClose) {
+ // Cursor has already been positioned.
+ return;
+ }
btreePred.setLowKeyComparator(tokenFieldsCmp);
btreePred.setHighKeyComparator(tokenFieldsCmp);
btreePred.setLowKey(tokenTuple, true);
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java
new file mode 100644
index 0000000..43bf439
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import java.util.ArrayList;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeException;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedListPartitions;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.PartitionedTOccurrenceSearcher;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.PartitionedInvertedIndexTokenizingTupleIterator;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+
+public class PartitionedInMemoryInvertedIndex extends InMemoryInvertedIndex implements IPartitionedInvertedIndex {
+
+ protected int minPartitionIndex = Integer.MAX_VALUE;
+ protected int maxPartitionIndex = Integer.MIN_VALUE;
+
+ public PartitionedInMemoryInvertedIndex(IBufferCache memBufferCache, IFreePageManager memFreePageManager,
+ ITypeTraits[] invListTypeTraits, IBinaryComparatorFactory[] invListCmpFactories,
+ ITypeTraits[] tokenTypeTraits, IBinaryComparatorFactory[] tokenCmpFactories,
+ IBinaryTokenizerFactory tokenizerFactory) throws BTreeException {
+ super(memBufferCache, memFreePageManager, invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+ tokenCmpFactories, tokenizerFactory);
+ }
+
+ @Override
+ public void insert(ITupleReference tuple, BTreeAccessor btreeAccessor, IIndexOperationContext ictx)
+ throws HyracksDataException, IndexException {
+ super.insert(tuple, btreeAccessor, ictx);
+ PartitionedInMemoryInvertedIndexOpContext ctx = (PartitionedInMemoryInvertedIndexOpContext) ictx;
+ PartitionedInvertedIndexTokenizingTupleIterator tupleIter = (PartitionedInvertedIndexTokenizingTupleIterator) ctx.tupleIter;
+ updatePartitionIndexes(tupleIter.getNumTokens());
+ }
+
+ @Override
+ public void clear() throws HyracksDataException {
+ super.clear();
+ minPartitionIndex = Integer.MAX_VALUE;
+ maxPartitionIndex = Integer.MIN_VALUE;
+ }
+
+ public synchronized void updatePartitionIndexes(int numTokens) {
+ if (numTokens < minPartitionIndex) {
+ minPartitionIndex = numTokens;
+ }
+ if (numTokens > maxPartitionIndex) {
+ maxPartitionIndex = numTokens;
+ }
+ }
+
+ @Override
+ public IIndexAccessor createAccessor(IModificationOperationCallback modificationCallback,
+ ISearchOperationCallback searchCallback) {
+ return new PartitionedInMemoryInvertedIndexAccessor(this, new PartitionedInMemoryInvertedIndexOpContext(btree,
+ tokenCmpFactories, tokenizerFactory));
+ }
+
+ @Override
+ public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ int numTokensLowerBound, int numTokensUpperBound, InvertedListPartitions invListPartitions)
+ throws HyracksDataException, IndexException {
+ if (minPartitionIndex == Integer.MAX_VALUE || maxPartitionIndex == Integer.MIN_VALUE) {
+ // Index must be empty.
+ return;
+ }
+ int partitionStartIndex = minPartitionIndex;
+ int partitionEndIndex = maxPartitionIndex;
+ if (numTokensLowerBound >= 0) {
+ partitionStartIndex = Math.max(minPartitionIndex, numTokensLowerBound);
+ }
+ if (numTokensUpperBound >= 0) {
+ partitionEndIndex = Math.min(maxPartitionIndex, numTokensUpperBound);
+ }
+
+ PartitionedTOccurrenceSearcher partSearcher = (PartitionedTOccurrenceSearcher) searcher;
+ PartitionedInMemoryInvertedIndexOpContext ctx = (PartitionedInMemoryInvertedIndexOpContext) ictx;
+ ctx.setOperation(IndexOperation.SEARCH);
+ // We can pick either of the full low or high search key, since they should be identical here.
+ ITupleReference searchKey = partSearcher.getFullLowSearchKey();
+ ctx.btreePred.setLowKey(searchKey, true);
+ ctx.btreePred.setHighKey(searchKey, true);
+ // Go through all possibly partitions and see if the token matches.
+ // TODO: This procedure could be made more efficient by determining the next partition to search
+ // using the last existing partition and re-searching the BTree with an open interval as low key.
+ for (int i = partitionStartIndex; i <= partitionEndIndex; i++) {
+ partSearcher.setNumTokensBoundsInSearchKeys(i, i);
+ InMemoryInvertedListCursor inMemListCursor = (InMemoryInvertedListCursor) partSearcher
+ .getCachedInvertedListCursor();
+ inMemListCursor.prepare(ctx.btreeAccessor, ctx.btreePred, ctx.tokenFieldsCmp, ctx.btreeCmp);
+ inMemListCursor.reset(searchKey);
+ inMemListCursor.pinPages();
+ if (inMemListCursor.hasNext()) {
+ invListPartitions.addInvertedListCursor(inMemListCursor, i);
+ // Leave the cursor "pinned" to avoid re-traversing the underlying BTree when merging the inverted lists.
+ } else {
+ inMemListCursor.unpinPages();
+ }
+ }
+ }
+
+ @Override
+ public void cleanupPartitionState(ArrayList<IInvertedListCursor> invListCursors) throws HyracksDataException {
+ // Make sure to unpin/close cursors if the entire partition is pruned.
+ for (IInvertedListCursor cursor : invListCursors) {
+ cursor.unpinPages();
+ }
+ }
+}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexAccessor.java
new file mode 100644
index 0000000..813961c
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexAccessor.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.PartitionedTOccurrenceSearcher;
+
+public class PartitionedInMemoryInvertedIndexAccessor extends InMemoryInvertedIndexAccessor {
+
+ public PartitionedInMemoryInvertedIndexAccessor(InMemoryInvertedIndex index, IIndexOperationContext opCtx) {
+ super(index, opCtx);
+ }
+
+ protected IInvertedIndexSearcher createSearcher() {
+ return new PartitionedTOccurrenceSearcher(hyracksCtx, index);
+ }
+}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexOpContext.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexOpContext.java
new file mode 100644
index 0000000..f0e5046
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexOpContext.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.PartitionedInvertedIndexTokenizingTupleIterator;
+
+public class PartitionedInMemoryInvertedIndexOpContext extends InMemoryInvertedIndexOpContext {
+
+ public PartitionedInMemoryInvertedIndexOpContext(BTree btree, IBinaryComparatorFactory[] tokenCmpFactories,
+ IBinaryTokenizerFactory tokenizerFactory) {
+ super(btree, tokenCmpFactories, tokenizerFactory);
+ }
+
+ protected void setTokenizingTupleIterator() {
+ IBinaryTokenizer tokenizer = tokenizerFactory.createTokenizer();
+ tupleIter = new PartitionedInvertedIndexTokenizingTupleIterator(tokenCmpFactories.length, btree.getFieldCount()
+ - tokenCmpFactories.length, tokenizer);
+ }
+}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
index 6614be0..f9a146d 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
@@ -15,23 +15,29 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+import java.util.ArrayList;
+
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedListPartitions;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.PartitionedTOccurrenceSearcher;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
-public class PartitionedOnDiskInvertedIndex extends OnDiskInvertedIndex {
+public class PartitionedOnDiskInvertedIndex extends OnDiskInvertedIndex implements IPartitionedInvertedIndex {
protected final int PARTITIONING_NUM_TOKENS_FIELD = 1;
@@ -56,19 +62,28 @@
return new PartitionedOnDiskInvertedIndexAccessor(this);
}
- public void openInvertedListPartitionCursors(InvertedListPartitions invListPartitions,
- ITupleReference lowSearchKey, ITupleReference highSearchKey, IIndexOperationContext ictx)
+ @Override
+ public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ int numTokensLowerBound, int numTokensUpperBound, InvertedListPartitions invListPartitions)
throws HyracksDataException, IndexException {
+ PartitionedTOccurrenceSearcher partSearcher = (PartitionedTOccurrenceSearcher) searcher;
OnDiskInvertedIndexOpContext ctx = (OnDiskInvertedIndexOpContext) ictx;
- if (lowSearchKey.getFieldCount() == 1) {
+ ITupleReference lowSearchKey = null;
+ ITupleReference highSearchKey = null;
+ partSearcher.setNumTokensBoundsInSearchKeys(numTokensLowerBound, numTokensUpperBound);
+ if (numTokensLowerBound < 0) {
ctx.btreePred.setLowKeyComparator(ctx.prefixSearchCmp);
+ lowSearchKey = partSearcher.getPrefixSearchKey();
} else {
ctx.btreePred.setLowKeyComparator(ctx.searchCmp);
+ lowSearchKey = partSearcher.getFullLowSearchKey();
}
- if (highSearchKey.getFieldCount() == 1) {
+ if (numTokensUpperBound < 0) {
ctx.btreePred.setHighKeyComparator(ctx.prefixSearchCmp);
+ highSearchKey = partSearcher.getPrefixSearchKey();
} else {
ctx.btreePred.setHighKeyComparator(ctx.searchCmp);
+ highSearchKey = partSearcher.getFullHighSearchKey();
}
ctx.btreePred.setLowKey(lowSearchKey, true);
ctx.btreePred.setHighKey(highSearchKey, true);
@@ -77,11 +92,21 @@
while (ctx.btreeCursor.hasNext()) {
ctx.btreeCursor.next();
ITupleReference btreeTuple = ctx.btreeCursor.getTuple();
- invListPartitions.addInvertedListCursor(btreeTuple);
+ int numTokens = IntegerSerializerDeserializer.getInt(
+ btreeTuple.getFieldData(PARTITIONING_NUM_TOKENS_FIELD),
+ btreeTuple.getFieldStart(PARTITIONING_NUM_TOKENS_FIELD));
+ IInvertedListCursor invListCursor = partSearcher.getCachedInvertedListCursor();
+ resetInvertedListCursor(btreeTuple, invListCursor);
+ invListPartitions.addInvertedListCursor(invListCursor, numTokens);
}
} finally {
ctx.btreeCursor.close();
ctx.btreeCursor.reset();
}
}
+
+ @Override
+ public void cleanupPartitionState(ArrayList<IInvertedListCursor> invListCursors) throws HyracksDataException {
+ // Do nothing.
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java
index 70071a1..99e61bd 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java
@@ -18,28 +18,25 @@
import java.util.ArrayList;
import java.util.Arrays;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IObjectFactory;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.ObjectCache;
public class InvertedListPartitions {
- private final int PARTITIONING_NUM_TOKENS_FIELD = 1;
private final int DEFAULT_NUM_PARTITIONS = 10;
- private final int PARTITIONS_SLACK_SIZE = 10;
- private final OnDiskInvertedIndex invIndex;
- private final ObjectCache<IInvertedListCursor> invListCursorCache;
+ private final int PARTITIONS_SLACK_SIZE = 10;
+ private final int OBJECT_CACHE_INIT_SIZE = 10;
+ private final int OBJECT_CACHE_EXPAND_SIZE = 10;
+ private final IObjectFactory<ArrayList<IInvertedListCursor>> arrayListFactory;
private final ObjectCache<ArrayList<IInvertedListCursor>> arrayListCache;
private ArrayList<IInvertedListCursor>[] partitions;
private int minValidPartitionIndex;
private int maxValidPartitionIndex;
- public InvertedListPartitions(OnDiskInvertedIndex invIndex, ObjectCache<IInvertedListCursor> invListCursorCache,
- ObjectCache<ArrayList<IInvertedListCursor>> arrayListCache) {
- this.invIndex = invIndex;
- this.invListCursorCache = invListCursorCache;
- this.arrayListCache = arrayListCache;
+ public InvertedListPartitions() {
+ this.arrayListFactory = new ArrayListFactory<IInvertedListCursor>();
+ this.arrayListCache = new ObjectCache<ArrayList<IInvertedListCursor>>(arrayListFactory, OBJECT_CACHE_INIT_SIZE,
+ OBJECT_CACHE_EXPAND_SIZE);
}
@SuppressWarnings("unchecked")
@@ -58,17 +55,12 @@
}
Arrays.fill(partitions, null);
}
- invListCursorCache.reset();
arrayListCache.reset();
minValidPartitionIndex = Integer.MAX_VALUE;
maxValidPartitionIndex = Integer.MIN_VALUE;
}
- public void addInvertedListCursor(ITupleReference btreeTuple) {
- IInvertedListCursor listCursor = invListCursorCache.getNext();
- invIndex.resetInvertedListCursor(btreeTuple, listCursor);
- int numTokens = IntegerSerializerDeserializer.getInt(btreeTuple.getFieldData(PARTITIONING_NUM_TOKENS_FIELD),
- btreeTuple.getFieldStart(PARTITIONING_NUM_TOKENS_FIELD));
+ public void addInvertedListCursor(IInvertedListCursor listCursor, int numTokens) {
if (numTokens + 1 >= partitions.length) {
partitions = Arrays.copyOf(partitions, numTokens + PARTITIONS_SLACK_SIZE);
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
index 522500f..81591e6 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2010 by The Regents of the University of California
+ * Copyright 2009-2012 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -23,18 +23,16 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.tuples.ConcatenatingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IObjectFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexSearchCursor;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.PartitionedOnDiskInvertedIndex;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.ObjectCache;
public class PartitionedTOccurrenceSearcher extends AbstractTOccurrenceSearcher {
@@ -42,20 +40,42 @@
protected final ArrayTupleReference lowerBoundTuple = new ArrayTupleReference();
protected final ArrayTupleBuilder upperBoundTupleBuilder = new ArrayTupleBuilder(1);
protected final ArrayTupleReference upperBoundTuple = new ArrayTupleReference();
- protected final ConcatenatingTupleReference partLowSearchKey = new ConcatenatingTupleReference(2);
- protected final ConcatenatingTupleReference partHighSearchKey = new ConcatenatingTupleReference(2);
+ protected final ConcatenatingTupleReference fullLowSearchKey = new ConcatenatingTupleReference(2);
+ protected final ConcatenatingTupleReference fullHighSearchKey = new ConcatenatingTupleReference(2);
- protected final IObjectFactory<ArrayList<IInvertedListCursor>> arrayListFactory;
- protected final ObjectCache<ArrayList<IInvertedListCursor>> arrayListCache;
-
- protected final InvertedListPartitions partitions;
+ protected final InvertedListPartitions partitions = new InvertedListPartitions();
public PartitionedTOccurrenceSearcher(IHyracksCommonContext ctx, IInvertedIndex invIndex) {
super(ctx, invIndex);
- this.arrayListFactory = new ArrayListFactory<IInvertedListCursor>();
- this.arrayListCache = new ObjectCache<ArrayList<IInvertedListCursor>>(arrayListFactory, 10, 10);
- OnDiskInvertedIndex partInvIndex = (OnDiskInvertedIndex) invIndex;
- this.partitions = new InvertedListPartitions(partInvIndex, invListCursorCache, arrayListCache);
+ initHelperTuples();
+ }
+
+ private void initHelperTuples() {
+ try {
+ lowerBoundTupleBuilder.reset();
+ // Write dummy value.
+ lowerBoundTupleBuilder.getDataOutput().writeInt(Integer.MIN_VALUE);
+ lowerBoundTupleBuilder.addFieldEndOffset();
+ lowerBoundTuple.reset(lowerBoundTupleBuilder.getFieldEndOffsets(), lowerBoundTupleBuilder.getByteArray());
+ // Only needed for setting the number of fields in searchKey.
+ searchKey.reset(queryTokenAccessor, 0);
+ fullLowSearchKey.reset();
+ fullLowSearchKey.addTuple(searchKey);
+ fullLowSearchKey.addTuple(lowerBoundTuple);
+
+ upperBoundTupleBuilder.reset();
+ // Write dummy value.
+ upperBoundTupleBuilder.getDataOutput().writeInt(Integer.MAX_VALUE);
+ upperBoundTupleBuilder.addFieldEndOffset();
+ upperBoundTuple.reset(upperBoundTupleBuilder.getFieldEndOffsets(), upperBoundTupleBuilder.getByteArray());
+ // Only needed for setting the number of fields in searchKey.
+ searchKey.reset(queryTokenAccessor, 0);
+ fullHighSearchKey.reset();
+ fullHighSearchKey.addTuple(searchKey);
+ fullHighSearchKey.addTuple(upperBoundTuple);
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
}
public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred,
@@ -66,48 +86,14 @@
IInvertedIndexSearchModifier searchModifier = searchPred.getSearchModifier();
int numTokensLowerBound = searchModifier.getNumTokensLowerBound(numQueryTokens);
int numTokensUpperBound = searchModifier.getNumTokensUpperBound(numQueryTokens);
- ITupleReference lowSearchKey = null;
- ITupleReference highSearchKey = null;
- try {
- if (numTokensLowerBound >= 0) {
- lowerBoundTupleBuilder.reset();
- lowerBoundTupleBuilder.getDataOutput().writeInt(numTokensLowerBound);
- lowerBoundTupleBuilder.addFieldEndOffset();
- lowerBoundTuple.reset(lowerBoundTupleBuilder.getFieldEndOffsets(),
- lowerBoundTupleBuilder.getByteArray());
- // Only needed for setting the number of fields in searchKey.
- searchKey.reset(queryTokenAccessor, 0);
- partLowSearchKey.reset();
- partLowSearchKey.addTuple(searchKey);
- partLowSearchKey.addTuple(lowerBoundTuple);
- lowSearchKey = partLowSearchKey;
- } else {
- lowSearchKey = searchKey;
- }
- if (numTokensUpperBound >= 0) {
- upperBoundTupleBuilder.reset();
- upperBoundTupleBuilder.getDataOutput().writeInt(numTokensUpperBound);
- upperBoundTupleBuilder.addFieldEndOffset();
- upperBoundTuple.reset(upperBoundTupleBuilder.getFieldEndOffsets(),
- upperBoundTupleBuilder.getByteArray());
- // Only needed for setting the number of fields in searchKey.
- searchKey.reset(queryTokenAccessor, 0);
- partHighSearchKey.reset();
- partHighSearchKey.addTuple(searchKey);
- partHighSearchKey.addTuple(upperBoundTuple);
- highSearchKey = partHighSearchKey;
- } else {
- highSearchKey = searchKey;
- }
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- PartitionedOnDiskInvertedIndex partInvIndex = (PartitionedOnDiskInvertedIndex) invIndex;
+ IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex;
+ invListCursorCache.reset();
partitions.reset(numTokensLowerBound, numTokensUpperBound);
for (int i = 0; i < numQueryTokens; i++) {
searchKey.reset(queryTokenAccessor, i);
- partInvIndex.openInvertedListPartitionCursors(partitions, lowSearchKey, highSearchKey, ictx);
+ partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound,
+ partitions);
}
occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
@@ -126,6 +112,7 @@
}
// Prune partition because no element in it can satisfy the occurrence threshold.
if (partitionCursors[i].size() < occurrenceThreshold) {
+ partInvIndex.cleanupPartitionState(partitionCursors[i]);
continue;
}
// Merge inverted lists of current partition.
@@ -136,4 +123,27 @@
resultCursor.open(null, searchPred);
}
+
+ public void setNumTokensBoundsInSearchKeys(int numTokensLowerBound, int numTokensUpperBound) {
+ IntegerSerializerDeserializer.putInt(numTokensLowerBound, lowerBoundTuple.getFieldData(0),
+ lowerBoundTuple.getFieldStart(0));
+ IntegerSerializerDeserializer.putInt(numTokensUpperBound, upperBoundTuple.getFieldData(0),
+ upperBoundTuple.getFieldStart(0));
+ }
+
+ public ITupleReference getPrefixSearchKey() {
+ return searchKey;
+ }
+
+ public ITupleReference getFullLowSearchKey() {
+ return fullLowSearchKey;
+ }
+
+ public ITupleReference getFullHighSearchKey() {
+ return fullHighSearchKey;
+ }
+
+ public IInvertedListCursor getCachedInvertedListCursor() {
+ return invListCursorCache.getNext();
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
index 3ffea85..1a7bffa 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
@@ -44,6 +44,7 @@
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory.InMemoryInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory.PartitionedInMemoryInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeElementInvertedListBuilder;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeElementInvertedListBuilderFactory;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex;
@@ -64,6 +65,15 @@
tokenTypeTraits, tokenCmpFactories, tokenizerFactory);
}
+ public static InMemoryInvertedIndex createPartitionedInMemoryBTreeInvertedindex(IBufferCache memBufferCache,
+ IFreePageManager memFreePageManager, ITypeTraits[] invListTypeTraits,
+ IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
+ IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory)
+ throws BTreeException {
+ return new PartitionedInMemoryInvertedIndex(memBufferCache, memFreePageManager, invListTypeTraits,
+ invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory);
+ }
+
public static OnDiskInvertedIndex createOnDiskInvertedIndex(IBufferCache bufferCache,
IFileMapProvider fileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
@@ -73,7 +83,7 @@
return new OnDiskInvertedIndex(bufferCache, fileMapProvider, builder, invListTypeTraits, invListCmpFactories,
tokenTypeTraits, tokenCmpFactories, btreeFile, invListsFile);
}
-
+
public static PartitionedOnDiskInvertedIndex createPartitionedOnDiskInvertedIndex(IBufferCache bufferCache,
IFileMapProvider fileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingNumTokensTupleIterator.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/PartitionedInvertedIndexTokenizingTupleIterator.java
similarity index 89%
rename from hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingNumTokensTupleIterator.java
rename to hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/PartitionedInvertedIndexTokenizingTupleIterator.java
index 347ea73..e0477b2 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingNumTokensTupleIterator.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/PartitionedInvertedIndexTokenizingTupleIterator.java
@@ -23,11 +23,11 @@
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken;
// TODO: We can possibly avoid copying the data into a new tuple here.
-public class InvertedIndexTokenizingNumTokensTupleIterator extends InvertedIndexTokenizingTupleIterator {
+public class PartitionedInvertedIndexTokenizingTupleIterator extends InvertedIndexTokenizingTupleIterator {
protected int numTokens = 0;
- public InvertedIndexTokenizingNumTokensTupleIterator(int tokensFieldCount, int invListFieldCount,
+ public PartitionedInvertedIndexTokenizingTupleIterator(int tokensFieldCount, int invListFieldCount,
IBinaryTokenizer tokenizer) {
super(tokensFieldCount, invListFieldCount, tokenizer);
}
@@ -65,4 +65,8 @@
// Reset tuple reference for insert operation.
tupleReference.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
}
+
+ public int getNumTokens() {
+ return numTokens;
+ }
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..eac7765
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexDeleteTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexDeleteTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedInMemoryInvertedIndexDeleteTest extends AbstractInvertedIndexDeleteTest {
+
+ public PartitionedInMemoryInvertedIndexDeleteTest() {
+ super(InvertedIndexType.PARTITIONED_INMEMORY, false);
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexInsertTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexInsertTest.java
new file mode 100644
index 0000000..8342efd
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexInsertTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedInMemoryInvertedIndexInsertTest extends AbstractInvertedIndexLoadTest {
+
+ public PartitionedInMemoryInvertedIndexInsertTest() {
+ super(InvertedIndexType.PARTITIONED_INMEMORY, false, 1);
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexSearchTest.java
new file mode 100644
index 0000000..385d65d
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexSearchTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexSearchTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedInMemoryInvertedIndexSearchTest extends AbstractInvertedIndexSearchTest {
+
+ public PartitionedInMemoryInvertedIndexSearchTest() {
+ super(InvertedIndexType.PARTITIONED_INMEMORY, false);
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
index eb75a8a..4edb1f6 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
@@ -117,13 +117,18 @@
// Create index and test context.
IInvertedIndex invIndex;
switch (invIndexType) {
- case INMEMORY:
- case PARTITIONED_INMEMORY: {
+ case INMEMORY: {
invIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
harness.getMemFreePageManager(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
tokenCmpFactories, tokenizerFactory);
break;
}
+ case PARTITIONED_INMEMORY: {
+ invIndex = InvertedIndexUtils.createPartitionedInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
+ harness.getMemFreePageManager(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+ tokenCmpFactories, tokenizerFactory);
+ break;
+ }
case ONDISK: {
invIndex = InvertedIndexUtils.createOnDiskInvertedIndex(harness.getDiskBufferCache(),
harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
@@ -162,7 +167,7 @@
case PARTITIONED_INMEMORY:
case PARTITIONED_ONDISK:
case PARTITIONED_LSM: {
- indexTupleIter = new InvertedIndexTokenizingNumTokensTupleIterator(
+ indexTupleIter = new PartitionedInvertedIndexTokenizingTupleIterator(
invIndex.getTokenTypeTraits().length, invIndex.getInvListTypeTraits().length,
tokenizerFactory.createTokenizer());
break;
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
index 53237d9..38896fb 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
@@ -124,7 +124,7 @@
}
return fieldSerdes;
}
-
+
private static ISerializerDeserializer[] getHashedIndexFieldSerdes(InvertedIndexType invIndexType)
throws IndexException {
ISerializerDeserializer[] fieldSerdes = null;
@@ -150,7 +150,7 @@
}
return fieldSerdes;
}
-
+
public static LSMInvertedIndexTestContext createWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
InvertedIndexType invIndexType) throws IOException, IndexException {
ISerializerDeserializer[] fieldSerdes = getNonHashedIndexFieldSerdes(invIndexType);
@@ -161,7 +161,7 @@
fieldSerdes.length - 1, tokenizerFactory, invIndexType);
return testCtx;
}
-
+
public static LSMInvertedIndexTestContext createHashedWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
InvertedIndexType invIndexType) throws IOException, IndexException {
ISerializerDeserializer[] fieldSerdes = getHashedIndexFieldSerdes(invIndexType);
@@ -302,8 +302,8 @@
* Compares actual and expected indexes by comparing their inverted-lists one by one. Exercises the openInvertedListCursor() method of the inverted-index accessor.
*/
@SuppressWarnings("unchecked")
- public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx) throws HyracksDataException,
- IndexException {
+ public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx)
+ throws HyracksDataException, IndexException {
IInvertedIndex invIndex = (IInvertedIndex) testCtx.getIndex();
ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
MultiComparator invListCmp = MultiComparator.create(invIndex.getInvListCmpFactories());
@@ -397,14 +397,15 @@
break;
}
}
- getExpectedResults(scanCountArray, checkTuples, searchDocument, tokenizer, tokenSerde,
- searchModifier, expectedResults, isPartitioned);
+ getExpectedResults(scanCountArray, checkTuples, searchDocument, tokenizer, tokenSerde, searchModifier,
+ expectedResults, isPartitioned);
}
-
+
@SuppressWarnings("unchecked")
public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples,
ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde,
- IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, boolean isPartitioned) throws IOException {
+ IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, boolean isPartitioned)
+ throws IOException {
// Reset scan count array.
Arrays.fill(scanCountArray, 0);
expectedResults.clear();
@@ -459,7 +460,7 @@
highKey = new CheckTuple(2, 2);
highKey.appendField(tokenObj);
highKey.appendField(Integer.valueOf(numTokensUpperBound));
- }
+ }
// Get view over check tuples containing inverted-list corresponding to token.
SortedSet<CheckTuple> invList = OrderedIndexTestUtils.getPrefixExpectedSubset(checkTuples, lowKey, highKey);
@@ -471,7 +472,7 @@
scanCountArray[element]++;
}
}
-
+
// Run through scan count array, and see whether elements satisfy the given occurrence threshold.
expectedResults.clear();
for (int i = 0; i < scanCountArray.length; i++) {
@@ -507,7 +508,7 @@
int queryIndex = Math.abs(rnd.nextInt() % documentCorpus.size());
searchDocument.reset(documentCorpus.get(queryIndex));
}
-
+
// Set query tuple in search predicate.
searchPred.setQueryTuple(searchDocument);
searchPred.setQueryFieldIndex(0);