Added validation of on-disk inverted index. Added bulk-load test for on-disk inverted index, and insert test for in-memory inverted index based on a new testing framework.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1821 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
index 78c9eae..6a5c762 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
@@ -16,15 +16,16 @@
package edu.uci.ics.hyracks.storage.am.common.datagen;
import java.io.BufferedReader;
-import java.io.FileReader;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
public class DocumentStringFieldValueGenerator implements IFieldValueGenerator<String> {
- private final String FIRST_NAMES_FILE = "data/dist.all.first.cleaned";
- private final String LAST_NAMES_FILE = "data/dist.all.last.cleaned";
+ private final String FIRST_NAMES_FILE = "dist.all.first.cleaned";
+ private final String LAST_NAMES_FILE = "dist.all.last.cleaned";
private final int docMinWords;
private final int docMaxWords;
@@ -50,7 +51,8 @@
int count = 0;
// Read first names from data file.
- BufferedReader firstNamesReader = new BufferedReader(new FileReader(FIRST_NAMES_FILE));
+ InputStream firstNamesIn = this.getClass().getClassLoader().getResourceAsStream(FIRST_NAMES_FILE);
+ BufferedReader firstNamesReader = new BufferedReader(new InputStreamReader(firstNamesIn));
try {
while (count < maxDictionarySize && (line = firstNamesReader.readLine()) != null) {
tokenDict.add(line.trim());
@@ -61,7 +63,8 @@
}
// Read last names from data file.
- BufferedReader lastNamesReader = new BufferedReader(new FileReader(LAST_NAMES_FILE));
+ InputStream lastNamesIn = this.getClass().getClassLoader().getResourceAsStream(LAST_NAMES_FILE);
+ BufferedReader lastNamesReader = new BufferedReader(new InputStreamReader(lastNamesIn));
try {
while (count < maxDictionarySize && (line = lastNamesReader.readLine()) != null) {
tokenDict.add(line.trim());
diff --git a/hyracks-storage-am-common/data/dist.all.first.cleaned b/hyracks-storage-am-common/src/main/resources/dist.all.first.cleaned
similarity index 100%
rename from hyracks-storage-am-common/data/dist.all.first.cleaned
rename to hyracks-storage-am-common/src/main/resources/dist.all.first.cleaned
diff --git a/hyracks-storage-am-common/data/dist.all.last.cleaned b/hyracks-storage-am-common/src/main/resources/dist.all.last.cleaned
similarity index 100%
rename from hyracks-storage-am-common/data/dist.all.last.cleaned
rename to hyracks-storage-am-common/src/main/resources/dist.all.last.cleaned
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndex.java
index de54870..13a4a1a 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndex.java
@@ -25,15 +25,15 @@
public interface IInvertedIndex extends IIndex {
public IInvertedListCursor createInvertedListCursor();
-
+
public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey, IIndexOpContext ictx)
throws HyracksDataException, IndexException;
-
+
public ITypeTraits[] getInvListTypeTraits();
public IBinaryComparatorFactory[] getInvListCmpFactories();
public ITypeTraits[] getTokenTypeTraits();
- public IBinaryComparatorFactory[] getTokenCmpFactories();
+ public IBinaryComparatorFactory[] getTokenCmpFactories();
}
\ No newline at end of file
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
new file mode 100644
index 0000000..d5266ff
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+
+public interface IInvertedIndexAccessor extends IIndexAccessor {
+ public IInvertedListCursor createInvertedListCursor();
+
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
+ throws HyracksDataException, IndexException;
+}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
index 0e3399d..321eab0 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
@@ -34,7 +34,7 @@
public ITupleReference getTuple();
// getters
- public int getNumElements();
+ public int size();
public int getStartPageId();
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 4bce429..4d28758 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -72,7 +72,7 @@
//create all cursors
IIndexCursor cursor;
for (IIndexAccessor accessor : indexAccessors) {
- InvertedIndexAccessor invIndexAccessor = (InvertedIndexAccessor) accessor;
+ OnDiskInvertedIndexAccessor invIndexAccessor = (OnDiskInvertedIndexAccessor) accessor;
cursor = invIndexAccessor.createRangeSearchCursor();
try {
invIndexAccessor.rangeSearch(cursor, searchPred);
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
index 4699a1a..1527a40 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
@@ -35,6 +35,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.api.IndexType;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOp;
import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
@@ -143,6 +144,7 @@
public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference tupleReference,
IIndexOpContext ictx) throws HyracksDataException, IndexException {
InMemoryInvertedIndexOpContext ctx = (InMemoryInvertedIndexOpContext) ictx;
+ ctx.reset(IndexOp.SEARCH);
InMemoryInvertedListCursor inMemListCursor = (InMemoryInvertedListCursor) listCursor;
inMemListCursor.prepare(ctx.btreeAccessor, ctx.btreePred, ctx.tokenFieldsCmp, ctx.btreeCmp);
inMemListCursor.reset(tupleReference);
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
index 628c726..1278283 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
@@ -19,39 +19,40 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
-import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOpContext;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOp;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex.DefaultHyracksCommonContext;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.TOccurrenceSearcher;
-public class InMemoryInvertedIndexAccessor implements IIndexAccessor {
+public class InMemoryInvertedIndexAccessor implements IInvertedIndexAccessor {
// TODO: This ctx needs to go away.
protected final IHyracksCommonContext hyracksCtx = new DefaultHyracksCommonContext();
protected final IInvertedIndexSearcher searcher;
protected IIndexOpContext opCtx;
- protected InMemoryInvertedIndex inMemInvIx;
+ protected InMemoryInvertedIndex index;
protected BTreeAccessor btreeAccessor;
- public InMemoryInvertedIndexAccessor(InMemoryInvertedIndex inMemInvIx, IIndexOpContext opCtx) {
+ public InMemoryInvertedIndexAccessor(InMemoryInvertedIndex index, IIndexOpContext opCtx) {
this.opCtx = opCtx;
- this.inMemInvIx = inMemInvIx;
- this.searcher = new TOccurrenceSearcher(hyracksCtx, inMemInvIx);
+ this.index = index;
+ this.searcher = new TOccurrenceSearcher(hyracksCtx, index);
// TODO: Ignore opcallbacks for now.
- this.btreeAccessor = (BTreeAccessor) inMemInvIx.getBTree().createAccessor(NoOpOperationCallback.INSTANCE,
+ this.btreeAccessor = (BTreeAccessor) index.getBTree().createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
}
public void insert(ITupleReference tuple) throws HyracksDataException, IndexException {
opCtx.reset(IndexOp.INSERT);
- inMemInvIx.insert(tuple, btreeAccessor, opCtx);
+ index.insert(tuple, btreeAccessor, opCtx);
}
@Override
@@ -65,6 +66,17 @@
}
@Override
+ public IInvertedListCursor createInvertedListCursor() {
+ return index.createInvertedListCursor();
+ }
+
+ @Override
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
+ throws HyracksDataException, IndexException {
+ index.openInvertedListCursor(listCursor, searchKey, opCtx);
+ }
+
+ @Override
public void delete(ITupleReference tuple) throws HyracksDataException, IndexException {
throw new UnsupportedOperationException("Delete not supported by in-memory inverted index.");
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
index 97fcc4e..5b003ee 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
@@ -71,7 +71,7 @@
@Override
public int compareTo(IInvertedListCursor cursor) {
- return getNumElements() - cursor.getNumElements();
+ return size() - cursor.size();
}
public void reset(ITupleReference tuple) throws HyracksDataException, TreeIndexException {
@@ -79,6 +79,7 @@
// Copy the tokens tuple for later use in btree probes.
TupleUtils.copyTuple(tokenTupleBuilder, tuple, tuple.getFieldCount());
tokenTuple.reset(tokenTupleBuilder.getFieldEndOffsets(), tokenTupleBuilder.getByteArray());
+ btreeSearchTuple.reset();
btreeSearchTuple.addTuple(tokenTuple);
btreePred.setLowKey(tuple, true);
btreePred.setHighKey(tuple, true);
@@ -117,7 +118,7 @@
}
@Override
- public int getNumElements() {
+ public int size() {
if (numElements < 0) {
btreePred.setLowKeyComparator(tokenFieldsCmp);
btreePred.setHighKeyComparator(tokenFieldsCmp);
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java
index b221546..ed3838a 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java
@@ -251,7 +251,7 @@
@Override
public int compareTo(IInvertedListCursor invListCursor) {
- return numElements - invListCursor.getNumElements();
+ return numElements - invListCursor.size();
}
@Override
@@ -260,7 +260,7 @@
}
@Override
- public int getNumElements() {
+ public int size() {
return numElements;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
index 8509d6b..2e72894 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
@@ -28,8 +28,10 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.btree.util.BTreeUtils;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
@@ -41,8 +43,11 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.api.IndexType;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
@@ -235,6 +240,11 @@
}
@Override
+ public IInvertedListCursor createInvertedListCursor() {
+ return new FixedSizeElementInvertedListCursor(bufferCache, fileId, invListTypeTraits);
+ }
+
+ @Override
public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey, IIndexOpContext ictx)
throws HyracksDataException, IndexException {
OnDiskInvertedIndexOpContext ctx = (OnDiskInvertedIndexOpContext) ictx;
@@ -410,11 +420,13 @@
return btree;
}
- public class InvertedIndexAccessor implements IIndexAccessor {
+ public class OnDiskInvertedIndexAccessor implements IInvertedIndexAccessor {
+ private final OnDiskInvertedIndex index;
private final IInvertedIndexSearcher searcher;
private final IIndexOpContext opCtx = new OnDiskInvertedIndexOpContext(btree);
- public InvertedIndexAccessor(OnDiskInvertedIndex index) {
+ public OnDiskInvertedIndexAccessor(OnDiskInvertedIndex index) {
+ this.index = index;
this.searcher = new TOccurrenceSearcher(ctx, index);
}
@@ -434,6 +446,17 @@
}
@Override
+ public IInvertedListCursor createInvertedListCursor() {
+ return index.createInvertedListCursor();
+ }
+
+ @Override
+ public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
+ throws HyracksDataException, IndexException {
+ index.openInvertedListCursor(listCursor, searchKey, opCtx);
+ }
+
+ @Override
public void insert(ITupleReference tuple) throws HyracksDataException, IndexException {
throw new UnsupportedOperationException("Insert not supported by inverted index.");
}
@@ -457,7 +480,7 @@
@Override
public IIndexAccessor createAccessor(IModificationOperationCallback modificationCallback,
ISearchOperationCallback searchCallback) {
- return new InvertedIndexAccessor(this);
+ return new OnDiskInvertedIndexAccessor(this);
}
@Override
@@ -499,7 +522,62 @@
@Override
public void validate() throws HyracksDataException {
- throw new UnsupportedOperationException("Validation not implemented for Inverted Indexes.");
+ btree.validate();
+ // Scan the btree and validate the order of elements in each inverted-list.
+ IIndexAccessor btreeAccessor = btree.createAccessor(NoOpOperationCallback.INSTANCE,
+ NoOpOperationCallback.INSTANCE);
+ IIndexCursor btreeCursor = btreeAccessor.createSearchCursor();
+ MultiComparator btreeCmp = MultiComparator.create(btree.getComparatorFactories());
+ RangePredicate rangePred = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
+ int[] fieldPermutation = new int[tokenTypeTraits.length];
+ for (int i = 0; i < tokenTypeTraits.length; i++) {
+ fieldPermutation[i] = i;
+ }
+ PermutingTupleReference tokenTuple = new PermutingTupleReference(fieldPermutation);
+
+ IInvertedIndexAccessor invIndexAccessor = (IInvertedIndexAccessor) createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ IInvertedListCursor invListCursor = invIndexAccessor.createInvertedListCursor();
+ MultiComparator invListCmp = MultiComparator.create(invListCmpFactories);
+ try {
+ // Search key for finding an inverted-list in the actual index.
+ ArrayTupleBuilder prevBuilder = new ArrayTupleBuilder(invListTypeTraits.length);
+ ArrayTupleReference prevTuple = new ArrayTupleReference();
+ btreeAccessor.search(btreeCursor, rangePred);
+ while (btreeCursor.hasNext()) {
+ btreeCursor.next();
+ tokenTuple.reset(btreeCursor.getTuple());
+ // Validate inverted list by checking that the elements are totally ordered.
+ invIndexAccessor.openInvertedListCursor(invListCursor, tokenTuple);
+ invListCursor.pinPages();
+ try {
+ if (invListCursor.hasNext()) {
+ invListCursor.next();
+ ITupleReference invListElement = invListCursor.getTuple();
+ // Initialize prev tuple.
+ TupleUtils.copyTuple(prevBuilder, invListElement, invListElement.getFieldCount());
+ prevTuple.reset(prevBuilder.getFieldEndOffsets(), prevBuilder.getByteArray());
+ }
+ while (invListCursor.hasNext()) {
+ invListCursor.next();
+ ITupleReference invListElement = invListCursor.getTuple();
+ // Compare with previous element.
+ if (invListCmp.compare(invListElement, prevTuple) <= 0) {
+ throw new HyracksDataException("Index validation failed.");
+ }
+ // Set new prevTuple.
+ TupleUtils.copyTuple(prevBuilder, invListElement, invListElement.getFieldCount());
+ prevTuple.reset(prevBuilder.getFieldEndOffsets(), prevBuilder.getByteArray());
+ }
+ } finally {
+ invListCursor.unpinPages();
+ }
+ }
+ } catch (IndexException e) {
+ throw new HyracksDataException(e);
+ } finally {
+ btreeCursor.close();
+ }
}
@Override
@@ -507,11 +585,6 @@
return 0;
}
- @Override
- public IInvertedListCursor createInvertedListCursor() {
- return new FixedSizeElementInvertedListCursor(bufferCache, fileId, invListTypeTraits);
- }
-
private static ITypeTraits[] getBTreeTypeTraits(ITypeTraits[] tokenTypeTraits) {
ITypeTraits[] btreeTypeTraits = new ITypeTraits[tokenTypeTraits.length + btreeValueTypeTraits.length];
// Set key type traits.
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
index 2786c91..a78287e 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
@@ -208,7 +208,7 @@
newResultBuffers = swap;
invListCursors.get(i).pinPages();
- int numInvListElements = invListCursors.get(i).getNumElements();
+ int numInvListElements = invListCursors.get(i).size();
// should we binary search the next list or should we sort-merge it?
if (currentNumResults * Math.log(numInvListElements) < currentNumResults + numInvListElements) {
maxPrevBufIdx = mergeSuffixListProbe(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx,
@@ -284,7 +284,7 @@
resultFrameTupleApp.reset(newCurrentBuffer, true);
int invListTidx = 0;
- int invListNumTuples = invListCursor.getNumElements();
+ int invListNumTuples = invListCursor.size();
if (invListCursor.hasNext())
invListCursor.next();
@@ -378,7 +378,7 @@
resultFrameTupleApp.reset(newCurrentBuffer, true);
int invListTidx = 0;
- int invListNumTuples = invListCursor.getNumElements();
+ int invListNumTuples = invListCursor.size();
if (invListCursor.hasNext())
invListCursor.next();
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexTestUtils.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexTestUtils.java
index 5aaa7d9..04c64fe 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexTestUtils.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexTestUtils.java
@@ -60,13 +60,10 @@
highKey.setIsHighKey(true);
CheckTuple low = checkTuples.ceiling(lowKey);
CheckTuple high = checkTuples.floor(highKey);
- if (low == null) {
+ if (low == null || high == null) {
// Must be empty.
return new TreeSet<CheckTuple>();
}
- if (high == null) {
- throw new IllegalStateException("Upper bound is null.");
- }
if (high.compareTo(low) < 0) {
// Must be empty.
return new TreeSet<CheckTuple>();
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/AbstractInvertedIndexTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/AbstractInvertedIndexTest.java
index cba8959..8ddd49d 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/AbstractInvertedIndexTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/AbstractInvertedIndexTest.java
@@ -101,7 +101,7 @@
generateData();
invertedIndexAccessor = invertedIndex.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- IBinaryComparatorFactory[] tokenCmpFactories = harness.getTokenBinaryComparatorFactories();
+ IBinaryComparatorFactory[] tokenCmpFactories = harness.getTokenCmpFactories();
tokenComparators = new IBinaryComparator[tokenCmpFactories.length];
for (int i = 0; i < tokenCmpFactories.length; i++) {
tokenComparators[i] = tokenCmpFactories[i].createBinaryComparator();
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexTestHarness.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexTestHarness.java
index e39f79b..fc24396 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexTestHarness.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexTestHarness.java
@@ -22,17 +22,16 @@
import java.util.Random;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.control.nc.io.IOManager;
-import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
-import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
-import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManager;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.DualIndexInMemoryBufferCache;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.DualIndexInMemoryFreePageManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryFreePageManager;
import edu.uci.ics.hyracks.storage.common.buffercache.HeapBufferAllocator;
@@ -62,6 +61,7 @@
protected IOManager ioManager;
protected IBufferCache diskBufferCache;
protected IFileMapProvider diskFileMapProvider;
+ protected IFreePageManager diskFreePageManager;
protected InMemoryBufferCache memBufferCache;
protected InMemoryFreePageManager memFreePageManager;
protected IHyracksTaskContext ctx;
@@ -72,19 +72,8 @@
protected String onDiskDir;
protected String btreeFileName = "btree_vocab";
protected String invIndexFileName = "inv_index";
- protected FileReference btreeFileRef;
protected FileReference invIndexFileRef;
- // Token information
- protected ITypeTraits[] tokenTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
- protected IBinaryComparatorFactory[] tokenCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) };
-
- // Inverted list information
- protected ITypeTraits[] invListTypeTraits = new ITypeTraits[] { IntegerPointable.TYPE_TRAITS };
- protected IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(IntegerPointable.FACTORY) };
-
public LSMInvertedIndexTestHarness() {
this.diskPageSize = DEFAULT_DISK_PAGE_SIZE;
this.diskNumPages = DEFAULT_DISK_NUM_PAGES;
@@ -110,8 +99,9 @@
TestStorageManagerComponentHolder.init(diskPageSize, diskNumPages, diskMaxOpenFiles);
diskBufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
diskFileMapProvider = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
- memBufferCache = new InMemoryBufferCache(new HeapBufferAllocator(), memPageSize, memNumPages);
- memFreePageManager = new InMemoryFreePageManager(memNumPages, new LIFOMetaDataFrameFactory());
+ memBufferCache = new DualIndexInMemoryBufferCache(new HeapBufferAllocator(), memPageSize, memNumPages);
+ memBufferCache.open();
+ memFreePageManager = new DualIndexInMemoryFreePageManager(memNumPages, new LIFOMetaDataFrameFactory());
ioManager = TestStorageManagerComponentHolder.getIOManager();
rnd.setSeed(RANDOM_SEED);
@@ -119,20 +109,10 @@
btreeFile.deleteOnExit();
File invIndexFile = new File(onDiskDir + invIndexFileName);
invIndexFile.deleteOnExit();
- btreeFileRef = new FileReference(btreeFile);
invIndexFileRef = new FileReference(invIndexFile);
- diskBufferCache.createFile(btreeFileRef);
- diskBufferCache.openFile(diskFileMapProvider.lookupFileId(btreeFileRef));
- diskBufferCache.createFile(invIndexFileRef);
- diskBufferCache.openFile(diskFileMapProvider.lookupFileId(invIndexFileRef));
}
public void tearDown() throws HyracksDataException {
- diskBufferCache.closeFile(diskFileMapProvider.lookupFileId(btreeFileRef));
- diskBufferCache.deleteFile(diskFileMapProvider.lookupFileId(btreeFileRef), false);
- diskBufferCache.closeFile(diskFileMapProvider.lookupFileId(invIndexFileRef));
- diskBufferCache.deleteFile(diskFileMapProvider.lookupFileId(invIndexFileRef), false);
- diskBufferCache.close();
for (IODeviceHandle dev : ioManager.getIODevices()) {
File dir = new File(dev.getPath(), onDiskDir);
FilenameFilter filter = new FilenameFilter() {
@@ -149,16 +129,13 @@
}
dir.delete();
}
+ memBufferCache.close();
}
- public int getDiskInvertedIndexFileId() throws HyracksDataException {
- return diskFileMapProvider.lookupFileId(invIndexFileRef);
+ public FileReference getInvListsFileRef() {
+ return invIndexFileRef;
}
- public int getDiskBtreeFileId() throws HyracksDataException {
- return diskFileMapProvider.lookupFileId(btreeFileRef);
- }
-
public int getDiskPageSize() {
return diskPageSize;
}
@@ -218,20 +195,4 @@
public Random getRandom() {
return rnd;
}
-
- public ITypeTraits[] getTokenTypeTraits() {
- return tokenTypeTraits;
- }
-
- public ITypeTraits[] getInvertedListTypeTraits() {
- return invListTypeTraits;
- }
-
- public IBinaryComparatorFactory[] getTokenBinaryComparatorFactories() {
- return tokenCmpFactories;
- }
-
- public IBinaryComparatorFactory[] getInvertedListBinaryComparatorFactories() {
- return invListCmpFactories;
- }
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/AbstractSingleComponentInvertedIndexTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/AbstractSingleComponentInvertedIndexTest.java
new file mode 100644
index 0000000..a035276
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/AbstractSingleComponentInvertedIndexTest.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.SortedSet;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestUtils;
+import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.datagen.DocumentStringFieldValueGenerator;
+import edu.uci.ics.hyracks.storage.am.common.datagen.IFieldValueGenerator;
+import edu.uci.ics.hyracks.storage.am.common.datagen.SortedIntegerFieldValueGenerator;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.LSMInvertedIndexTestHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8WordTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+
+@SuppressWarnings("rawtypes")
+public abstract class AbstractSingleComponentInvertedIndexTest {
+ protected final LSMInvertedIndexTestHarness harness = new LSMInvertedIndexTestHarness();
+
+ protected int NUM_DOCS_TO_INSERT = 10000;
+
+ protected final InvertedIndexType invIndexType;
+
+ public AbstractSingleComponentInvertedIndexTest(InvertedIndexType invIndexType) {
+ this.invIndexType = invIndexType;
+ }
+
+ @Before
+ public void setUp() throws HyracksException {
+ harness.setUp();
+ }
+
+ public abstract void loadIndex(TupleGenerator tupleGen, InvertedIndexTestContext testCtx) throws IOException,
+ IndexException;
+
+ public abstract IBufferCache getBufferCache();
+
+ @Test
+ public void stringDocumentTest() throws IOException, IndexException {
+ ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+ IFieldValueGenerator[] fieldGens = new IFieldValueGenerator[2];
+ fieldGens[0] = new DocumentStringFieldValueGenerator(2, 10, 10000, harness.getRandom());
+ fieldGens[1] = new SortedIntegerFieldValueGenerator(0);
+ TupleGenerator tupleGen = new TupleGenerator(fieldGens, fieldSerdes, 0);
+
+ ITokenFactory tokenFactory = new UTF8WordTokenFactory();
+ IBinaryTokenizerFactory tokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, false,
+ tokenFactory);
+
+ InvertedIndexTestContext testCtx = InvertedIndexTestContext.create(getBufferCache(),
+ harness.getMemFreePageManager(), harness.getDiskFileMapProvider(), harness.getInvListsFileRef(),
+ fieldSerdes, 1, tokenizerFactory, invIndexType);
+
+ IIndex invIndex = testCtx.getIndex();
+ invIndex.create();
+ invIndex.activate();
+
+ loadIndex(tupleGen, testCtx);
+
+ // Validate index and compare against expected index.
+ invIndex.validate();
+ compareActualAndExpectedIndexes(testCtx);
+
+ invIndex.deactivate();
+ invIndex.destroy();
+ }
+
+ @SuppressWarnings("unchecked")
+ protected void compareActualAndExpectedIndexes(InvertedIndexTestContext testCtx) throws HyracksDataException,
+ IndexException {
+ IInvertedIndex invIndex = (IInvertedIndex) testCtx.getIndex();
+ ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
+ MultiComparator invListCmp = MultiComparator.create(invIndex.getInvListCmpFactories());
+ IInvertedIndexAccessor invIndexAccessor = (IInvertedIndexAccessor) testCtx.getIndexAccessor();
+ int tokenFieldCount = invIndex.getTokenTypeTraits().length;
+ int invListFieldCount = invIndex.getInvListTypeTraits().length;
+ // All tokens that were inserted into the indexes.
+ Iterator<Comparable> tokensIter = testCtx.getAllTokens().iterator();
+
+ // Search key for finding an inverted-list in the actual index.
+ ArrayTupleBuilder searchKeyBuilder = new ArrayTupleBuilder(tokenFieldCount);
+ ArrayTupleReference searchKey = new ArrayTupleReference();
+ // Cursor over inverted list from actual index.
+ IInvertedListCursor actualInvListCursor = invIndexAccessor.createInvertedListCursor();
+
+ // Helpers for generating a serialized inverted-list element from a CheckTuple from the expected index.
+ ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(fieldSerdes.length);
+ // Includes the token fields.
+ ArrayTupleReference completeExpectedTuple = new ArrayTupleReference();
+ // Field permutation and permuting tuple reference to strip away token fields from completeExpectedTuple.
+ int[] fieldPermutation = new int[invListFieldCount];
+ for (int i = 0; i < fieldPermutation.length; i++) {
+ fieldPermutation[i] = tokenFieldCount + i;
+ }
+ PermutingTupleReference expectedTuple = new PermutingTupleReference(fieldPermutation);
+
+ // Iterate over all tokens. Find the inverted-lists in actual and expected indexes. Compare the inverted lists,
+ while (tokensIter.hasNext()) {
+ Comparable token = tokensIter.next();
+
+ // Position inverted-list iterator on expected index.
+ CheckTuple checkLowKey = new CheckTuple(tokenFieldCount, tokenFieldCount);
+ checkLowKey.appendField(token);
+ CheckTuple checkHighKey = new CheckTuple(tokenFieldCount, tokenFieldCount);
+ checkHighKey.appendField(token);
+ SortedSet<CheckTuple> expectedInvList = OrderedIndexTestUtils.getPrefixExpectedSubset(
+ testCtx.getCheckTuples(), checkLowKey, checkHighKey);
+ Iterator<CheckTuple> expectedInvListIter = expectedInvList.iterator();
+
+ // Position inverted-list cursor in actual index.
+ OrderedIndexTestUtils.createTupleFromCheckTuple(checkLowKey, searchKeyBuilder, searchKey, fieldSerdes);
+ invIndexAccessor.openInvertedListCursor(actualInvListCursor, searchKey);
+
+ if (actualInvListCursor.size() != expectedInvList.size()) {
+ fail("Actual and expected inverted lists for token '" + token.toString()
+ + "' have different sizes. Actual size: " + actualInvListCursor.size() + ". Expected size: "
+ + expectedInvList.size() + ".");
+ }
+ // Compare inverted-list elements.
+ int count = 0;
+ actualInvListCursor.pinPages();
+ try {
+ while (actualInvListCursor.hasNext() && expectedInvListIter.hasNext()) {
+ actualInvListCursor.next();
+ ITupleReference actual = actualInvListCursor.getTuple();
+ CheckTuple expected = expectedInvListIter.next();
+ OrderedIndexTestUtils.createTupleFromCheckTuple(expected, expectedBuilder, completeExpectedTuple,
+ fieldSerdes);
+ expectedTuple.reset(completeExpectedTuple);
+ if (invListCmp.compare(actual, expectedTuple) != 0) {
+ fail("Inverted lists of token '" + token + "' differ at position " + count + ".");
+ }
+ count++;
+ }
+ } finally {
+ actualInvListCursor.unpinPages();
+ }
+ }
+ }
+
+ @After
+ public void tearDown() throws HyracksDataException {
+ harness.tearDown();
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexInsertTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexInsertTest.java
index d9e648c..dfc4881 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexInsertTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexInsertTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2010 by The Regents of the University of California
+ * Copyright 2009-2012 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -15,42 +15,33 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
-import java.util.logging.Logger;
+import java.io.IOException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.AbstractInvertedIndexInsertTest;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8NGramTokenFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-public class InMemoryInvertedIndexInsertTest extends AbstractInvertedIndexInsertTest {
-
- @Override
- protected void setTokenizer() {
- ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
- tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
-// ITokenFactory tokenFactory = new UTF8WordTokenFactory();
-// tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+public class InMemoryInvertedIndexInsertTest extends AbstractSingleComponentInvertedIndexTest {
+
+ public InMemoryInvertedIndexInsertTest() {
+ super(InvertedIndexType.INMEMORY);
}
@Override
- protected void setInvertedIndex() throws HyracksDataException {
- invertedIndex = InvertedIndexTestUtils.createInMemoryInvertedIndex(harness, tokenizer);
- invertedIndex.create();
- invertedIndex.activate();
- invertedIndex.create(harness.getFileId());
- invertedIndex.open(harness.getFileId());
+ public void loadIndex(TupleGenerator tupleGen, InvertedIndexTestContext testCtx) throws IOException, IndexException {
+ // InMemoryInvertedIndex only supports insert.
+ for (int i = 0; i < NUM_DOCS_TO_INSERT; i++) {
+ ITupleReference tuple = tupleGen.next();
+ testCtx.getIndexAccessor().insert(tuple);
+ testCtx.insertCheckTuples(tuple);
+ }
}
@Override
- protected void setLogger() {
- LOGGER = Logger.getLogger(InMemoryInvertedIndexInsertTest.class.getName());
+ public IBufferCache getBufferCache() {
+ return harness.getMemBufferCache();
}
-
- @Override
- protected void setRandom() {
- random = true;
- }
-
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTest.java
index 97e6dc4..1c14df0 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2010 by The Regents of the University of California
+ * Copyright 2009-2012 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -15,231 +15,54 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
-import java.io.DataOutput;
-import java.io.File;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
+import java.io.IOException;
+import java.util.Iterator;
-import junit.framework.Assert;
-
-import org.junit.AfterClass;
-import org.junit.Test;
-
-import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.io.FileReference;
-import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
-import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
-import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
-import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestUtils;
+import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
-import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOp;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory.AbstractSingleComponentInvertedIndexTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
-import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
-import edu.uci.ics.hyracks.test.support.TestUtils;
-public class OnDiskInvertedIndexBulkLoadTest extends AbstractInvIndexTest {
+@SuppressWarnings("rawtypes")
+public class OnDiskInvertedIndexBulkLoadTest extends AbstractSingleComponentInvertedIndexTest {
- private static final int PAGE_SIZE = 32768;
- private static final int NUM_PAGES = 100;
- private static final int MAX_OPEN_FILES = 10;
- private static final int HYRACKS_FRAME_SIZE = 32768;
- private IHyracksTaskContext stageletCtx = TestUtils.create(HYRACKS_FRAME_SIZE);
-
- /**
- * This test generates a list of <word-token, id> pairs which are pre-sorted
- * on the token. Those pairs for the input to an inverted-index bulk load.
- * The contents of the inverted lists are verified against the generated
- * data.
- */
- @Test
- public void singleFieldPayloadTest() throws Exception {
- TestStorageManagerComponentHolder.init(PAGE_SIZE, NUM_PAGES, MAX_OPEN_FILES);
- IBufferCache bufferCache = TestStorageManagerComponentHolder.getBufferCache(stageletCtx);
- IFileMapProvider fmp = TestStorageManagerComponentHolder.getFileMapProvider(stageletCtx);
-
- FileReference invListsFile = new FileReference(new File(invListsFileName));
- FileReference btreeFile = new FileReference(new File(invListsFileName + "_btree"));
-
- ITypeTraits[] tokenTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
-
- IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) };
-
- int invListFields = 1;
- ITypeTraits[] invListTypeTraits = new ITypeTraits[invListFields];
- invListTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
-
- int invListKeys = 1;
- IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[invListKeys];
- invListCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
-
- IInvertedListBuilder invListBuilder = new FixedSizeElementInvertedListBuilder(invListTypeTraits);
- OnDiskInvertedIndex invIndex = new OnDiskInvertedIndex(bufferCache, fmp, invListBuilder, invListTypeTraits,
- invListCmpFactories, tokenTypeTraits, cmpFactories, invListsFile, btreeFile);
- invIndex.create();
- invIndex.activate();
-
- Random rnd = new Random();
- rnd.setSeed(50);
-
- ByteBuffer frame = stageletCtx.allocateFrame();
- FrameTupleAppender appender = new FrameTupleAppender(stageletCtx.getFrameSize());
- ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
- DataOutput dos = tb.getDataOutput();
-
- ISerializerDeserializer[] insertSerde = { UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE };
- RecordDescriptor insertRecDesc = new RecordDescriptor(insertSerde);
- IFrameTupleAccessor accessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), insertRecDesc);
- accessor.reset(frame);
- FrameTupleReference tuple = new FrameTupleReference();
-
- List<String> tokens = new ArrayList<String>();
- tokens.add("compilers");
- tokens.add("computer");
- tokens.add("databases");
- tokens.add("fast");
- tokens.add("hyracks");
- tokens.add("major");
- tokens.add("science");
- tokens.add("systems");
- tokens.add("university");
-
- ArrayList<ArrayList<Integer>> checkListElements = new ArrayList<ArrayList<Integer>>();
- for (int i = 0; i < tokens.size(); i++) {
- checkListElements.add(new ArrayList<Integer>());
- }
-
- int maxId = 1000000;
- int addProb = 0;
- int addProbStep = 10;
-
- IIndexBulkLoader bulkLoader = invIndex.createBulkLoader(BTree.DEFAULT_FILL_FACTOR, false);
-
- for (int i = 0; i < tokens.size(); i++) {
-
- addProb += addProbStep * (i + 1);
- for (int j = 0; j < maxId; j++) {
- if ((Math.abs(rnd.nextInt()) % addProb) == 0) {
-
- tb.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize(tokens.get(i), dos);
- tb.addFieldEndOffset();
- IntegerSerializerDeserializer.INSTANCE.serialize(j, dos);
- tb.addFieldEndOffset();
-
- checkListElements.get(i).add(j);
-
- appender.reset(frame, true);
- appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
-
- tuple.reset(accessor, 0);
-
- try {
- bulkLoader.add(tuple);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- }
- }
- bulkLoader.end();
-
- // ------- START VERIFICATION -----------
-
- FrameTupleReference searchKey = new FrameTupleReference();
- IInvertedListCursor invListCursor = new FixedSizeElementInvertedListCursor(bufferCache,
- fmp.lookupFileId(invListsFile), invListTypeTraits);
-
- ISerializerDeserializer[] tokenSerde = { UTF8StringSerializerDeserializer.INSTANCE };
- RecordDescriptor tokenRecDesc = new RecordDescriptor(tokenSerde);
- FrameTupleAppender tokenAppender = new FrameTupleAppender(stageletCtx.getFrameSize());
- ArrayTupleBuilder tokenTupleBuilder = new ArrayTupleBuilder(1);
- DataOutput tokenDos = tokenTupleBuilder.getDataOutput();
- IFrameTupleAccessor tokenAccessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), tokenRecDesc);
- tokenAccessor.reset(frame);
-
- // verify created inverted lists one-by-one
- OnDiskInvertedIndexOpContext opCtx = new OnDiskInvertedIndexOpContext(invIndex.getBTree());
- opCtx.reset(IndexOp.SEARCH);
- for (int i = 0; i < tokens.size(); i++) {
-
- tokenTupleBuilder.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize(tokens.get(i), tokenDos);
- tokenTupleBuilder.addFieldEndOffset();
-
- tokenAppender.reset(frame, true);
- tokenAppender.append(tokenTupleBuilder.getFieldEndOffsets(), tokenTupleBuilder.getByteArray(), 0,
- tokenTupleBuilder.getSize());
-
- searchKey.reset(tokenAccessor, 0);
-
- invIndex.openInvertedListCursor(invListCursor, searchKey, opCtx);
-
- invListCursor.pinPages();
- int checkIndex = 0;
- while (invListCursor.hasNext()) {
- invListCursor.next();
- ITupleReference invListTuple = invListCursor.getTuple();
- int invListElement = IntegerSerializerDeserializer.getInt(invListTuple.getFieldData(0),
- invListTuple.getFieldStart(0));
- int checkInvListElement = checkListElements.get(i).get(checkIndex).intValue();
- Assert.assertEquals(invListElement, checkInvListElement);
- checkIndex++;
- }
- invListCursor.unpinPages();
- Assert.assertEquals(checkIndex, checkListElements.get(i).size());
- }
-
- // check that non-existing tokens have an empty inverted list
- List<String> nonExistingTokens = new ArrayList<String>();
- nonExistingTokens.add("watermelon");
- nonExistingTokens.add("avocado");
- nonExistingTokens.add("lemon");
-
- for (int i = 0; i < nonExistingTokens.size(); i++) {
-
- tokenTupleBuilder.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize(nonExistingTokens.get(i), tokenDos);
- tokenTupleBuilder.addFieldEndOffset();
-
- tokenAppender.reset(frame, true);
- tokenAppender.append(tokenTupleBuilder.getFieldEndOffsets(), tokenTupleBuilder.getByteArray(), 0,
- tokenTupleBuilder.getSize());
-
- searchKey.reset(tokenAccessor, 0);
-
- invIndex.openInvertedListCursor(invListCursor, searchKey, opCtx);
-
- invListCursor.pinPages();
- Assert.assertEquals(invListCursor.hasNext(), false);
- invListCursor.unpinPages();
- }
-
- invIndex.deactivate();
- invIndex.destroy();
- bufferCache.close();
+ public OnDiskInvertedIndexBulkLoadTest() {
+ super(InvertedIndexType.ONDISK);
}
- @AfterClass
- public static void deinit() {
- AbstractInvIndexTest.tearDown();
+ @Override
+ public void loadIndex(TupleGenerator tupleGen, InvertedIndexTestContext testCtx) throws IOException, IndexException {
+ // First generate the expected index by inserting the documents one-by-one.
+ for (int i = 0; i < NUM_DOCS_TO_INSERT; i++) {
+ ITupleReference tuple = tupleGen.next();
+ testCtx.insertCheckTuples(tuple);
+ }
+ ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
+
+ // Use the expected index to bulk-load the actual index.
+ IIndexBulkLoader bulkLoader = testCtx.getIndex().createBulkLoader(1.0f, false);
+ ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(testCtx.getFieldSerdes().length);
+ ArrayTupleReference tuple = new ArrayTupleReference();
+ Iterator<CheckTuple> checkTupleIter = testCtx.getCheckTuples().iterator();
+ while (checkTupleIter.hasNext()) {
+ CheckTuple checkTuple = checkTupleIter.next();
+ OrderedIndexTestUtils.createTupleFromCheckTuple(checkTuple, tupleBuilder, tuple, fieldSerdes);
+ bulkLoader.add(tuple);
+ }
+ bulkLoader.end();
+ }
+
+ @Override
+ public IBufferCache getBufferCache() {
+ return harness.getDiskBufferCache();
}
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTestOld.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTestOld.java
new file mode 100644
index 0000000..b8b42ed
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTestOld.java
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import java.io.DataOutput;
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import junit.framework.Assert;
+
+import org.junit.AfterClass;
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOp;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
+import edu.uci.ics.hyracks.test.support.TestUtils;
+
+public class OnDiskInvertedIndexBulkLoadTestOld extends AbstractInvIndexTest {
+
+ private static final int PAGE_SIZE = 32768;
+ private static final int NUM_PAGES = 100;
+ private static final int MAX_OPEN_FILES = 10;
+ private static final int HYRACKS_FRAME_SIZE = 32768;
+ private IHyracksTaskContext stageletCtx = TestUtils.create(HYRACKS_FRAME_SIZE);
+
+ /**
+ * This test generates a list of <word-token, id> pairs which are pre-sorted
+ * on the token. Those pairs for the input to an inverted-index bulk load.
+ * The contents of the inverted lists are verified against the generated
+ * data.
+ */
+ @Test
+ public void singleFieldPayloadTest() throws Exception {
+ TestStorageManagerComponentHolder.init(PAGE_SIZE, NUM_PAGES, MAX_OPEN_FILES);
+ IBufferCache bufferCache = TestStorageManagerComponentHolder.getBufferCache(stageletCtx);
+ IFileMapProvider fmp = TestStorageManagerComponentHolder.getFileMapProvider(stageletCtx);
+
+ FileReference invListsFile = new FileReference(new File(invListsFileName));
+ FileReference btreeFile = new FileReference(new File(invListsFileName + "_btree"));
+
+ ITypeTraits[] tokenTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
+
+ IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+ .of(UTF8StringPointable.FACTORY) };
+
+ int invListFields = 1;
+ ITypeTraits[] invListTypeTraits = new ITypeTraits[invListFields];
+ invListTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
+
+ int invListKeys = 1;
+ IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[invListKeys];
+ invListCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
+
+ IInvertedListBuilder invListBuilder = new FixedSizeElementInvertedListBuilder(invListTypeTraits);
+ OnDiskInvertedIndex invIndex = new OnDiskInvertedIndex(bufferCache, fmp, invListBuilder, invListTypeTraits,
+ invListCmpFactories, tokenTypeTraits, cmpFactories, invListsFile, btreeFile);
+ invIndex.create();
+ invIndex.activate();
+
+ Random rnd = new Random();
+ rnd.setSeed(50);
+
+ ByteBuffer frame = stageletCtx.allocateFrame();
+ FrameTupleAppender appender = new FrameTupleAppender(stageletCtx.getFrameSize());
+ ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
+ DataOutput dos = tb.getDataOutput();
+
+ ISerializerDeserializer[] insertSerde = { UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE };
+ RecordDescriptor insertRecDesc = new RecordDescriptor(insertSerde);
+ IFrameTupleAccessor accessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), insertRecDesc);
+ accessor.reset(frame);
+ FrameTupleReference tuple = new FrameTupleReference();
+
+ List<String> tokens = new ArrayList<String>();
+ tokens.add("compilers");
+ tokens.add("computer");
+ tokens.add("databases");
+ tokens.add("fast");
+ tokens.add("hyracks");
+ tokens.add("major");
+ tokens.add("science");
+ tokens.add("systems");
+ tokens.add("university");
+
+ ArrayList<ArrayList<Integer>> checkListElements = new ArrayList<ArrayList<Integer>>();
+ for (int i = 0; i < tokens.size(); i++) {
+ checkListElements.add(new ArrayList<Integer>());
+ }
+
+ int maxId = 1000000;
+ int addProb = 0;
+ int addProbStep = 10;
+
+ IIndexBulkLoader bulkLoader = invIndex.createBulkLoader(BTree.DEFAULT_FILL_FACTOR, false);
+
+ for (int i = 0; i < tokens.size(); i++) {
+
+ addProb += addProbStep * (i + 1);
+ for (int j = 0; j < maxId; j++) {
+ if ((Math.abs(rnd.nextInt()) % addProb) == 0) {
+
+ tb.reset();
+ UTF8StringSerializerDeserializer.INSTANCE.serialize(tokens.get(i), dos);
+ tb.addFieldEndOffset();
+ IntegerSerializerDeserializer.INSTANCE.serialize(j, dos);
+ tb.addFieldEndOffset();
+
+ checkListElements.get(i).add(j);
+
+ appender.reset(frame, true);
+ appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
+
+ tuple.reset(accessor, 0);
+
+ try {
+ bulkLoader.add(tuple);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ }
+ bulkLoader.end();
+
+ // ------- START VERIFICATION -----------
+
+ FrameTupleReference searchKey = new FrameTupleReference();
+ IInvertedListCursor invListCursor = new FixedSizeElementInvertedListCursor(bufferCache,
+ fmp.lookupFileId(invListsFile), invListTypeTraits);
+
+ ISerializerDeserializer[] tokenSerde = { UTF8StringSerializerDeserializer.INSTANCE };
+ RecordDescriptor tokenRecDesc = new RecordDescriptor(tokenSerde);
+ FrameTupleAppender tokenAppender = new FrameTupleAppender(stageletCtx.getFrameSize());
+ ArrayTupleBuilder tokenTupleBuilder = new ArrayTupleBuilder(1);
+ DataOutput tokenDos = tokenTupleBuilder.getDataOutput();
+ IFrameTupleAccessor tokenAccessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), tokenRecDesc);
+ tokenAccessor.reset(frame);
+
+ // verify created inverted lists one-by-one
+ OnDiskInvertedIndexOpContext opCtx = new OnDiskInvertedIndexOpContext(invIndex.getBTree());
+ opCtx.reset(IndexOp.SEARCH);
+ for (int i = 0; i < tokens.size(); i++) {
+
+ tokenTupleBuilder.reset();
+ UTF8StringSerializerDeserializer.INSTANCE.serialize(tokens.get(i), tokenDos);
+ tokenTupleBuilder.addFieldEndOffset();
+
+ tokenAppender.reset(frame, true);
+ tokenAppender.append(tokenTupleBuilder.getFieldEndOffsets(), tokenTupleBuilder.getByteArray(), 0,
+ tokenTupleBuilder.getSize());
+
+ searchKey.reset(tokenAccessor, 0);
+
+ invIndex.openInvertedListCursor(invListCursor, searchKey, opCtx);
+
+ invListCursor.pinPages();
+ int checkIndex = 0;
+ while (invListCursor.hasNext()) {
+ invListCursor.next();
+ ITupleReference invListTuple = invListCursor.getTuple();
+ int invListElement = IntegerSerializerDeserializer.getInt(invListTuple.getFieldData(0),
+ invListTuple.getFieldStart(0));
+ int checkInvListElement = checkListElements.get(i).get(checkIndex).intValue();
+ Assert.assertEquals(invListElement, checkInvListElement);
+ checkIndex++;
+ }
+ invListCursor.unpinPages();
+ Assert.assertEquals(checkIndex, checkListElements.get(i).size());
+ }
+
+ // check that non-existing tokens have an empty inverted list
+ List<String> nonExistingTokens = new ArrayList<String>();
+ nonExistingTokens.add("watermelon");
+ nonExistingTokens.add("avocado");
+ nonExistingTokens.add("lemon");
+
+ for (int i = 0; i < nonExistingTokens.size(); i++) {
+
+ tokenTupleBuilder.reset();
+ UTF8StringSerializerDeserializer.INSTANCE.serialize(nonExistingTokens.get(i), tokenDos);
+ tokenTupleBuilder.addFieldEndOffset();
+
+ tokenAppender.reset(frame, true);
+ tokenAppender.append(tokenTupleBuilder.getFieldEndOffsets(), tokenTupleBuilder.getByteArray(), 0,
+ tokenTupleBuilder.getSize());
+
+ searchKey.reset(tokenAccessor, 0);
+
+ invIndex.openInvertedListCursor(invListCursor, searchKey, opCtx);
+
+ invListCursor.pinPages();
+ Assert.assertEquals(invListCursor.hasNext(), false);
+ invListCursor.unpinPages();
+ }
+
+ invIndex.deactivate();
+ invIndex.destroy();
+ bufferCache.close();
+ }
+
+ @AfterClass
+ public static void deinit() {
+ AbstractInvIndexTest.tearDown();
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchPerfTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchPerfTest.java
index afdb4e3..15734a0 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchPerfTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchPerfTest.java
@@ -33,7 +33,7 @@
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex.InvertedIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex.OnDiskInvertedIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier;
@@ -147,7 +147,7 @@
private void runQueries(IInvertedIndexSearchModifier searchModifier, int numQueries) throws Exception {
rnd.setSeed(50);
- InvertedIndexAccessor accessor = (InvertedIndexAccessor) invIndex.createAccessor(
+ OnDiskInvertedIndexAccessor accessor = (OnDiskInvertedIndexAccessor) invIndex.createAccessor(
NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchTest.java
index 90c8394..6f93014 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchTest.java
@@ -36,7 +36,7 @@
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex.InvertedIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex.OnDiskInvertedIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.EditDistanceSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
@@ -176,7 +176,7 @@
*/
private void runQueries(IInvertedIndexSearchModifier searchModifier, int numQueries) throws Exception {
rnd.setSeed(50);
- InvertedIndexAccessor accessor = (InvertedIndexAccessor) invIndex.createAccessor(
+ OnDiskInvertedIndexAccessor accessor = (OnDiskInvertedIndexAccessor) invIndex.createAccessor(
NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
for (int i = 0; i < numQueries; i++) {
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
index 134c421..5648d33 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
@@ -15,16 +15,23 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util;
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
import java.util.Collection;
+import java.util.HashSet;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.util.SerdeUtils;
import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestContext;
import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.InvertedIndexException;
@@ -42,9 +49,17 @@
};
protected IBinaryComparatorFactory[] allCmpFactories;
-
- public InvertedIndexTestContext(ISerializerDeserializer[] fieldSerdes, IIndex index) {
+ protected IBinaryTokenizerFactory tokenizerFactory;
+ protected InvertedIndexInsertTupleIterator indexInsertIter;
+ protected HashSet<Comparable> allTokens = new HashSet<Comparable>();
+
+ public InvertedIndexTestContext(ISerializerDeserializer[] fieldSerdes, IIndex index,
+ IBinaryTokenizerFactory tokenizerFactory) {
super(fieldSerdes, index);
+ this.tokenizerFactory = tokenizerFactory;
+ IInvertedIndex invIndex = (IInvertedIndex) index;
+ indexInsertIter = new InvertedIndexInsertTupleIterator(invIndex.getTokenTypeTraits().length,
+ invIndex.getInvListTypeTraits().length, tokenizerFactory.createTokenizer());
}
@Override
@@ -73,8 +88,7 @@
public static InvertedIndexTestContext create(IBufferCache bufferCache, IFreePageManager freePageManager,
IFileMapProvider fileMapProvider, FileReference invListsFile, ISerializerDeserializer[] fieldSerdes,
- int tokenFieldCount, IBinaryTokenizerFactory tokenizerFactory, InvertedIndexType invIndexType)
- throws Exception {
+ int tokenFieldCount, IBinaryTokenizerFactory tokenizerFactory, InvertedIndexType invIndexType) throws IndexException {
ITypeTraits[] allTypeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] allCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes,
fieldSerdes.length);
@@ -110,10 +124,47 @@
throw new InvertedIndexException("Unknow inverted-index type '" + invIndexType + "'.");
}
}
- InvertedIndexTestContext testCtx = new InvertedIndexTestContext(fieldSerdes, invIndex);
+ InvertedIndexTestContext testCtx = new InvertedIndexTestContext(fieldSerdes, invIndex, tokenizerFactory);
return testCtx;
}
+ public void insertCheckTuples(ITupleReference tuple) throws HyracksDataException {
+ indexInsertIter.reset(tuple);
+ while (indexInsertIter.hasNext()) {
+ indexInsertIter.next();
+ ITupleReference insertTuple = indexInsertIter.getTuple();
+ CheckTuple checkTuple = createCheckTuple(insertTuple);
+ insertCheckTuple(checkTuple, getCheckTuples());
+ allTokens.add(checkTuple.getField(0));
+ }
+ }
+
+ public void deleteCheckTuples(ITupleReference tuple) throws HyracksDataException {
+ indexInsertIter.reset(tuple);
+ while (indexInsertIter.hasNext()) {
+ indexInsertIter.next();
+ ITupleReference deteleTuple = indexInsertIter.getTuple();
+ CheckTuple checkTuple = createCheckTuple(deteleTuple);
+ deleteCheckTuple(checkTuple, getCheckTuples());
+ }
+ }
+
+ public HashSet<Comparable> getAllTokens() {
+ return allTokens;
+ }
+
+ @SuppressWarnings("unchecked")
+ public CheckTuple createCheckTuple(ITupleReference tuple) throws HyracksDataException {
+ CheckTuple checkTuple = new CheckTuple(fieldSerdes.length, fieldSerdes.length);
+ for (int i = 0; i < fieldSerdes.length; i++) {
+ ByteArrayInputStream bains = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i), tuple.getFieldLength(i));
+ DataInput in = new DataInputStream(bains);
+ Comparable field = (Comparable) fieldSerdes[i].deserialize(in);
+ checkTuple.appendField(field);
+ }
+ return checkTuple;
+ }
+
@Override
public void upsertCheckTuple(CheckTuple checkTuple, Collection<CheckTuple> checkTuples) {
throw new UnsupportedOperationException("Upsert not supported by inverted index.");
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index b22117d..06fcb44 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -54,22 +54,22 @@
IBinaryComparatorFactory[] btreeCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
.of(UTF8StringPointable.FACTORY) };
return InvertedIndexUtils.createInvertedIndex(harness.getDiskBufferCache(),
- harness.getInvertedListTypeTraits(), harness.getInvertedListBinaryComparatorFactories(), tokenizer);
+ harness.getInvListTypeTraits(), harness.getInvListCmpFactories(), tokenizer);
}
public static InMemoryInvertedIndex createInMemoryInvertedIndex(LSMInvertedIndexTestHarness harness,
IBinaryTokenizer tokenizer) {
return InvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
- harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvertedListTypeTraits(),
- harness.getTokenBinaryComparatorFactories(), harness.getInvertedListBinaryComparatorFactories(),
+ harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvListTypeTraits(),
+ harness.getTokenCmpFactories(), harness.getInvListCmpFactories(),
tokenizer);
}
public static LSMInvertedIndex createLSMInvertedIndex(LSMInvertedIndexTestHarness harness,
IBinaryTokenizer tokenizer) {
return InvertedIndexUtils.createLSMInvertedIndex(harness.getMemBufferCache(),
- harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvertedListTypeTraits(),
- harness.getTokenBinaryComparatorFactories(), harness.getInvertedListBinaryComparatorFactories(),
+ harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvListTypeTraits(),
+ harness.getTokenCmpFactories(), harness.getInvListCmpFactories(),
tokenizer, harness.getDiskBufferCache(),
new LinkedListFreePageManagerFactory(harness.getDiskBufferCache(), new LIFOMetaDataFrameFactory()),
harness.getIOManager(), harness.getOnDiskDir(), harness.getDiskFileMapProvider());