Fixed deletion protocol in the lsm-inverted-index. The in-memory deleted-keys BTree now only contains keys referring to on-disk components. Deletions that refer to documents in the in-memory inverted index are physically removed from there, and no entry to the deleted-keys BTree is made. This behavior seems necessary to avoid a pathological case of 'lost deletes' to on-disk components (more details in comments of the code).
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1864 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
index 6a5c762..b12bb7d 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
@@ -92,4 +92,8 @@
public List<String> getTokenDictionary() {
return tokenDict;
}
+
+ @Override
+ public void reset() {
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
index d535404..c98c249 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
@@ -28,4 +28,8 @@
public Double next() {
return rnd.nextDouble();
}
+
+ @Override
+ public void reset() {
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
index 53450f6..7c3ff81 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
@@ -28,4 +28,8 @@
public Float next() {
return rnd.nextFloat();
}
+
+ @Override
+ public void reset() {
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
index 6f743ac..dfeead6 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
@@ -17,4 +17,5 @@
public interface IFieldValueGenerator<T> {
public T next();
+ public void reset();
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
index 88f7bab..cd6e2a6 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
@@ -28,4 +28,8 @@
public Integer next() {
return rnd.nextInt();
}
+
+ @Override
+ public void reset() {
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
index 7c94f18..6b86278 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
@@ -90,4 +90,8 @@
return strBuilder.toString();
}
+
+ @Override
+ public void reset() {
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
index 228d12e..e93b8de 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
@@ -16,17 +16,26 @@
package edu.uci.ics.hyracks.storage.am.common.datagen;
public class SortedDoubleFieldValueGenerator implements IFieldValueGenerator<Double> {
- private double val = 0.0d;
-
+ private double val;
+ private final double startVal;
+
public SortedDoubleFieldValueGenerator() {
+ startVal = 0.0d;
+ reset();
}
public SortedDoubleFieldValueGenerator(double startVal) {
- val = startVal;
+ this.startVal = startVal;
+ reset();
}
@Override
public Double next() {
return val++;
}
+
+ @Override
+ public void reset() {
+ val = startVal;
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
index 13fd47b..fb163e1 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
@@ -17,16 +17,25 @@
public class SortedFloatFieldValueGenerator implements IFieldValueGenerator<Float> {
private float val = 0.0f;
-
+ private final float startVal;
+
public SortedFloatFieldValueGenerator() {
+ startVal = 0.0f;
+ reset();
}
public SortedFloatFieldValueGenerator(float startVal) {
- val = startVal;
+ this.startVal = startVal;
+ reset();
}
@Override
public Float next() {
return val++;
}
+
+ @Override
+ public void reset() {
+ val = startVal;
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
index b4a13cb..a036772 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
@@ -17,16 +17,25 @@
public class SortedIntegerFieldValueGenerator implements IFieldValueGenerator<Integer> {
private int val = 0;
+ private final int startVal;
public SortedIntegerFieldValueGenerator() {
+ startVal = 0;
+ reset();
}
public SortedIntegerFieldValueGenerator(int startVal) {
- val = startVal;
+ this.startVal = startVal;
+ reset();
}
@Override
public Integer next() {
return val++;
}
+
+ @Override
+ public void reset() {
+ val = startVal;
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
index ef395ef..6bf01a4 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
@@ -39,4 +39,8 @@
}
return strBuilder.toString();
}
+
+ @Override
+ public void reset() {
+ }
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/TupleGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/TupleGenerator.java
index bfc56b6..1099af2 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/TupleGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/TupleGenerator.java
@@ -63,4 +63,10 @@
public ITupleReference get() {
return tuple;
}
+
+ public void reset() {
+ for (IFieldValueGenerator fieldGen : fieldGens) {
+ fieldGen.reset();
+ }
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 3d7de71..4965e45 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -26,7 +26,6 @@
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -341,8 +340,14 @@
}
/**
+ * The keys in the in-memory deleted-keys BTree only refers to on-disk components.
+ * We delete documents from the in-memory inverted index by deleting its entries directly,
+ * and do NOT add the deleted key to the deleted-keys BTree.
+ * Otherwise, inserts would have to remove keys from the in-memory deleted-keys BTree which
+ * may cause incorrect behavior in the following pathological case:
+ * Insert doc 1, flush, delete doc 1, insert doc 1
+ * After the sequence above doc 1 will now appear twice because the delete of the on-disk doc 1 has been lost.
* Insert:
- * - Delete key from deleted-keys BTree (ignore if key does not exist).
* - Insert document into in-memory inverted index.
* Delete:
* - Delete document from in-memory inverted index (ignore if it does not exist).
@@ -354,13 +359,6 @@
LSMInvertedIndexOpContext ctx = (LSMInvertedIndexOpContext) ictx;
switch (ctx.getIndexOp()) {
case INSERT: {
- // First remove the possibly deleted key from the deleted-keys BTree.
- ctx.keysOnlyTuple.reset(tuple);
- try {
- ctx.deletedKeysBTreeAccessor.delete(ctx.keysOnlyTuple);
- } catch (BTreeNonExistentKeyException e) {
- // The key did not exist in the deleted-keys BTree.
- }
// Insert into the in-memory inverted index.
ctx.memInvIndexAccessor.insert(tuple);
break;
@@ -452,7 +450,6 @@
}
@Override
- // TODO: Deal with deletions properly.
public Object merge(List<Object> mergedComponents, ILSMIOOperation operation) throws HyracksDataException,
IndexException {
LSMInvertedIndexMergeOperation mergeOp = (LSMInvertedIndexMergeOperation) operation;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 353626b..3eee4b7 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -72,8 +72,8 @@
protected boolean isDeleted(PriorityQueueElement checkElement) throws HyracksDataException {
keysOnlyTuple.reset(checkElement.getTuple());
int end = checkElement.getCursorIndex();
- for (int i = 0; i <= end; i++) {
- deletedKeysBTreeCursor.reset();
+ for (int i = 0; i < end; i++) {
+ deletedKeysBTreeCursor.reset();
try {
deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
if (deletedKeysBTreeCursor.hasNext()) {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
index 983c0bc..6c2de44 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
@@ -69,7 +69,7 @@
protected boolean isDeleted(ITupleReference key) throws HyracksDataException {
keySearchPred.setLowKey(key, true);
keySearchPred.setHighKey(key, true);
- for (int i = 0; i <= accessorIndex; i++) {
+ for (int i = 0; i < accessorIndex; i++) {
deletedKeysBTreeCursor.reset();
try {
deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
index 737fad5..348b69b 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
@@ -26,6 +26,8 @@
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
@@ -33,9 +35,12 @@
public abstract class AbstractInvertedIndexDeleteTest extends AbstractInvertedIndexTest {
protected final int numInsertRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
- protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
+ protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_DELETE_ROUNDS;
protected final boolean bulkLoad;
+ protected int NUM_QUERIES = 10000;
+ protected int[] scanCountArray = new int[NUM_DOCS_TO_INSERT];
+
public AbstractInvertedIndexDeleteTest(InvertedIndexType invIndexType, boolean bulkLoad) {
super(invIndexType);
this.bulkLoad = bulkLoad;
@@ -48,6 +53,9 @@
invIndex.activate();
for (int i = 0; i < numInsertRounds; i++) {
+ // Start generating documents ids from 0 again.
+ tupleGen.reset();
+
if (bulkLoad) {
InvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
} else {
@@ -62,8 +70,18 @@
int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size()
/ (float) numDeleteRounds);
for (int j = 0; j < numDeleteRounds; j++) {
+ System.out.println("DELETE ROUND: " + i + " " + j);
+
InvertedIndexTestUtils.deleteFromInvIndex(testCtx, harness.getRandom(), numTuplesPerDeleteRound);
validateAndCheckIndex(testCtx);
+
+ System.out.println("TESTING SEARCHES");
+
+ IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
+ InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_QUERIES, searchModifier,
+ scanCountArray);
+
+ System.out.println("DONE WITH TESTING SEARCHES");
}
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
index 1c5fcf6..0f750c6 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
@@ -15,31 +15,15 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
-import static org.junit.Assert.fail;
-
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Random;
import org.junit.Test;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
-import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
-import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
-import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
@@ -50,9 +34,6 @@
protected int[] scanCountArray = new int[NUM_DOCS_TO_INSERT];
protected final boolean bulkLoad;
- // Probability that a randomly generated query is used, instead of a document from the corpus.
- protected final float randomQueryProb = 0.9f;
-
public AbstractInvertedIndexSearchTest(InvertedIndexType invIndexType, boolean bulkLoad) {
super(invIndexType);
this.bulkLoad = bulkLoad;
@@ -71,89 +52,9 @@
}
invIndex.validate();
- IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invIndex.createAccessor(
- NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- IBinaryTokenizer tokenizer = testCtx.getTokenizerFactory().createTokenizer();
- InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
- Random rnd = harness.getRandom();
- List<ITupleReference> documentCorpus = testCtx.getDocumentCorpus();
- // Project away the primary-key field.
- int[] fieldPermutation = new int[] { 0 };
- PermutingTupleReference searchDocument = new PermutingTupleReference(fieldPermutation);
-
- IIndexCursor resultCursor = accessor.createSearchCursor();
- for (int i = 0; i < NUM_QUERIES; i++) {
- if (rnd.nextFloat() <= randomQueryProb) {
- // Generate a random query.
- ITupleReference randomQuery = tupleGen.next();
- searchDocument.reset(randomQuery);
- } else {
- // Pick a random document from the corpus to use as the search query.
- int queryIndex = Math.abs(rnd.nextInt() % documentCorpus.size());
- searchDocument.reset(documentCorpus.get(queryIndex));
- }
-
- // DEBUG
- /*
- StringBuilder builder = new StringBuilder();
- UTF8StringPointable.toString(builder, searchDocument.getFieldData(0), searchDocument.getFieldStart(0));
- String query = builder.toString();
-
- System.out.println("QUERY: " + i + " " + query + " " + isRandom);
- if (query.equals("Patricia Mary")) {
- System.out.println("HERE WE GO, DEBUG IT!");
- }
- */
-
- // Set query tuple in search predicate.
- searchPred.setQueryTuple(searchDocument);
- searchPred.setQueryFieldIndex(0);
-
- resultCursor.reset();
- boolean panic = false;
- try {
- accessor.search(resultCursor, searchPred);
- } catch (OccurrenceThresholdPanicException e) {
- // ignore panic queries
- panic = true;
- }
-
- if (!panic) {
- // Consume cursor and deserialize results so we can sort them. Some search cursors may not deliver the result sorted (e.g., LSM search cursor).
- ArrayList<Integer> actualResults = new ArrayList<Integer>();
- while (resultCursor.hasNext()) {
- resultCursor.next();
- ITupleReference resultTuple = resultCursor.getTuple();
- int actual = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
- resultTuple.getFieldStart(0));
- actualResults.add(Integer.valueOf(actual));
- }
- Collections.sort(actualResults);
-
- // Get expected results.
- List<Integer> expectedResults = new ArrayList<Integer>();
- InvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(),
- searchDocument, tokenizer, testCtx.getFieldSerdes()[0],
- searchModifier, expectedResults);
-
- Iterator<Integer> expectedIter = expectedResults.iterator();
- Iterator<Integer> actualIter = actualResults.iterator();
- while (expectedIter.hasNext() && actualIter.hasNext()) {
- int expected = expectedIter.next();
- int actual = actualIter.next();
- if (actual != expected) {
- fail("Query results do not match. Encountered: " + actual + ". Expected: " + expected + "");
- }
- }
- if (expectedIter.hasNext()) {
- fail("Query results do not match. Actual results missing.");
- }
- if (actualIter.hasNext()) {
- fail("Query results do not match. Actual contains too many results.");
- }
- }
- }
-
+ InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_QUERIES, searchModifier,
+ scanCountArray);
+
invIndex.deactivate();
invIndex.destroy();
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index 1c8809d..aaf0564 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -23,7 +23,9 @@
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
@@ -57,6 +59,8 @@
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
@@ -68,6 +72,9 @@
@SuppressWarnings("rawtypes")
public class InvertedIndexTestUtils {
+ // Probability that a randomly generated query is used, instead of a document from the corpus.
+ protected static final float RQNDOM_QUERY_PROB = 0.9f;
+
public static TupleGenerator createStringDocumentTupleGen(Random rnd) throws IOException {
IFieldValueGenerator[] fieldGens = new IFieldValueGenerator[2];
fieldGens[0] = new DocumentStringFieldValueGenerator(2, 10, 10000, rnd);
@@ -172,7 +179,6 @@
Iterator<CheckTuple> expectedIter = testCtx.getCheckTuples().iterator();
// Compare index elements.
- int count = 0;
try {
while (invIndexCursor.hasNext() && expectedIter.hasNext()) {
invIndexCursor.next();
@@ -180,9 +186,14 @@
CheckTuple expected = expectedIter.next();
OrderedIndexTestUtils.createTupleFromCheckTuple(expected, expectedBuilder, expectedTuple, fieldSerdes);
if (tupleCmp.compare(actualTuple, expectedTuple) != 0) {
- fail("Index elements differ for token '" + expected.getField(0) + "' at position " + count + ".");
+ fail("Index entries differ for token '" + expected.getField(0) + "'.");
}
- count++;
+ }
+ if (expectedIter.hasNext()) {
+ fail("Indexes do not match. Actual index is missing entries.");
+ }
+ if (invIndexCursor.hasNext()) {
+ fail("Indexes do not match. Actual index contains too many entries.");
}
} finally {
invIndexCursor.close();
@@ -317,6 +328,86 @@
}
}
+ public static void testIndexSearch(InvertedIndexTestContext testCtx, TupleGenerator tupleGen, Random rnd,
+ int numQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray) throws IOException,
+ IndexException {
+ IInvertedIndex invIndex = testCtx.invIndex;
+ IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invIndex.createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ IBinaryTokenizer tokenizer = testCtx.getTokenizerFactory().createTokenizer();
+ InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
+ List<ITupleReference> documentCorpus = testCtx.getDocumentCorpus();
+ // Project away the primary-key field.
+ int[] fieldPermutation = new int[] { 0 };
+ PermutingTupleReference searchDocument = new PermutingTupleReference(fieldPermutation);
+
+ IIndexCursor resultCursor = accessor.createSearchCursor();
+ for (int i = 0; i < numQueries; i++) {
+ if (rnd.nextFloat() <= RQNDOM_QUERY_PROB || documentCorpus.isEmpty()) {
+ // Generate a random query.
+ ITupleReference randomQuery = tupleGen.next();
+ searchDocument.reset(randomQuery);
+ } else {
+ // Pick a random document from the corpus to use as the search query.
+ int queryIndex = Math.abs(rnd.nextInt() % documentCorpus.size());
+ searchDocument.reset(documentCorpus.get(queryIndex));
+ }
+
+ // Set query tuple in search predicate.
+ searchPred.setQueryTuple(searchDocument);
+ searchPred.setQueryFieldIndex(0);
+
+ resultCursor.reset();
+ boolean panic = false;
+ try {
+ accessor.search(resultCursor, searchPred);
+ } catch (OccurrenceThresholdPanicException e) {
+ // ignore panic queries
+ panic = true;
+ }
+
+ try {
+ if (!panic) {
+ // Consume cursor and deserialize results so we can sort them. Some search cursors may not deliver the result sorted (e.g., LSM search cursor).
+ ArrayList<Integer> actualResults = new ArrayList<Integer>();
+ while (resultCursor.hasNext()) {
+ resultCursor.next();
+ ITupleReference resultTuple = resultCursor.getTuple();
+ int actual = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(0));
+ actualResults.add(Integer.valueOf(actual));
+ }
+ Collections.sort(actualResults);
+
+ // Get expected results.
+ List<Integer> expectedResults = new ArrayList<Integer>();
+ InvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(), searchDocument,
+ tokenizer, testCtx.getFieldSerdes()[0], searchModifier, expectedResults);
+
+ Iterator<Integer> expectedIter = expectedResults.iterator();
+ Iterator<Integer> actualIter = actualResults.iterator();
+ while (expectedIter.hasNext() && actualIter.hasNext()) {
+ int expected = expectedIter.next();
+ int actual = actualIter.next();
+ if (actual != expected) {
+ fail("Query results do not match. Encountered: " + actual + ". Expected: " + expected + "");
+ }
+ }
+ if (expectedIter.hasNext()) {
+ fail("Query results do not match. Actual results missing.");
+ }
+ if (actualIter.hasNext()) {
+ fail("Query results do not match. Actual contains too many results.");
+ }
+ }
+ } finally {
+ resultCursor.close();
+ }
+ }
+ }
+
+
+
/*
public static OnDiskInvertedIndex createTestInvertedIndex(LSMInvertedIndexTestHarness harness, IBinaryTokenizer tokenizer)
throws HyracksDataException {