Improved the LSM inverted index tests and fixed a few bugs in them.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1867 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/ConjunctiveSearchModifier.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/ConjunctiveSearchModifier.java
index 183e7f6..1d260f0 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/ConjunctiveSearchModifier.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/ConjunctiveSearchModifier.java
@@ -28,4 +28,9 @@
public int getNumPrefixLists(int numQueryTokens) {
return 1;
}
+
+ @Override
+ public String toString() {
+ return "Conjunctive Search Modifier";
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/EditDistanceSearchModifier.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/EditDistanceSearchModifier.java
index bba0c97..0580319 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/EditDistanceSearchModifier.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/EditDistanceSearchModifier.java
@@ -52,4 +52,9 @@
public void setEdThresh(int edThresh) {
this.edThresh = edThresh;
}
+
+ @Override
+ public String toString() {
+ return "Edit Distance Search Modifier, GramLen: " + gramLength + ", Threshold: " + edThresh;
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/JaccardSearchModifier.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/JaccardSearchModifier.java
index 022cd69..6878f48 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/JaccardSearchModifier.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/JaccardSearchModifier.java
@@ -45,4 +45,9 @@
public void setJaccThresh(float jaccThresh) {
this.jaccThresh = jaccThresh;
}
+
+ @Override
+ public String toString() {
+ return "Jaccard Search Modifier, Threshold: " + jaccThresh;
+ }
}
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
index 59485b2..d8ebb1d 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
@@ -74,10 +74,13 @@
public static final int LSM_INVINDEX_MAX_OPEN_FILES = 10;
public static final int LSM_INVINDEX_HYRACKS_FRAME_SIZE = 32768;
+ public static final int LSM_INVINDEX_NUM_DOCS_TO_INSERT = 10000;
public static final int LSM_INVINDEX_NUM_BULKLOAD_ROUNDS = 5;
public static final int LSM_INVINDEX_MAX_TREES_TO_MERGE = 5;
public static final int LSM_INVINDEX_NUM_INSERT_ROUNDS = 3;
public static final int LSM_INVINDEX_NUM_DELETE_ROUNDS = 3;
+ // Allocate a generous size to make sure we have enough elements for all tests.
+ public static final int LSM_INVINDEX_SCAN_COUNT_ARRAY_SIZE = 1000000;
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java
index 6d5c4e4..30e22e0 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java
@@ -35,7 +35,7 @@
private final int maxTreesToMerge = AccessMethodTestsConfig.LSM_INVINDEX_MAX_TREES_TO_MERGE;
public LSMInvertedIndexMergeTest() {
- super(InvertedIndexType.LSM, true, 5);
+ super(InvertedIndexType.LSM, true, 1);
}
@Override
@@ -60,6 +60,7 @@
invIndexAccessor.merge(ioop);
}
validateAndCheckIndex(testCtx);
+ runTinySearchWorkload(testCtx, tupleGen);
}
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
index 348b69b..ad47653 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
@@ -16,18 +16,13 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import org.junit.Test;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
@@ -38,9 +33,6 @@
protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_DELETE_ROUNDS;
protected final boolean bulkLoad;
- protected int NUM_QUERIES = 10000;
- protected int[] scanCountArray = new int[NUM_DOCS_TO_INSERT];
-
public AbstractInvertedIndexDeleteTest(InvertedIndexType invIndexType, boolean bulkLoad) {
super(invIndexType);
this.bulkLoad = bulkLoad;
@@ -51,7 +43,7 @@
IIndex invIndex = testCtx.getIndex();
invIndex.create();
invIndex.activate();
-
+
for (int i = 0; i < numInsertRounds; i++) {
// Start generating documents ids from 0 again.
tupleGen.reset();
@@ -60,28 +52,15 @@
InvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
} else {
InvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
- }
- validateAndCheckIndex(testCtx);
-
- List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
- documentCorpus.addAll(testCtx.getDocumentCorpus());
+ }
// Delete all documents in a couple of rounds.
int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size()
/ (float) numDeleteRounds);
for (int j = 0; j < numDeleteRounds; j++) {
- System.out.println("DELETE ROUND: " + i + " " + j);
-
InvertedIndexTestUtils.deleteFromInvIndex(testCtx, harness.getRandom(), numTuplesPerDeleteRound);
validateAndCheckIndex(testCtx);
-
- System.out.println("TESTING SEARCHES");
-
- IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
- InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_QUERIES, searchModifier,
- scanCountArray);
-
- System.out.println("DONE WITH TESTING SEARCHES");
+ runTinySearchWorkload(testCtx, tupleGen);
}
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
index b8f9ac5..bb8c851 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
@@ -50,6 +50,7 @@
InvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
}
validateAndCheckIndex(testCtx);
+ runTinySearchWorkload(testCtx, tupleGen);
}
invIndex.deactivate();
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
index 0f750c6..d777b85 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
@@ -30,7 +30,8 @@
public abstract class AbstractInvertedIndexSearchTest extends AbstractInvertedIndexTest {
- protected int NUM_QUERIES = 10000;
+ protected int NUM_DOC_QUERIES = 8000;
+ protected int NUM_RANDOM_QUERIES = 2000;
protected int[] scanCountArray = new int[NUM_DOCS_TO_INSERT];
protected final boolean bulkLoad;
@@ -52,8 +53,8 @@
}
invIndex.validate();
- InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_QUERIES, searchModifier,
- scanCountArray);
+ InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_DOC_QUERIES,
+ NUM_RANDOM_QUERIES, searchModifier, scanCountArray);
invIndex.deactivate();
invIndex.destroy();
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java
index 6636d4c..945da8c 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java
@@ -15,6 +15,10 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
import org.junit.After;
import org.junit.Before;
@@ -22,14 +26,29 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
public abstract class AbstractInvertedIndexTest {
+ protected final Logger LOGGER = Logger.getLogger(AbstractInvertedIndexTest.class.getName());
+
protected final LSMInvertedIndexTestHarness harness = new LSMInvertedIndexTestHarness();
- protected int NUM_DOCS_TO_INSERT = 10000;
+ protected final int NUM_DOCS_TO_INSERT = AccessMethodTestsConfig.LSM_INVINDEX_NUM_DOCS_TO_INSERT;
+ protected final int[] SCAN_COUNT_ARRAY = new int[AccessMethodTestsConfig.LSM_INVINDEX_SCAN_COUNT_ARRAY_SIZE];
+
+ protected final int TINY_WORKLOAD_NUM_DOC_QUERIES = 800;
+ protected final int TINY_WORKLOAD_NUM_RANDOM_QUERIES = 200;
+
+ // Note: The edit-distance search modifier is tested separately.
+ protected final IInvertedIndexSearchModifier[] TEST_SEARCH_MODIFIERS = new IInvertedIndexSearchModifier[] {
+ new ConjunctiveSearchModifier(), new JaccardSearchModifier(0.8f), new JaccardSearchModifier(0.5f) };
protected final InvertedIndexType invIndexType;
@@ -46,9 +65,17 @@
public void tearDown() throws HyracksDataException {
harness.tearDown();
}
-
+
+ /**
+ * Validates the index, and compares it against the expected index.
+ * This check only verifies the integrity and correctness of the index;
+ * it does not ensure the correctness of index searches.
+ */
protected void validateAndCheckIndex(InvertedIndexTestContext testCtx) throws HyracksDataException, IndexException {
IIndex invIndex = testCtx.getIndex();
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("Validating index: " + invIndex);
+ }
// Validate index and compare against expected index.
invIndex.validate();
if (invIndexType == InvertedIndexType.INMEMORY || invIndexType == InvertedIndexType.ONDISK) {
@@ -57,4 +84,18 @@
}
InvertedIndexTestUtils.compareActualAndExpectedIndexesRangeSearch(testCtx);
}
+
+ /**
+ * Runs a workload of queries using different search modifiers, and verifies the correctness of the results.
+ */
+ protected void runTinySearchWorkload(InvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
+ IndexException {
+ for (IInvertedIndexSearchModifier searchModifier : TEST_SEARCH_MODIFIERS) {
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("Running test workload with: " + searchModifier.toString());
+ }
+ InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(),
+ TINY_WORKLOAD_NUM_DOC_QUERIES, TINY_WORKLOAD_NUM_RANDOM_QUERIES, searchModifier, SCAN_COUNT_ARRAY);
+ }
+ }
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
index 16e1426..295cbc9 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
@@ -128,6 +128,7 @@
}
public void tearDown() throws HyracksDataException {
+ diskBufferCache.close();
for (IODeviceHandle dev : ioManager.getIODevices()) {
File dir = new File(dev.getPath(), onDiskDir);
FilenameFilter filter = new FilenameFilter() {
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
index 7152e61..79efae3 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
@@ -148,7 +148,7 @@
indexTupleIter.next();
ITupleReference insertTuple = indexTupleIter.getTuple();
CheckTuple checkTuple = createCheckTuple(insertTuple);
- insertCheckTuple(checkTuple, getCheckTuples());
+ insertCheckTuple(checkTuple, checkTuples);
allTokens.add(checkTuple.getField(0));
}
}
@@ -160,7 +160,7 @@
indexTupleIter.next();
ITupleReference insertTuple = indexTupleIter.getTuple();
CheckTuple checkTuple = createCheckTuple(insertTuple);
- deleteCheckTuple(checkTuple, getCheckTuples());
+ deleteCheckTuple(checkTuple, checkTuples);
}
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index aaf0564..bcbd0fa 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -111,7 +111,7 @@
IIndexBulkLoader bulkLoader = testCtx.getIndex().createBulkLoader(1.0f, false);
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(testCtx.getFieldSerdes().length);
ArrayTupleReference tuple = new ArrayTupleReference();
- Iterator<CheckTuple> checkTupleIter = testCtx.getCheckTuples().iterator();
+ Iterator<CheckTuple> checkTupleIter = tmpMemIndex.iterator();
while (checkTupleIter.hasNext()) {
CheckTuple checkTuple = checkTupleIter.next();
OrderedIndexTestUtils.createTupleFromCheckTuple(checkTuple, tupleBuilder, tuple, fieldSerdes);
@@ -329,7 +329,7 @@
}
public static void testIndexSearch(InvertedIndexTestContext testCtx, TupleGenerator tupleGen, Random rnd,
- int numQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray) throws IOException,
+ int numDocQueries, int numRandomQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray) throws IOException,
IndexException {
IInvertedIndex invIndex = testCtx.invIndex;
IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invIndex.createAccessor(
@@ -342,8 +342,10 @@
PermutingTupleReference searchDocument = new PermutingTupleReference(fieldPermutation);
IIndexCursor resultCursor = accessor.createSearchCursor();
+ int numQueries = numDocQueries + numRandomQueries;
for (int i = 0; i < numQueries; i++) {
- if (rnd.nextFloat() <= RQNDOM_QUERY_PROB || documentCorpus.isEmpty()) {
+ // If the number of documents in the corpus is less than numDocQueries, then replace the remaining ones with random queries.
+ if (i >= numDocQueries || i >= documentCorpus.size()) {
// Generate a random query.
ITupleReference randomQuery = tupleGen.next();
searchDocument.reset(randomQuery);