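Changed operator memory settings (default external sort/group-by budget reduced from 512 MB to 256 MB; join defaults now assume 131072-byte frames and 2048 records per frame). Made some minor perf improvements to inverted index searches (skip the search when the index is empty; stop early once too few query tokens have inverted lists to reach the occurrence threshold).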
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_experiments@2601 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java b/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
index a9f5561..399a6e0 100644
--- a/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
+++ b/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
@@ -17,8 +17,8 @@
public PhysicalOptimizationConfig() {
int frameSize = 131072;
setInt(FRAMESIZE, frameSize);
- setInt(MAX_FRAMES_EXTERNAL_SORT, (int) (((long) 512 * MB) / frameSize));
- setInt(MAX_FRAMES_EXTERNAL_GROUP_BY, (int) (((long) 512 * MB) / frameSize));
+ setInt(MAX_FRAMES_EXTERNAL_SORT, (int) (((long) 256 * MB) / frameSize));
+ setInt(MAX_FRAMES_EXTERNAL_GROUP_BY, (int) (((long) 256 * MB) / frameSize));
// use http://www.rsok.com/~jrm/printprimes.html to find prime numbers
setInt(DEFAULT_HASH_GROUP_TABLE_SIZE, 10485767);
@@ -45,7 +45,7 @@
public int getMaxFramesExternalGroupBy() {
int frameSize = getFrameSize();
- return getInt(MAX_FRAMES_EXTERNAL_GROUP_BY, (int) (((long) 512 * MB) / frameSize));
+ return getInt(MAX_FRAMES_EXTERNAL_GROUP_BY, (int) (((long) 256 * MB) / frameSize));
}
public void setMaxFramesExternalGroupBy(int frameLimit) {
diff --git a/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/util/JoinUtils.java b/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/util/JoinUtils.java
index ddc00e3..80f62be 100644
--- a/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/util/JoinUtils.java
+++ b/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/util/JoinUtils.java
@@ -49,10 +49,11 @@
private final static int MB = 1048576;
private final static double DEFAULT_FUDGE_FACTOR = 1.3;
- private final static int MAX_RECORDS_PER_FRAME = 512;
- private final static int DEFAULT_FRAME_SIZE = 32768;
+ //private final static int MAX_RECORDS_PER_FRAME = 512;
+ private final static int MAX_RECORDS_PER_FRAME = 2048;
+ private final static int DEFAULT_FRAME_SIZE = 131072;
private final static int MAX_LEFT_INPUT_SIZE_HYBRID_HASH = (int) (140L * 1024 * MB / DEFAULT_FRAME_SIZE);
- private final static int DEFAULT_MEMORY_SIZE_HYBRID_HASH = (int) (256L * MB / DEFAULT_FRAME_SIZE);
+ private final static int DEFAULT_MEMORY_SIZE_HYBRID_HASH = (int) (256L * MB / DEFAULT_FRAME_SIZE); // long math, as in MAX_LEFT_INPUT_SIZE_HYBRID_HASH, so a larger budget cannot overflow int
public static void setJoinAlgorithmAndExchangeAlgo(AbstractBinaryJoinOperator op, IOptimizationContext context)
throws AlgebricksException {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java
index 7db972c..cec4691 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java
@@ -21,7 +21,9 @@
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedListPartitions;
public interface IPartitionedInvertedIndex {
- public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions)
throws HyracksDataException, IndexException;
+
+ public boolean isEmpty();
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java
index 13e9b5c..5bc47fd 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java
@@ -86,7 +86,7 @@
}
@Override
- public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions)
throws HyracksDataException, IndexException {
short minPartitionIndex;
@@ -98,7 +98,7 @@
if (minPartitionIndex == Short.MAX_VALUE && maxPartitionIndex == Short.MIN_VALUE) {
// Index must be empty.
- return;
+ return false;
}
short partitionStartIndex = minPartitionIndex;
short partitionEndIndex = maxPartitionIndex;
@@ -127,5 +127,18 @@
inMemListCursor.reset(searchKey);
invListPartitions.addInvertedListCursor(inMemListCursor, i);
}
+ return true;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ // Hold the read lock while comparing both bounds; finally releases it on every exit path.
+ partitionIndexLock.readLock().lock();
+ try {
+ // Index must be empty if the bounds are Short.MAX_VALUE / Short.MIN_VALUE.
+ return minPartitionIndex == Short.MAX_VALUE && maxPartitionIndex == Short.MIN_VALUE;
+ } finally {
+ partitionIndexLock.readLock().unlock();
+ }
}
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
index 5b2ec60..4c6cabc 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
@@ -61,7 +61,7 @@
}
@Override
- public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions)
throws HyracksDataException, IndexException {
PartitionedTOccurrenceSearcher partSearcher = (PartitionedTOccurrenceSearcher) searcher;
@@ -86,6 +86,7 @@
ctx.btreePred.setLowKey(lowSearchKey, true);
ctx.btreePred.setHighKey(highSearchKey, true);
ctx.btreeAccessor.search(ctx.btreeCursor, ctx.btreePred);
+ boolean tokenExists = false;
try {
while (ctx.btreeCursor.hasNext()) {
ctx.btreeCursor.next();
@@ -96,10 +97,17 @@
IInvertedListCursor invListCursor = partSearcher.getCachedInvertedListCursor();
resetInvertedListCursor(btreeTuple, invListCursor);
invListPartitions.addInvertedListCursor(invListCursor, numTokens);
+ tokenExists = true;
}
} finally {
ctx.btreeCursor.close();
ctx.btreeCursor.reset();
}
+ return tokenExists;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return false;
}
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
index fb8b9b0..2884bbc 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
@@ -80,32 +80,43 @@
public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred,
IIndexOperationContext ictx) throws HyracksDataException, IndexException {
+ IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex;
+ searchResult.reset();
+ if (partInvIndex.isEmpty()) {
+ return;
+ }
+
tokenizeQuery(searchPred);
short numQueryTokens = (short) queryTokenAccessor.getTupleCount();
IInvertedIndexSearchModifier searchModifier = searchPred.getSearchModifier();
short numTokensLowerBound = searchModifier.getNumTokensLowerBound(numQueryTokens);
short numTokensUpperBound = searchModifier.getNumTokensUpperBound(numQueryTokens);
-
- IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex;
+
+ occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
+ if (occurrenceThreshold <= 0) {
+ throw new OccurrenceThresholdPanicException("Merge Threshold is <= 0. Failing Search.");
+ }
+
+ short maxCountPossible = numQueryTokens;
invListCursorCache.reset();
partitions.reset(numTokensLowerBound, numTokensUpperBound);
for (int i = 0; i < numQueryTokens; i++) {
searchKey.reset(queryTokenAccessor, i);
- partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound,
- partitions);
- }
-
- occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
- if (occurrenceThreshold <= 0) {
- throw new OccurrenceThresholdPanicException("Merge Threshold is <= 0. Failing Search.");
+ if (!partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound,
+ partitions)) {
+ maxCountPossible--;
+ // No results possible.
+ if (maxCountPossible < occurrenceThreshold) {
+ return;
+ }
+ }
}
// Process the partitions one-by-one.
ArrayList<IInvertedListCursor>[] partitionCursors = partitions.getPartitions();
short start = partitions.getMinValidPartitionIndex();
short end = partitions.getMaxValidPartitionIndex();
- searchResult.reset();
for (int i = start; i <= end; i++) {
if (partitionCursors[i] == null) {
continue;