Merged hyracks_lsm_tree r2599:r2780.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_experiments@2781 123451ca-8445-de46-9d55-352943316053
diff --git a/deploy.sh b/deploy.sh
new file mode 100755
index 0000000..32fde34
--- /dev/null
+++ b/deploy.sh
@@ -0,0 +1 @@
+scp hyracks-server/target/hyracks-server-0.2.2-SNAPSHOT-binary-assembly.zip abehm@asterix-master.ics.uci.edu:/home/abehm/hyracks
\ No newline at end of file
diff --git a/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java b/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
index 31061db..b97597d 100644
--- a/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
+++ b/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
@@ -345,7 +345,7 @@
AbstractLogicalOperator aop = (AbstractLogicalOperator) arg;
if (aop.getOperatorTag() != LogicalOperatorTag.UNNEST_MAP)
return Boolean.FALSE;
- UnnestOperator unnestOpArg = (UnnestOperator) copyAndSubstituteVar(op, arg);
+ UnnestMapOperator unnestOpArg = (UnnestMapOperator) copyAndSubstituteVar(op, arg);
boolean isomorphic = VariableUtilities.varListEqualUnordered(op.getVariables(), unnestOpArg.getVariables());
if (!isomorphic)
return Boolean.FALSE;
diff --git a/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/NLJoinPOperator.java b/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/NLJoinPOperator.java
index 8cbd2d8..ec829db 100644
--- a/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/NLJoinPOperator.java
+++ b/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/NLJoinPOperator.java
@@ -111,9 +111,10 @@
throw new NotImplementedException(partitioningType + " nested loop joins are not implemented.");
}
+ // ALEX: HACK TO FLIP WHICH SIDE IS BROADCAST
StructuralPropertiesVector[] pv = new StructuralPropertiesVector[2];
- pv[0] = new StructuralPropertiesVector(new BroadcastPartitioningProperty(null), null);
- pv[1] = new StructuralPropertiesVector(null, null);
+ pv[1] = new StructuralPropertiesVector(new BroadcastPartitioningProperty(null), null);
+ pv[0] = new StructuralPropertiesVector(null, null);
return new PhysicalRequirements(pv, IPartitioningRequirementsCoordinator.NO_COORDINATION);
}
diff --git a/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java b/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
index 9ce910b..399a6e0 100644
--- a/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
+++ b/hyracks-algebricks/hyracks-algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
@@ -15,9 +15,9 @@
private Properties properties = new Properties();
public PhysicalOptimizationConfig() {
- int frameSize = 32768;
+ int frameSize = 131072;
setInt(FRAMESIZE, frameSize);
- setInt(MAX_FRAMES_EXTERNAL_SORT, (int) (((long) 512 * MB) / frameSize));
+ setInt(MAX_FRAMES_EXTERNAL_SORT, (int) (((long) 256 * MB) / frameSize));
setInt(MAX_FRAMES_EXTERNAL_GROUP_BY, (int) (((long) 256 * MB) / frameSize));
// use http://www.rsok.com/~jrm/printprimes.html to find prime numbers
diff --git a/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java b/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
index cbe2b4a..089bbc5 100644
--- a/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
+++ b/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
@@ -94,6 +94,8 @@
*/
public class ExtractCommonExpressionsRule implements IAlgebraicRewriteRule {
+ private final List<ILogicalExpression> originalAssignExprs = new ArrayList<ILogicalExpression>();
+
private final CommonExpressionSubstitutionVisitor substVisitor = new CommonExpressionSubstitutionVisitor();
private final Map<ILogicalExpression, ExprEquivalenceClass> exprEqClassMap = new HashMap<ILogicalExpression, ExprEquivalenceClass>();
@@ -124,11 +126,11 @@
return modified;
}
- private void updateEquivalenceClassMap(LogicalVariable lhs, Mutable<ILogicalExpression> rhsExprRef, ILogicalOperator op) {
- ExprEquivalenceClass exprEqClass = exprEqClassMap.get(rhsExprRef.getValue());
+ private void updateEquivalenceClassMap(LogicalVariable lhs, Mutable<ILogicalExpression> rhsExprRef, ILogicalExpression rhsExpr, ILogicalOperator op) {
+ ExprEquivalenceClass exprEqClass = exprEqClassMap.get(rhsExpr);
if (exprEqClass == null) {
exprEqClass = new ExprEquivalenceClass(op, rhsExprRef);
- exprEqClassMap.put(rhsExprRef.getValue(), exprEqClass);
+ exprEqClassMap.put(rhsExpr, exprEqClass);
}
exprEqClass.setVariable(lhs);
}
@@ -159,6 +161,23 @@
return modified;
}
+ // Remember a copy of the original assign expressions, so we can add them to the equivalence class map
+ // after replacing expressions within the assign operator itself.
+ if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+ AssignOperator assignOp = (AssignOperator) op;
+ originalAssignExprs.clear();
+ int numVars = assignOp.getVariables().size();
+ for (int i = 0; i < numVars; i++) {
+ Mutable<ILogicalExpression> exprRef = assignOp.getExpressions().get(i);
+ ILogicalExpression expr = exprRef.getValue();
+ if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE
+ || expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
+ continue;
+ }
+ originalAssignExprs.add(expr.cloneExpression());
+ }
+ }
+
// Perform common subexpression elimination.
substVisitor.setOperator(op);
if (op.acceptExpressionTransform(substVisitor)) {
@@ -178,7 +197,10 @@
}
// Update equivalence class map.
LogicalVariable lhs = assignOp.getVariables().get(i);
- updateEquivalenceClassMap(lhs, exprRef, op);
+ updateEquivalenceClassMap(lhs, exprRef, exprRef.getValue(), op);
+
+ // Update equivalence class map with original assign expression.
+ updateEquivalenceClassMap(lhs, exprRef, originalAssignExprs.get(i), op);
}
}
diff --git a/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/util/JoinUtils.java b/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/util/JoinUtils.java
index ddc00e3..80f62be 100644
--- a/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/util/JoinUtils.java
+++ b/hyracks-algebricks/hyracks-algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/util/JoinUtils.java
@@ -49,10 +49,11 @@
private final static int MB = 1048576;
private final static double DEFAULT_FUDGE_FACTOR = 1.3;
- private final static int MAX_RECORDS_PER_FRAME = 512;
- private final static int DEFAULT_FRAME_SIZE = 32768;
+ //private final static int MAX_RECORDS_PER_FRAME = 512;
+ private final static int MAX_RECORDS_PER_FRAME = 2048;
+ private final static int DEFAULT_FRAME_SIZE = 131072;
private final static int MAX_LEFT_INPUT_SIZE_HYBRID_HASH = (int) (140L * 1024 * MB / DEFAULT_FRAME_SIZE);
- private final static int DEFAULT_MEMORY_SIZE_HYBRID_HASH = (int) (256L * MB / DEFAULT_FRAME_SIZE);
+ private final static int DEFAULT_MEMORY_SIZE_HYBRID_HASH = (int) (256 * MB / DEFAULT_FRAME_SIZE);
public static void setJoinAlgorithmAndExchangeAlgo(AbstractBinaryJoinOperator op, IOptimizationContext context)
throws AlgebricksException {
diff --git a/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/CCConfig.java b/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/CCConfig.java
index 6c208fe..5e45cdb 100644
--- a/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/CCConfig.java
+++ b/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/CCConfig.java
@@ -39,7 +39,7 @@
public int heartbeatPeriod = 10000;
@Option(name = "-max-heartbeat-lapse-periods", usage = "Sets the maximum number of missed heartbeats before a node is marked as dead (default: 5)")
- public int maxHeartbeatLapsePeriods = 5;
+ public int maxHeartbeatLapsePeriods = 10000;
@Option(name = "-profile-dump-period", usage = "Sets the time duration between two profile dumps from each node controller in milliseconds. 0 to disable. (default: 0)")
public int profileDumpPeriod = 0;
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelper.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelper.java
index ea7c3b4..66ca349 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelper.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelper.java
@@ -26,7 +26,7 @@
public abstract class AbstractLSMIndexDataflowHelper extends IndexDataflowHelper {
protected static int DEFAULT_MEM_PAGE_SIZE = 32768;
- protected static int DEFAULT_MEM_NUM_PAGES = 1000;
+ protected static int DEFAULT_MEM_NUM_PAGES = 10;
protected final int memPageSize;
protected final int memNumPages;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java
index 7db972c..89fd69d 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IPartitionedInvertedIndex.java
@@ -15,13 +15,17 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api;
+import java.util.ArrayList;
+
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedListPartitions;
public interface IPartitionedInvertedIndex {
- public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
- short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions)
- throws HyracksDataException, IndexException;
+ public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions,
+ ArrayList<IInvertedListCursor> cursorsOrderedByTokens) throws HyracksDataException, IndexException;
+
+ public boolean isEmpty();
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
index 0ad10a7..6f28f61 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
@@ -121,14 +121,13 @@
}
}
}
-
- if (appender.getTupleCount() > 0) {
- FrameUtils.flushFrame(writeBuffer, writer);
- }
}
@Override
public void close() throws HyracksDataException {
+ if (appender.getTupleCount() > 0) {
+ FrameUtils.flushFrame(writeBuffer, writer);
+ }
writer.close();
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java
index 13e9b5c..7c3f4e4 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndex.java
@@ -14,6 +14,7 @@
*/
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+import java.util.ArrayList;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
@@ -30,6 +31,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearcher;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedListPartitions;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.PartitionedTOccurrenceSearcher;
@@ -86,9 +88,9 @@
}
@Override
- public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
- short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions)
- throws HyracksDataException, IndexException {
+ public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions,
+ ArrayList<IInvertedListCursor> cursorsOrderedByTokens) throws HyracksDataException, IndexException {
short minPartitionIndex;
short maxPartitionIndex;
partitionIndexLock.readLock().lock();
@@ -98,7 +100,7 @@
if (minPartitionIndex == Short.MAX_VALUE && maxPartitionIndex == Short.MIN_VALUE) {
// Index must be empty.
- return;
+ return false;
}
short partitionStartIndex = minPartitionIndex;
short partitionEndIndex = maxPartitionIndex;
@@ -108,7 +110,7 @@
if (numTokensUpperBound >= 0) {
partitionEndIndex = (short) Math.min(maxPartitionIndex, numTokensUpperBound);
}
-
+
PartitionedTOccurrenceSearcher partSearcher = (PartitionedTOccurrenceSearcher) searcher;
PartitionedInMemoryInvertedIndexOpContext ctx = (PartitionedInMemoryInvertedIndexOpContext) ictx;
ctx.setOperation(IndexOperation.SEARCH);
@@ -127,5 +129,18 @@
inMemListCursor.reset(searchKey);
invListPartitions.addInvertedListCursor(inMemListCursor, i);
}
+ return true;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ partitionIndexLock.readLock().lock();
+ if (minPartitionIndex == Short.MAX_VALUE && maxPartitionIndex == Short.MIN_VALUE) {
+ // Index must be empty.
+ partitionIndexLock.readLock().unlock();
+ return true;
+ }
+ partitionIndexLock.readLock().unlock();
+ return false;
}
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java
index ed8f600..f55a700 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeElementInvertedListCursor.java
@@ -46,6 +46,8 @@
private final FixedSizeTupleReference tuple;
private ICachedPage[] pages = new ICachedPage[10];
private int[] elementIndexes = new int[10];
+
+ private boolean pinned = false;
public FixedSizeElementInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTraits[] invListFields) {
this.bufferCache = bufferCache;
@@ -84,12 +86,16 @@
@Override
public void pinPages() throws HyracksDataException {
+ if (pinned) {
+ return;
+ }
int pix = 0;
for (int i = startPageId; i <= endPageId; i++) {
pages[pix] = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, i), false);
pages[pix].acquireReadLatch();
pix++;
}
+ pinned = true;
}
@Override
@@ -99,6 +105,7 @@
pages[i].releaseReadLatch();
bufferCache.unpin(pages[i]);
}
+ pinned = false;
}
private void positionCursor(int elementIx) {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
index 5b2ec60..6e395e7 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndex.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+import java.util.ArrayList;
+
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -61,9 +63,9 @@
}
@Override
- public void openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
- short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions)
- throws HyracksDataException, IndexException {
+ public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
+ short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions,
+ ArrayList<IInvertedListCursor> cursorsOrderedByTokens) throws HyracksDataException, IndexException {
PartitionedTOccurrenceSearcher partSearcher = (PartitionedTOccurrenceSearcher) searcher;
OnDiskInvertedIndexOpContext ctx = (OnDiskInvertedIndexOpContext) ictx;
ITupleReference lowSearchKey = null;
@@ -86,6 +88,7 @@
ctx.btreePred.setLowKey(lowSearchKey, true);
ctx.btreePred.setHighKey(highSearchKey, true);
ctx.btreeAccessor.search(ctx.btreeCursor, ctx.btreePred);
+ boolean tokenExists = false;
try {
while (ctx.btreeCursor.hasNext()) {
ctx.btreeCursor.next();
@@ -95,11 +98,19 @@
btreeTuple.getFieldStart(PARTITIONING_NUM_TOKENS_FIELD));
IInvertedListCursor invListCursor = partSearcher.getCachedInvertedListCursor();
resetInvertedListCursor(btreeTuple, invListCursor);
+ cursorsOrderedByTokens.add(invListCursor);
invListPartitions.addInvertedListCursor(invListCursor, numTokens);
+ tokenExists = true;
}
} finally {
ctx.btreeCursor.close();
ctx.btreeCursor.reset();
}
+ return tokenExists;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return false;
}
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
index fb8b9b0..05aa6d0 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java
@@ -43,6 +43,9 @@
protected final ConcatenatingTupleReference fullLowSearchKey = new ConcatenatingTupleReference(2);
protected final ConcatenatingTupleReference fullHighSearchKey = new ConcatenatingTupleReference(2);
+ // HACK for better IO perf
+ protected final ArrayList<IInvertedListCursor> cursorsOrderedByTokens = new ArrayList<IInvertedListCursor>();
+
protected final InvertedListPartitions partitions = new InvertedListPartitions();
public PartitionedTOccurrenceSearcher(IHyracksCommonContext ctx, IInvertedIndex invIndex) {
@@ -80,32 +83,63 @@
public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred,
IIndexOperationContext ictx) throws HyracksDataException, IndexException {
+ IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex;
+ searchResult.reset();
+ if (partInvIndex.isEmpty()) {
+ return;
+ }
+
tokenizeQuery(searchPred);
short numQueryTokens = (short) queryTokenAccessor.getTupleCount();
IInvertedIndexSearchModifier searchModifier = searchPred.getSearchModifier();
short numTokensLowerBound = searchModifier.getNumTokensLowerBound(numQueryTokens);
short numTokensUpperBound = searchModifier.getNumTokensUpperBound(numQueryTokens);
-
- IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex;
- invListCursorCache.reset();
- partitions.reset(numTokensLowerBound, numTokensUpperBound);
- for (int i = 0; i < numQueryTokens; i++) {
- searchKey.reset(queryTokenAccessor, i);
- partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound,
- partitions);
- }
-
+
occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
if (occurrenceThreshold <= 0) {
throw new OccurrenceThresholdPanicException("Merge Threshold is <= 0. Failing Search.");
}
-
- // Process the partitions one-by-one.
+
+ short maxCountPossible = numQueryTokens;
+ invListCursorCache.reset();
+ partitions.reset(numTokensLowerBound, numTokensUpperBound);
+ cursorsOrderedByTokens.clear();
+ for (int i = 0; i < numQueryTokens; i++) {
+ searchKey.reset(queryTokenAccessor, i);
+ if (!partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound,
+ partitions, cursorsOrderedByTokens)) {
+ maxCountPossible--;
+ // No results possible.
+ if (maxCountPossible < occurrenceThreshold) {
+ return;
+ }
+ }
+ }
+
ArrayList<IInvertedListCursor>[] partitionCursors = partitions.getPartitions();
short start = partitions.getMinValidPartitionIndex();
short end = partitions.getMaxValidPartitionIndex();
- searchResult.reset();
+
+ // HACK FOR BETTER IO
+ if (!cursorsOrderedByTokens.isEmpty()) {
+ for (int i = start; i <= end; i++) {
+ if (partitionCursors[i] == null) {
+ continue;
+ }
+ // Prune partition because no element in it can satisfy the occurrence threshold.
+ if (partitionCursors[i].size() < occurrenceThreshold) {
+ cursorsOrderedByTokens.removeAll(partitionCursors[i]);
+ }
+ }
+ // Pin all the cursors in the order of tokens.
+ int numCursors = cursorsOrderedByTokens.size();
+ for (int i = 0; i < numCursors; i++) {
+ cursorsOrderedByTokens.get(i).pinPages();
+ }
+ }
+
+ // Process the partitions one-by-one.
for (int i = start; i <= end; i++) {
if (partitionCursors[i] == null) {
continue;
@@ -119,7 +153,7 @@
invListMerger.reset();
invListMerger.merge(partitionCursors[i], occurrenceThreshold, numPrefixLists, searchResult);
}
-
+
resultCursor.open(null, searchPred);
}